{ "best_global_step": 5118, "best_metric": 3.08605433, "best_model_checkpoint": "/inspire/hdd/project/deepanalysis/guitao-25013/Muse/workspace/Finals/ckpt/Muse_8b_main_1.4e-4/v0-20251230-182110/checkpoint-5118", "epoch": 3.0, "eval_steps": 500, "global_step": 5118, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005863383172090296, "grad_norm": 353.3991972472677, "learning_rate": 8.2063305978898e-08, "loss": 24.279613494873047, "step": 1, "token_acc": 0.006257334790174416 }, { "epoch": 0.0011726766344180592, "grad_norm": 350.0536601985609, "learning_rate": 1.64126611957796e-07, "loss": 24.32593536376953, "step": 2, "token_acc": 0.006629192320853969 }, { "epoch": 0.001759014951627089, "grad_norm": 350.72277612537886, "learning_rate": 2.46189917936694e-07, "loss": 24.30904769897461, "step": 3, "token_acc": 0.006821216035803277 }, { "epoch": 0.0023453532688361184, "grad_norm": 349.95484398867524, "learning_rate": 3.28253223915592e-07, "loss": 24.276325225830078, "step": 4, "token_acc": 0.006935026587752649 }, { "epoch": 0.002931691586045148, "grad_norm": 350.92364205592605, "learning_rate": 4.1031652989449e-07, "loss": 24.29950523376465, "step": 5, "token_acc": 0.006618784925222573 }, { "epoch": 0.003518029903254178, "grad_norm": 353.063279858596, "learning_rate": 4.92379835873388e-07, "loss": 24.292869567871094, "step": 6, "token_acc": 0.006742489814595313 }, { "epoch": 0.004104368220463207, "grad_norm": 351.1884071141757, "learning_rate": 5.744431418522861e-07, "loss": 24.115386962890625, "step": 7, "token_acc": 0.006782887460025678 }, { "epoch": 0.004690706537672237, "grad_norm": 354.1448133387045, "learning_rate": 6.56506447831184e-07, "loss": 24.09915542602539, "step": 8, "token_acc": 0.006792604077671951 }, { "epoch": 0.005277044854881266, "grad_norm": 350.42175428441675, "learning_rate": 7.38569753810082e-07, "loss": 23.74787139892578, "step": 9, "token_acc": 0.0068703800772349465 }, { "epoch": 0.005863383172090296, "grad_norm": 352.6013674230611, "learning_rate": 8.2063305978898e-07, "loss": 23.604782104492188, "step": 10, "token_acc": 0.00683007561417538 }, { "epoch": 0.006449721489299325, "grad_norm": 351.32521265674916, "learning_rate": 9.02696365767878e-07, "loss": 23.572376251220703, "step": 11, "token_acc": 0.006992549309122515 }, { "epoch": 0.007036059806508356, "grad_norm": 347.02622475928257, "learning_rate": 9.84759671746776e-07, "loss": 22.248552322387695, "step": 12, "token_acc": 0.007175440695333753 }, { "epoch": 0.007622398123717385, "grad_norm": 348.8617545488523, "learning_rate": 1.066822977725674e-06, "loss": 22.154541015625, "step": 13, "token_acc": 0.006777886747962537 }, { "epoch": 0.008208736440926415, "grad_norm": 343.6973379841624, "learning_rate": 1.1488862837045722e-06, "loss": 21.72881317138672, "step": 14, "token_acc": 0.006503657347827978 }, { "epoch": 0.008795074758135445, "grad_norm": 341.3048722022376, "learning_rate": 1.23094958968347e-06, "loss": 21.626178741455078, "step": 15, "token_acc": 0.00670334386827422 }, { "epoch": 0.009381413075344474, "grad_norm": 180.2068215087911, "learning_rate": 1.313012895662368e-06, "loss": 18.097524642944336, "step": 16, "token_acc": 0.006140843631906853 }, { "epoch": 0.009967751392553504, "grad_norm": 173.98568033854303, "learning_rate": 1.3950762016412662e-06, "loss": 17.94785499572754, "step": 17, "token_acc": 0.006328160704640824 }, { "epoch": 0.010554089709762533, "grad_norm": 174.28139288515845, "learning_rate": 1.477139507620164e-06, "loss": 17.822351455688477, "step": 18, "token_acc": 0.006275670161458306 }, { "epoch": 0.011140428026971563, "grad_norm": 166.6479427524692, "learning_rate": 1.5592028135990621e-06, "loss": 17.317907333374023, "step": 19, "token_acc": 0.0059726743079873915 }, { "epoch": 0.011726766344180592, "grad_norm": 166.1027966467043, "learning_rate": 1.64126611957796e-06, "loss": 17.139856338500977, "step": 20, "token_acc": 0.006182355736233568 }, { "epoch": 0.012313104661389622, "grad_norm": 90.74082551872074, "learning_rate": 1.7233294255568579e-06, "loss": 14.137916564941406, "step": 21, "token_acc": 0.004946787386758282 }, { "epoch": 0.01289944297859865, "grad_norm": 62.72798753896125, "learning_rate": 1.805392731535756e-06, "loss": 13.710620880126953, "step": 22, "token_acc": 0.004691528834664486 }, { "epoch": 0.013485781295807681, "grad_norm": 55.78773460505893, "learning_rate": 1.887456037514654e-06, "loss": 13.558979034423828, "step": 23, "token_acc": 0.004765177425676655 }, { "epoch": 0.014072119613016711, "grad_norm": 50.273687477426186, "learning_rate": 1.969519343493552e-06, "loss": 13.42824935913086, "step": 24, "token_acc": 0.0046091386721859575 }, { "epoch": 0.01465845793022574, "grad_norm": 43.14364478235981, "learning_rate": 2.05158264947245e-06, "loss": 13.265416145324707, "step": 25, "token_acc": 0.004781320972733189 }, { "epoch": 0.01524479624743477, "grad_norm": 32.03762628531264, "learning_rate": 2.133645955451348e-06, "loss": 13.020715713500977, "step": 26, "token_acc": 0.004930619867678786 }, { "epoch": 0.0158311345646438, "grad_norm": 26.96031330799516, "learning_rate": 2.215709261430246e-06, "loss": 12.895532608032227, "step": 27, "token_acc": 0.005856069238352517 }, { "epoch": 0.01641747288185283, "grad_norm": 24.26451087839124, "learning_rate": 2.2977725674091444e-06, "loss": 12.806621551513672, "step": 28, "token_acc": 0.005940171808046141 }, { "epoch": 0.017003811199061858, "grad_norm": 9.740196653338913, "learning_rate": 2.379835873388042e-06, "loss": 12.42845344543457, "step": 29, "token_acc": 0.006846418397555078 }, { "epoch": 0.01759014951627089, "grad_norm": 7.486303824112801, "learning_rate": 2.46189917936694e-06, "loss": 12.334634780883789, "step": 30, "token_acc": 0.006826872012745619 }, { "epoch": 0.01817648783347992, "grad_norm": 6.001783632824859, "learning_rate": 2.543962485345838e-06, "loss": 12.280149459838867, "step": 31, "token_acc": 0.007606588508354323 }, { "epoch": 0.018762826150688947, "grad_norm": 4.880549492241735, "learning_rate": 2.626025791324736e-06, "loss": 12.226173400878906, "step": 32, "token_acc": 0.008839033926522591 }, { "epoch": 0.019349164467897976, "grad_norm": 4.17141709429935, "learning_rate": 2.7080890973036343e-06, "loss": 12.18889045715332, "step": 33, "token_acc": 0.008684544605667642 }, { "epoch": 0.019935502785107008, "grad_norm": 3.6115598562353433, "learning_rate": 2.7901524032825323e-06, "loss": 12.151744842529297, "step": 34, "token_acc": 0.00917035559157886 }, { "epoch": 0.020521841102316037, "grad_norm": 3.0890799514881246, "learning_rate": 2.87221570926143e-06, "loss": 12.10763931274414, "step": 35, "token_acc": 0.009937812623164599 }, { "epoch": 0.021108179419525065, "grad_norm": 2.6866379814053225, "learning_rate": 2.954279015240328e-06, "loss": 12.076554298400879, "step": 36, "token_acc": 0.0096938114127194 }, { "epoch": 0.021694517736734097, "grad_norm": 2.1172737148331486, "learning_rate": 3.036342321219226e-06, "loss": 12.0507230758667, "step": 37, "token_acc": 0.009487984343427925 }, { "epoch": 0.022280856053943126, "grad_norm": 1.7242673706242342, "learning_rate": 3.1184056271981242e-06, "loss": 12.029277801513672, "step": 38, "token_acc": 0.008936445257403641 }, { "epoch": 0.022867194371152155, "grad_norm": 1.487620116583969, "learning_rate": 3.2004689331770222e-06, "loss": 12.003844261169434, "step": 39, "token_acc": 0.008377003307046552 }, { "epoch": 0.023453532688361183, "grad_norm": 1.3178672925010604, "learning_rate": 3.28253223915592e-06, "loss": 11.979639053344727, "step": 40, "token_acc": 0.00939225773348996 }, { "epoch": 0.024039871005570215, "grad_norm": 1.1303030998807853, "learning_rate": 3.3645955451348178e-06, "loss": 11.960676193237305, "step": 41, "token_acc": 0.009738004464688135 }, { "epoch": 0.024626209322779244, "grad_norm": 0.9186250060482735, "learning_rate": 3.4466588511137157e-06, "loss": 11.949100494384766, "step": 42, "token_acc": 0.009821430926823348 }, { "epoch": 0.025212547639988273, "grad_norm": 0.9473638571784654, "learning_rate": 3.528722157092614e-06, "loss": 11.953974723815918, "step": 43, "token_acc": 0.008738816621751464 }, { "epoch": 0.0257988859571973, "grad_norm": 0.8210116536176688, "learning_rate": 3.610785463071512e-06, "loss": 11.93989086151123, "step": 44, "token_acc": 0.009321738043008175 }, { "epoch": 0.026385224274406333, "grad_norm": 0.7126951090689018, "learning_rate": 3.69284876905041e-06, "loss": 11.929862022399902, "step": 45, "token_acc": 0.009835562515872344 }, { "epoch": 0.026971562591615362, "grad_norm": 0.5711748472296502, "learning_rate": 3.774912075029308e-06, "loss": 11.915067672729492, "step": 46, "token_acc": 0.011866672596514818 }, { "epoch": 0.02755790090882439, "grad_norm": 0.5158692138559006, "learning_rate": 3.856975381008206e-06, "loss": 11.9104585647583, "step": 47, "token_acc": 0.011654396655386036 }, { "epoch": 0.028144239226033423, "grad_norm": 0.4769871603502968, "learning_rate": 3.939038686987104e-06, "loss": 11.898885726928711, "step": 48, "token_acc": 0.012095299320557298 }, { "epoch": 0.02873057754324245, "grad_norm": 0.580043562125423, "learning_rate": 4.021101992966002e-06, "loss": 11.89678955078125, "step": 49, "token_acc": 0.011564227778812823 }, { "epoch": 0.02931691586045148, "grad_norm": 0.43284830981288447, "learning_rate": 4.1031652989449e-06, "loss": 11.880815505981445, "step": 50, "token_acc": 0.012207451987845902 }, { "epoch": 0.02990325417766051, "grad_norm": 0.42267623135522364, "learning_rate": 4.185228604923798e-06, "loss": 11.875591278076172, "step": 51, "token_acc": 0.011903238527006876 }, { "epoch": 0.03048959249486954, "grad_norm": 0.4190264564203129, "learning_rate": 4.267291910902696e-06, "loss": 11.860198974609375, "step": 52, "token_acc": 0.012763264206840829 }, { "epoch": 0.03107593081207857, "grad_norm": 0.4316295201650057, "learning_rate": 4.349355216881594e-06, "loss": 11.856969833374023, "step": 53, "token_acc": 0.012433101148374258 }, { "epoch": 0.0316622691292876, "grad_norm": 0.4093283954262731, "learning_rate": 4.431418522860492e-06, "loss": 11.856557846069336, "step": 54, "token_acc": 0.01180031945909887 }, { "epoch": 0.03224860744649663, "grad_norm": 0.39209180275718025, "learning_rate": 4.513481828839389e-06, "loss": 11.849498748779297, "step": 55, "token_acc": 0.011768190806312053 }, { "epoch": 0.03283494576370566, "grad_norm": 0.4466528623134847, "learning_rate": 4.595545134818289e-06, "loss": 11.836506843566895, "step": 56, "token_acc": 0.012197683813627794 }, { "epoch": 0.03342128408091469, "grad_norm": 0.42285522610376636, "learning_rate": 4.677608440797186e-06, "loss": 11.831693649291992, "step": 57, "token_acc": 0.011911346238627711 }, { "epoch": 0.034007622398123716, "grad_norm": 0.41595729785113533, "learning_rate": 4.759671746776084e-06, "loss": 11.828054428100586, "step": 58, "token_acc": 0.011446931133495307 }, { "epoch": 0.034593960715332744, "grad_norm": 0.41794444908679035, "learning_rate": 4.841735052754982e-06, "loss": 11.808276176452637, "step": 59, "token_acc": 0.012374518254694083 }, { "epoch": 0.03518029903254178, "grad_norm": 0.44009696917083724, "learning_rate": 4.92379835873388e-06, "loss": 11.800575256347656, "step": 60, "token_acc": 0.01197958001758864 }, { "epoch": 0.03576663734975081, "grad_norm": 0.47676000285615605, "learning_rate": 5.005861664712778e-06, "loss": 11.784139633178711, "step": 61, "token_acc": 0.012095167177077652 }, { "epoch": 0.03635297566695984, "grad_norm": 0.5162531183377757, "learning_rate": 5.087924970691676e-06, "loss": 11.77094841003418, "step": 62, "token_acc": 0.0116184836595014 }, { "epoch": 0.036939313984168866, "grad_norm": 0.5354361559615101, "learning_rate": 5.169988276670574e-06, "loss": 11.753301620483398, "step": 63, "token_acc": 0.011516011989272755 }, { "epoch": 0.037525652301377894, "grad_norm": 0.5409300302992422, "learning_rate": 5.252051582649472e-06, "loss": 11.736246109008789, "step": 64, "token_acc": 0.011390995192671368 }, { "epoch": 0.03811199061858692, "grad_norm": 0.7359393050093247, "learning_rate": 5.33411488862837e-06, "loss": 11.713890075683594, "step": 65, "token_acc": 0.011716271182914269 }, { "epoch": 0.03869832893579595, "grad_norm": 0.8767697201056378, "learning_rate": 5.416178194607269e-06, "loss": 11.699317932128906, "step": 66, "token_acc": 0.011596935769004125 }, { "epoch": 0.03928466725300499, "grad_norm": 0.9267768490692999, "learning_rate": 5.498241500586167e-06, "loss": 11.68133544921875, "step": 67, "token_acc": 0.011488018933044079 }, { "epoch": 0.039871005570214016, "grad_norm": 0.7227860686025017, "learning_rate": 5.580304806565065e-06, "loss": 11.657129287719727, "step": 68, "token_acc": 0.011695423933334021 }, { "epoch": 0.040457343887423045, "grad_norm": 0.8402313651744766, "learning_rate": 5.662368112543962e-06, "loss": 11.631108283996582, "step": 69, "token_acc": 0.012419254683429972 }, { "epoch": 0.04104368220463207, "grad_norm": 0.7906118410184275, "learning_rate": 5.74443141852286e-06, "loss": 11.614664077758789, "step": 70, "token_acc": 0.011516319820151064 }, { "epoch": 0.0416300205218411, "grad_norm": 0.8322877908916292, "learning_rate": 5.826494724501758e-06, "loss": 11.590474128723145, "step": 71, "token_acc": 0.011352121595946802 }, { "epoch": 0.04221635883905013, "grad_norm": 1.059437267103675, "learning_rate": 5.908558030480656e-06, "loss": 11.550149917602539, "step": 72, "token_acc": 0.012050529335972265 }, { "epoch": 0.04280269715625916, "grad_norm": 1.084329561242705, "learning_rate": 5.990621336459554e-06, "loss": 11.522337913513184, "step": 73, "token_acc": 0.011590306481574587 }, { "epoch": 0.043389035473468195, "grad_norm": 1.3397929902734904, "learning_rate": 6.072684642438452e-06, "loss": 11.488041877746582, "step": 74, "token_acc": 0.011866717683133849 }, { "epoch": 0.04397537379067722, "grad_norm": 1.256433443371083, "learning_rate": 6.15474794841735e-06, "loss": 11.448644638061523, "step": 75, "token_acc": 0.012370915556290638 }, { "epoch": 0.04456171210788625, "grad_norm": 1.5273264321656337, "learning_rate": 6.2368112543962485e-06, "loss": 11.4195556640625, "step": 76, "token_acc": 0.011814501288185498 }, { "epoch": 0.04514805042509528, "grad_norm": 1.5489725307554358, "learning_rate": 6.3188745603751465e-06, "loss": 11.383012771606445, "step": 77, "token_acc": 0.01227650103264134 }, { "epoch": 0.04573438874230431, "grad_norm": 1.9959633742104053, "learning_rate": 6.4009378663540444e-06, "loss": 11.34876823425293, "step": 78, "token_acc": 0.011815972733584397 }, { "epoch": 0.04632072705951334, "grad_norm": 1.7373266034204045, "learning_rate": 6.4830011723329424e-06, "loss": 11.306633949279785, "step": 79, "token_acc": 0.012449112351533974 }, { "epoch": 0.046907065376722366, "grad_norm": 3.739424525440159, "learning_rate": 6.56506447831184e-06, "loss": 11.277594566345215, "step": 80, "token_acc": 0.012040986573906474 }, { "epoch": 0.047493403693931395, "grad_norm": 2.0105480424981095, "learning_rate": 6.6471277842907375e-06, "loss": 11.239297866821289, "step": 81, "token_acc": 0.01217278105266473 }, { "epoch": 0.04807974201114043, "grad_norm": 2.682525004385048, "learning_rate": 6.7291910902696355e-06, "loss": 11.20435905456543, "step": 82, "token_acc": 0.011083016156077825 }, { "epoch": 0.04866608032834946, "grad_norm": 2.922170204901006, "learning_rate": 6.8112543962485335e-06, "loss": 11.158313751220703, "step": 83, "token_acc": 0.011732856814205692 }, { "epoch": 0.04925241864555849, "grad_norm": 3.007051223128323, "learning_rate": 6.8933177022274315e-06, "loss": 11.121021270751953, "step": 84, "token_acc": 0.011253999215766588 }, { "epoch": 0.049838756962767516, "grad_norm": 4.693021109603985, "learning_rate": 6.9753810082063295e-06, "loss": 11.084024429321289, "step": 85, "token_acc": 0.01177730466363885 }, { "epoch": 0.050425095279976545, "grad_norm": 2.86869186485627, "learning_rate": 7.057444314185228e-06, "loss": 11.044998168945312, "step": 86, "token_acc": 0.010470217199688187 }, { "epoch": 0.051011433597185574, "grad_norm": 6.349549681675803, "learning_rate": 7.139507620164126e-06, "loss": 10.99063777923584, "step": 87, "token_acc": 0.011309202804301621 }, { "epoch": 0.0515977719143946, "grad_norm": 3.5156715298922077, "learning_rate": 7.221570926143024e-06, "loss": 10.937030792236328, "step": 88, "token_acc": 0.011740314842940141 }, { "epoch": 0.05218411023160364, "grad_norm": 4.024231165096492, "learning_rate": 7.303634232121922e-06, "loss": 10.878375053405762, "step": 89, "token_acc": 0.01106387934548419 }, { "epoch": 0.052770448548812667, "grad_norm": 6.073982718880735, "learning_rate": 7.38569753810082e-06, "loss": 10.81357192993164, "step": 90, "token_acc": 0.011670034233179477 }, { "epoch": 0.053356786866021695, "grad_norm": 4.404701215620994, "learning_rate": 7.467760844079718e-06, "loss": 10.762024879455566, "step": 91, "token_acc": 0.01219048825280223 }, { "epoch": 0.053943125183230724, "grad_norm": 15.215084711758854, "learning_rate": 7.549824150058616e-06, "loss": 10.722940444946289, "step": 92, "token_acc": 0.011778320281538315 }, { "epoch": 0.05452946350043975, "grad_norm": 7.19663634587313, "learning_rate": 7.631887456037514e-06, "loss": 10.657026290893555, "step": 93, "token_acc": 0.011953774413732514 }, { "epoch": 0.05511580181764878, "grad_norm": 5.453563211261255, "learning_rate": 7.713950762016412e-06, "loss": 10.620040893554688, "step": 94, "token_acc": 0.011370734985964496 }, { "epoch": 0.05570214013485781, "grad_norm": 5.232700925527675, "learning_rate": 7.79601406799531e-06, "loss": 10.54349136352539, "step": 95, "token_acc": 0.012036520255683912 }, { "epoch": 0.056288478452066845, "grad_norm": 16.321493056218788, "learning_rate": 7.878077373974208e-06, "loss": 10.472469329833984, "step": 96, "token_acc": 0.012198682289817336 }, { "epoch": 0.056874816769275874, "grad_norm": 7.0082057914626805, "learning_rate": 7.960140679953106e-06, "loss": 10.43171501159668, "step": 97, "token_acc": 0.011481548078652043 }, { "epoch": 0.0574611550864849, "grad_norm": 5.873538407073087, "learning_rate": 8.042203985932004e-06, "loss": 10.332284927368164, "step": 98, "token_acc": 0.011738283236948985 }, { "epoch": 0.05804749340369393, "grad_norm": 8.474957766386694, "learning_rate": 8.124267291910902e-06, "loss": 10.282196044921875, "step": 99, "token_acc": 0.011019985088469022 }, { "epoch": 0.05863383172090296, "grad_norm": 5.445353053809421, "learning_rate": 8.2063305978898e-06, "loss": 10.199283599853516, "step": 100, "token_acc": 0.011515075139409596 }, { "epoch": 0.05922017003811199, "grad_norm": 10.711733916953884, "learning_rate": 8.288393903868698e-06, "loss": 10.146799087524414, "step": 101, "token_acc": 0.011802311285960166 }, { "epoch": 0.05980650835532102, "grad_norm": 5.565870787655604, "learning_rate": 8.370457209847596e-06, "loss": 10.075040817260742, "step": 102, "token_acc": 0.011042258066142622 }, { "epoch": 0.06039284667253005, "grad_norm": 6.184076865108105, "learning_rate": 8.452520515826494e-06, "loss": 10.03441333770752, "step": 103, "token_acc": 0.011396305491181799 }, { "epoch": 0.06097918498973908, "grad_norm": 4.677904369315326, "learning_rate": 8.534583821805392e-06, "loss": 9.944074630737305, "step": 104, "token_acc": 0.011589492193744338 }, { "epoch": 0.06156552330694811, "grad_norm": 8.633749726575598, "learning_rate": 8.61664712778429e-06, "loss": 9.924915313720703, "step": 105, "token_acc": 0.011250097929125428 }, { "epoch": 0.06215186162415714, "grad_norm": 5.693080709704519, "learning_rate": 8.698710433763188e-06, "loss": 9.872126579284668, "step": 106, "token_acc": 0.012050421500488889 }, { "epoch": 0.06273819994136617, "grad_norm": 6.8026045550650975, "learning_rate": 8.780773739742086e-06, "loss": 9.798917770385742, "step": 107, "token_acc": 0.011372199585936653 }, { "epoch": 0.0633245382585752, "grad_norm": 4.402933915963803, "learning_rate": 8.862837045720984e-06, "loss": 9.762984275817871, "step": 108, "token_acc": 0.011015209658706988 }, { "epoch": 0.06391087657578423, "grad_norm": 4.451685236842638, "learning_rate": 8.944900351699882e-06, "loss": 9.714855194091797, "step": 109, "token_acc": 0.011815973159538777 }, { "epoch": 0.06449721489299326, "grad_norm": 4.467985578441344, "learning_rate": 9.026963657678778e-06, "loss": 9.674742698669434, "step": 110, "token_acc": 0.011135588616953858 }, { "epoch": 0.06508355321020229, "grad_norm": 5.027999857942171, "learning_rate": 9.109026963657678e-06, "loss": 9.61811637878418, "step": 111, "token_acc": 0.011828176856410338 }, { "epoch": 0.06566989152741132, "grad_norm": 3.5740083419814157, "learning_rate": 9.191090269636578e-06, "loss": 9.597053527832031, "step": 112, "token_acc": 0.011521914309518282 }, { "epoch": 0.06625622984462035, "grad_norm": 3.2066645264068163, "learning_rate": 9.273153575615474e-06, "loss": 9.595144271850586, "step": 113, "token_acc": 0.011856729851139261 }, { "epoch": 0.06684256816182937, "grad_norm": 3.380436631672164, "learning_rate": 9.355216881594372e-06, "loss": 9.505526542663574, "step": 114, "token_acc": 0.011621402619128526 }, { "epoch": 0.0674289064790384, "grad_norm": 2.586374201673117, "learning_rate": 9.43728018757327e-06, "loss": 9.478507995605469, "step": 115, "token_acc": 0.011260276930828993 }, { "epoch": 0.06801524479624743, "grad_norm": 4.66133607577173, "learning_rate": 9.519343493552168e-06, "loss": 9.459450721740723, "step": 116, "token_acc": 0.01190094039268104 }, { "epoch": 0.06860158311345646, "grad_norm": 2.658962787207946, "learning_rate": 9.601406799531066e-06, "loss": 9.433343887329102, "step": 117, "token_acc": 0.012701033123868357 }, { "epoch": 0.06918792143066549, "grad_norm": 3.2761725155113046, "learning_rate": 9.683470105509964e-06, "loss": 9.341836929321289, "step": 118, "token_acc": 0.012102275793135877 }, { "epoch": 0.06977425974787452, "grad_norm": 2.292269171824197, "learning_rate": 9.765533411488862e-06, "loss": 9.363323211669922, "step": 119, "token_acc": 0.012448431162093276 }, { "epoch": 0.07036059806508356, "grad_norm": 2.0287524472259113, "learning_rate": 9.84759671746776e-06, "loss": 9.30771541595459, "step": 120, "token_acc": 0.011561001740279884 }, { "epoch": 0.07094693638229259, "grad_norm": 2.1390630144273044, "learning_rate": 9.92966002344666e-06, "loss": 9.288138389587402, "step": 121, "token_acc": 0.01157997128167122 }, { "epoch": 0.07153327469950162, "grad_norm": 1.7250787565218983, "learning_rate": 1.0011723329425556e-05, "loss": 9.297527313232422, "step": 122, "token_acc": 0.012586028094654473 }, { "epoch": 0.07211961301671065, "grad_norm": 3.6634110434700897, "learning_rate": 1.0093786635404455e-05, "loss": 9.258310317993164, "step": 123, "token_acc": 0.011125447142537066 }, { "epoch": 0.07270595133391967, "grad_norm": 1.7460773975467718, "learning_rate": 1.0175849941383352e-05, "loss": 9.217559814453125, "step": 124, "token_acc": 0.011685694001289344 }, { "epoch": 0.0732922896511287, "grad_norm": 1.731892016464575, "learning_rate": 1.025791324736225e-05, "loss": 9.188346862792969, "step": 125, "token_acc": 0.011616803743544022 }, { "epoch": 0.07387862796833773, "grad_norm": 1.3978453266801356, "learning_rate": 1.0339976553341148e-05, "loss": 9.213885307312012, "step": 126, "token_acc": 0.012242752597743906 }, { "epoch": 0.07446496628554676, "grad_norm": 1.7358613672686785, "learning_rate": 1.0422039859320046e-05, "loss": 9.196192741394043, "step": 127, "token_acc": 0.011191089160931015 }, { "epoch": 0.07505130460275579, "grad_norm": 1.219226645483535, "learning_rate": 1.0504103165298944e-05, "loss": 9.227466583251953, "step": 128, "token_acc": 0.011883061692494375 }, { "epoch": 0.07563764291996482, "grad_norm": 1.8476996989018628, "learning_rate": 1.0586166471277842e-05, "loss": 9.175331115722656, "step": 129, "token_acc": 0.011889448439998125 }, { "epoch": 0.07622398123717385, "grad_norm": 1.3553990282390567, "learning_rate": 1.066822977725674e-05, "loss": 9.11203384399414, "step": 130, "token_acc": 0.011545309921107048 }, { "epoch": 0.07681031955438287, "grad_norm": 1.1180921806879636, "learning_rate": 1.0750293083235638e-05, "loss": 9.18380355834961, "step": 131, "token_acc": 0.011438533176333858 }, { "epoch": 0.0773966578715919, "grad_norm": 1.1790581495895291, "learning_rate": 1.0832356389214537e-05, "loss": 9.122804641723633, "step": 132, "token_acc": 0.011879717856700903 }, { "epoch": 0.07798299618880093, "grad_norm": 1.0455468945801532, "learning_rate": 1.0914419695193434e-05, "loss": 9.169681549072266, "step": 133, "token_acc": 0.011402734220381816 }, { "epoch": 0.07856933450600997, "grad_norm": 1.059244558103296, "learning_rate": 1.0996483001172333e-05, "loss": 9.144702911376953, "step": 134, "token_acc": 0.011548831635710006 }, { "epoch": 0.079155672823219, "grad_norm": 1.2616172974199422, "learning_rate": 1.107854630715123e-05, "loss": 9.184444427490234, "step": 135, "token_acc": 0.011868313055922102 }, { "epoch": 0.07974201114042803, "grad_norm": 1.1106130347793868, "learning_rate": 1.116060961313013e-05, "loss": 9.035835266113281, "step": 136, "token_acc": 0.011981428785382657 }, { "epoch": 0.08032834945763706, "grad_norm": 0.9232883027339179, "learning_rate": 1.1242672919109025e-05, "loss": 9.106751441955566, "step": 137, "token_acc": 0.011643069088370279 }, { "epoch": 0.08091468777484609, "grad_norm": 1.8956896598899826, "learning_rate": 1.1324736225087923e-05, "loss": 9.110843658447266, "step": 138, "token_acc": 0.011811310273412956 }, { "epoch": 0.08150102609205512, "grad_norm": 1.0734978900537682, "learning_rate": 1.1406799531066821e-05, "loss": 9.132123947143555, "step": 139, "token_acc": 0.011321745577362687 }, { "epoch": 0.08208736440926415, "grad_norm": 1.0608066801730718, "learning_rate": 1.148886283704572e-05, "loss": 9.035505294799805, "step": 140, "token_acc": 0.012309549356223175 }, { "epoch": 0.08267370272647317, "grad_norm": 0.9601927192660569, "learning_rate": 1.1570926143024619e-05, "loss": 9.115060806274414, "step": 141, "token_acc": 0.012348977664434786 }, { "epoch": 0.0832600410436822, "grad_norm": 0.9718876666025947, "learning_rate": 1.1652989449003515e-05, "loss": 9.108566284179688, "step": 142, "token_acc": 0.011118297991364089 }, { "epoch": 0.08384637936089123, "grad_norm": 1.0404533955981914, "learning_rate": 1.1735052754982415e-05, "loss": 9.093795776367188, "step": 143, "token_acc": 0.011180620258219087 }, { "epoch": 0.08443271767810026, "grad_norm": 1.119453938213593, "learning_rate": 1.1817116060961311e-05, "loss": 9.01313591003418, "step": 144, "token_acc": 0.012133318926554417 }, { "epoch": 0.08501905599530929, "grad_norm": 1.3259193691505942, "learning_rate": 1.1899179366940211e-05, "loss": 9.161356925964355, "step": 145, "token_acc": 0.011806872178118105 }, { "epoch": 0.08560539431251832, "grad_norm": 1.0389449968394369, "learning_rate": 1.1981242672919107e-05, "loss": 9.028068542480469, "step": 146, "token_acc": 0.013024734686180633 }, { "epoch": 0.08619173262972735, "grad_norm": 1.6791781677609807, "learning_rate": 1.2063305978898007e-05, "loss": 9.033411026000977, "step": 147, "token_acc": 0.011740527322155594 }, { "epoch": 0.08677807094693639, "grad_norm": 1.2765556676733394, "learning_rate": 1.2145369284876903e-05, "loss": 9.072059631347656, "step": 148, "token_acc": 0.013162245999535386 }, { "epoch": 0.08736440926414542, "grad_norm": 0.87087934831265, "learning_rate": 1.2227432590855801e-05, "loss": 9.04871940612793, "step": 149, "token_acc": 0.013032462026013686 }, { "epoch": 0.08795074758135445, "grad_norm": 1.6822057068986385, "learning_rate": 1.23094958968347e-05, "loss": 9.062845230102539, "step": 150, "token_acc": 0.01138171417150358 }, { "epoch": 0.08853708589856348, "grad_norm": 0.932020401542373, "learning_rate": 1.2391559202813597e-05, "loss": 9.097454071044922, "step": 151, "token_acc": 0.01342642041624755 }, { "epoch": 0.0891234242157725, "grad_norm": 0.8594862583222933, "learning_rate": 1.2473622508792497e-05, "loss": 9.063352584838867, "step": 152, "token_acc": 0.012999197992925886 }, { "epoch": 0.08970976253298153, "grad_norm": 0.7916235626101663, "learning_rate": 1.2555685814771393e-05, "loss": 9.045036315917969, "step": 153, "token_acc": 0.013683493250336102 }, { "epoch": 0.09029610085019056, "grad_norm": 1.05315092748921, "learning_rate": 1.2637749120750293e-05, "loss": 9.06739616394043, "step": 154, "token_acc": 0.01398111716815656 }, { "epoch": 0.09088243916739959, "grad_norm": 0.9189294333009608, "learning_rate": 1.271981242672919e-05, "loss": 9.10274887084961, "step": 155, "token_acc": 0.013108848026538729 }, { "epoch": 0.09146877748460862, "grad_norm": 0.7663692494192156, "learning_rate": 1.2801875732708089e-05, "loss": 9.035738945007324, "step": 156, "token_acc": 0.0133319040123828 }, { "epoch": 0.09205511580181765, "grad_norm": 0.9596511889568455, "learning_rate": 1.2883939038686985e-05, "loss": 9.071704864501953, "step": 157, "token_acc": 0.012912617787564576 }, { "epoch": 0.09264145411902668, "grad_norm": 0.7432858536845889, "learning_rate": 1.2966002344665885e-05, "loss": 9.02535629272461, "step": 158, "token_acc": 0.013088371505557427 }, { "epoch": 0.0932277924362357, "grad_norm": 0.6770469244480984, "learning_rate": 1.3048065650644781e-05, "loss": 9.005430221557617, "step": 159, "token_acc": 0.014889611500941298 }, { "epoch": 0.09381413075344473, "grad_norm": 0.6737060733315569, "learning_rate": 1.313012895662368e-05, "loss": 9.101192474365234, "step": 160, "token_acc": 0.01478247413003605 }, { "epoch": 0.09440046907065376, "grad_norm": 0.6230351849146687, "learning_rate": 1.3212192262602579e-05, "loss": 9.082185745239258, "step": 161, "token_acc": 0.015209576605777871 }, { "epoch": 0.09498680738786279, "grad_norm": 0.7702695193286074, "learning_rate": 1.3294255568581475e-05, "loss": 9.05761432647705, "step": 162, "token_acc": 0.015812216591090958 }, { "epoch": 0.09557314570507183, "grad_norm": 0.6827575379345043, "learning_rate": 1.3376318874560375e-05, "loss": 9.047271728515625, "step": 163, "token_acc": 0.014247530801344058 }, { "epoch": 0.09615948402228086, "grad_norm": 0.809068409116969, "learning_rate": 1.3458382180539271e-05, "loss": 9.059006690979004, "step": 164, "token_acc": 0.013906798965290915 }, { "epoch": 0.09674582233948989, "grad_norm": 0.7564059704111886, "learning_rate": 1.354044548651817e-05, "loss": 9.077688217163086, "step": 165, "token_acc": 0.016063849621091652 }, { "epoch": 0.09733216065669892, "grad_norm": 0.7190274744792501, "learning_rate": 1.3622508792497067e-05, "loss": 9.102840423583984, "step": 166, "token_acc": 0.014970634624164696 }, { "epoch": 0.09791849897390795, "grad_norm": 1.190147392988638, "learning_rate": 1.3704572098475967e-05, "loss": 9.020709991455078, "step": 167, "token_acc": 0.0154521933344787 }, { "epoch": 0.09850483729111698, "grad_norm": 1.0577410783171501, "learning_rate": 1.3786635404454863e-05, "loss": 8.99871826171875, "step": 168, "token_acc": 0.01499610823211323 }, { "epoch": 0.099091175608326, "grad_norm": 0.920999990588546, "learning_rate": 1.3868698710433763e-05, "loss": 8.981106758117676, "step": 169, "token_acc": 0.013833339647455499 }, { "epoch": 0.09967751392553503, "grad_norm": 1.0723165989104564, "learning_rate": 1.3950762016412659e-05, "loss": 9.016414642333984, "step": 170, "token_acc": 0.011658098624594626 }, { "epoch": 0.10026385224274406, "grad_norm": 1.3850130882105414, "learning_rate": 1.4032825322391559e-05, "loss": 9.004114151000977, "step": 171, "token_acc": 0.01422531889027997 }, { "epoch": 0.10085019055995309, "grad_norm": 2.257634817344095, "learning_rate": 1.4114888628370457e-05, "loss": 9.023462295532227, "step": 172, "token_acc": 0.014484141721755923 }, { "epoch": 0.10143652887716212, "grad_norm": 1.2596914152256267, "learning_rate": 1.4196951934349353e-05, "loss": 9.032676696777344, "step": 173, "token_acc": 0.014700998632404432 }, { "epoch": 0.10202286719437115, "grad_norm": 18.55502151573403, "learning_rate": 1.4279015240328253e-05, "loss": 9.127123832702637, "step": 174, "token_acc": 0.013074868278523877 }, { "epoch": 0.10260920551158018, "grad_norm": 9.186057786949455, "learning_rate": 1.4361078546307149e-05, "loss": 9.103059768676758, "step": 175, "token_acc": 0.012738161732762366 }, { "epoch": 0.1031955438287892, "grad_norm": 1.9558323444805998, "learning_rate": 1.4443141852286049e-05, "loss": 9.05103874206543, "step": 176, "token_acc": 0.013382028665931642 }, { "epoch": 0.10378188214599825, "grad_norm": 2.5295637996349667, "learning_rate": 1.4525205158264945e-05, "loss": 9.01467227935791, "step": 177, "token_acc": 0.012551975658660968 }, { "epoch": 0.10436822046320728, "grad_norm": 2.7714986381954905, "learning_rate": 1.4607268464243845e-05, "loss": 8.968669891357422, "step": 178, "token_acc": 0.013999376775353269 }, { "epoch": 0.1049545587804163, "grad_norm": 2.582398339449192, "learning_rate": 1.468933177022274e-05, "loss": 9.037277221679688, "step": 179, "token_acc": 0.014366541368012265 }, { "epoch": 0.10554089709762533, "grad_norm": 1.5364599226520086, "learning_rate": 1.477139507620164e-05, "loss": 9.023276329040527, "step": 180, "token_acc": 0.01389188308103248 }, { "epoch": 0.10612723541483436, "grad_norm": 2.6782551689863863, "learning_rate": 1.4853458382180538e-05, "loss": 9.028421401977539, "step": 181, "token_acc": 0.013619759173794741 }, { "epoch": 0.10671357373204339, "grad_norm": 1.1239082426817306, "learning_rate": 1.4935521688159436e-05, "loss": 8.895050048828125, "step": 182, "token_acc": 0.014584678882321608 }, { "epoch": 0.10729991204925242, "grad_norm": 2.334851579986876, "learning_rate": 1.5017584994138334e-05, "loss": 8.980743408203125, "step": 183, "token_acc": 0.014908741391693776 }, { "epoch": 0.10788625036646145, "grad_norm": 1.800946676496231, "learning_rate": 1.5099648300117232e-05, "loss": 8.919689178466797, "step": 184, "token_acc": 0.015940293972027862 }, { "epoch": 0.10847258868367048, "grad_norm": 2.1229729445682617, "learning_rate": 1.518171160609613e-05, "loss": 8.939411163330078, "step": 185, "token_acc": 0.015107531594334417 }, { "epoch": 0.1090589270008795, "grad_norm": 1.645077219388169, "learning_rate": 1.526377491207503e-05, "loss": 8.945083618164062, "step": 186, "token_acc": 0.014263792170958484 }, { "epoch": 0.10964526531808853, "grad_norm": 1.8219637248501346, "learning_rate": 1.5345838218053926e-05, "loss": 8.937055587768555, "step": 187, "token_acc": 0.014278861077404972 }, { "epoch": 0.11023160363529756, "grad_norm": 2.059649870790315, "learning_rate": 1.5427901524032824e-05, "loss": 8.867799758911133, "step": 188, "token_acc": 0.015887760914351422 }, { "epoch": 0.11081794195250659, "grad_norm": 0.9169745168696901, "learning_rate": 1.5509964830011722e-05, "loss": 8.889871597290039, "step": 189, "token_acc": 0.015403889840678043 }, { "epoch": 0.11140428026971562, "grad_norm": 2.609275098651519, "learning_rate": 1.559202813599062e-05, "loss": 8.920961380004883, "step": 190, "token_acc": 0.01487325605279374 }, { "epoch": 0.11199061858692466, "grad_norm": 1.8123467450507191, "learning_rate": 1.567409144196952e-05, "loss": 8.887933731079102, "step": 191, "token_acc": 0.014357918994870196 }, { "epoch": 0.11257695690413369, "grad_norm": 1.9861415498325221, "learning_rate": 1.5756154747948416e-05, "loss": 8.976934432983398, "step": 192, "token_acc": 0.015038570426742162 }, { "epoch": 0.11316329522134272, "grad_norm": 1.9241559785813132, "learning_rate": 1.5838218053927314e-05, "loss": 8.90761947631836, "step": 193, "token_acc": 0.014950170275573287 }, { "epoch": 0.11374963353855175, "grad_norm": 1.7736670508127494, "learning_rate": 1.5920281359906212e-05, "loss": 8.877954483032227, "step": 194, "token_acc": 0.015467830707914215 }, { "epoch": 0.11433597185576078, "grad_norm": 1.5080366661739326, "learning_rate": 1.600234466588511e-05, "loss": 8.865504264831543, "step": 195, "token_acc": 0.015455776173285198 }, { "epoch": 0.1149223101729698, "grad_norm": 2.438855266656718, "learning_rate": 1.6084407971864008e-05, "loss": 8.839834213256836, "step": 196, "token_acc": 0.016078665171384073 }, { "epoch": 0.11550864849017883, "grad_norm": 2.127691573927158, "learning_rate": 1.6166471277842906e-05, "loss": 8.907093048095703, "step": 197, "token_acc": 0.015969636207390434 }, { "epoch": 0.11609498680738786, "grad_norm": 3.0162677928241837, "learning_rate": 1.6248534583821804e-05, "loss": 8.805242538452148, "step": 198, "token_acc": 0.016205660322364396 }, { "epoch": 0.11668132512459689, "grad_norm": 2.3716217136494353, "learning_rate": 1.6330597889800702e-05, "loss": 8.783966064453125, "step": 199, "token_acc": 0.016431821422552922 }, { "epoch": 0.11726766344180592, "grad_norm": 3.2748743629557824, "learning_rate": 1.64126611957796e-05, "loss": 8.744998931884766, "step": 200, "token_acc": 0.015434622618366178 }, { "epoch": 0.11785400175901495, "grad_norm": 3.6470104627077555, "learning_rate": 1.6494724501758498e-05, "loss": 8.865713119506836, "step": 201, "token_acc": 0.015887437121664252 }, { "epoch": 0.11844034007622398, "grad_norm": 2.29250274115636, "learning_rate": 1.6576787807737396e-05, "loss": 8.724906921386719, "step": 202, "token_acc": 0.016373821165205565 }, { "epoch": 0.119026678393433, "grad_norm": 1.7072872873335114, "learning_rate": 1.6658851113716294e-05, "loss": 8.845634460449219, "step": 203, "token_acc": 0.01621559326650829 }, { "epoch": 0.11961301671064203, "grad_norm": 2.5391311504519374, "learning_rate": 1.6740914419695192e-05, "loss": 8.695985794067383, "step": 204, "token_acc": 0.017065286958205375 }, { "epoch": 0.12019935502785108, "grad_norm": 2.022096895331037, "learning_rate": 1.682297772567409e-05, "loss": 8.82380199432373, "step": 205, "token_acc": 0.016400515653967947 }, { "epoch": 0.1207856933450601, "grad_norm": 3.698623099299713, "learning_rate": 1.6905041031652988e-05, "loss": 8.76661491394043, "step": 206, "token_acc": 0.01617085006334142 }, { "epoch": 0.12137203166226913, "grad_norm": 3.562685569080831, "learning_rate": 1.6987104337631886e-05, "loss": 8.79345417022705, "step": 207, "token_acc": 0.015884653625936582 }, { "epoch": 0.12195836997947816, "grad_norm": 1.9644534950012935, "learning_rate": 1.7069167643610784e-05, "loss": 8.809921264648438, "step": 208, "token_acc": 0.01643835616438356 }, { "epoch": 0.12254470829668719, "grad_norm": 1.7221158612192842, "learning_rate": 1.7151230949589682e-05, "loss": 8.77109146118164, "step": 209, "token_acc": 0.015771729036000628 }, { "epoch": 0.12313104661389622, "grad_norm": 4.371569377140252, "learning_rate": 1.723329425556858e-05, "loss": 8.699161529541016, "step": 210, "token_acc": 0.016544503731353202 }, { "epoch": 0.12371738493110525, "grad_norm": 2.414047499445958, "learning_rate": 1.7315357561547478e-05, "loss": 8.802984237670898, "step": 211, "token_acc": 0.017387951453280207 }, { "epoch": 0.12430372324831428, "grad_norm": 5.732381832473199, "learning_rate": 1.7397420867526376e-05, "loss": 8.686369895935059, "step": 212, "token_acc": 0.016319822218106902 }, { "epoch": 0.1248900615655233, "grad_norm": 4.903230529198032, "learning_rate": 1.7479484173505274e-05, "loss": 8.629064559936523, "step": 213, "token_acc": 0.01756457036560063 }, { "epoch": 0.12547639988273235, "grad_norm": 2.6330107001735357, "learning_rate": 1.7561547479484172e-05, "loss": 8.737181663513184, "step": 214, "token_acc": 0.016296636656955167 }, { "epoch": 0.12606273819994138, "grad_norm": 1.8675640446596997, "learning_rate": 1.764361078546307e-05, "loss": 8.717007637023926, "step": 215, "token_acc": 0.015939513686357605 }, { "epoch": 0.1266490765171504, "grad_norm": 4.7828685900387535, "learning_rate": 1.7725674091441968e-05, "loss": 8.679970741271973, "step": 216, "token_acc": 0.017213250035849663 }, { "epoch": 0.12723541483435943, "grad_norm": 3.650826995216099, "learning_rate": 1.7807737397420866e-05, "loss": 8.63546371459961, "step": 217, "token_acc": 0.016065830721003135 }, { "epoch": 0.12782175315156846, "grad_norm": 3.51135271677668, "learning_rate": 1.7889800703399764e-05, "loss": 8.736555099487305, "step": 218, "token_acc": 0.016175878338191875 }, { "epoch": 0.1284080914687775, "grad_norm": 3.5710733832505324, "learning_rate": 1.7971864009378662e-05, "loss": 8.606273651123047, "step": 219, "token_acc": 0.01632660509323344 }, { "epoch": 0.12899442978598652, "grad_norm": 3.181540331467067, "learning_rate": 1.8053927315357556e-05, "loss": 8.612173080444336, "step": 220, "token_acc": 0.01634141056200686 }, { "epoch": 0.12958076810319555, "grad_norm": 2.6383588906249344, "learning_rate": 1.8135990621336458e-05, "loss": 8.658451080322266, "step": 221, "token_acc": 0.017745876685469356 }, { "epoch": 0.13016710642040458, "grad_norm": 3.941296506495943, "learning_rate": 1.8218053927315356e-05, "loss": 8.594991683959961, "step": 222, "token_acc": 0.017774013841288325 }, { "epoch": 0.1307534447376136, "grad_norm": 3.6636457347707627, "learning_rate": 1.8300117233294254e-05, "loss": 8.673286437988281, "step": 223, "token_acc": 0.0164903084174946 }, { "epoch": 0.13133978305482263, "grad_norm": 3.808915854304146, "learning_rate": 1.8382180539273155e-05, "loss": 8.668746948242188, "step": 224, "token_acc": 0.016302585039617366 }, { "epoch": 0.13192612137203166, "grad_norm": 2.3985883902435345, "learning_rate": 1.846424384525205e-05, "loss": 8.526185989379883, "step": 225, "token_acc": 0.01716734873032881 }, { "epoch": 0.1325124596892407, "grad_norm": 3.6023911529966663, "learning_rate": 1.8546307151230948e-05, "loss": 8.546903610229492, "step": 226, "token_acc": 0.01680478475136238 }, { "epoch": 0.13309879800644972, "grad_norm": 2.6307965541736964, "learning_rate": 1.8628370457209846e-05, "loss": 8.570002555847168, "step": 227, "token_acc": 0.016355015857474297 }, { "epoch": 0.13368513632365875, "grad_norm": 3.4037061692756354, "learning_rate": 1.8710433763188744e-05, "loss": 8.559468269348145, "step": 228, "token_acc": 0.016592329700163905 }, { "epoch": 0.13427147464086778, "grad_norm": 2.823218410919002, "learning_rate": 1.8792497069167642e-05, "loss": 8.55791187286377, "step": 229, "token_acc": 0.01710821801408109 }, { "epoch": 0.1348578129580768, "grad_norm": 3.812520602189062, "learning_rate": 1.887456037514654e-05, "loss": 8.501962661743164, "step": 230, "token_acc": 0.017194684134282295 }, { "epoch": 0.13544415127528583, "grad_norm": 3.193214573723467, "learning_rate": 1.8956623681125438e-05, "loss": 8.538557052612305, "step": 231, "token_acc": 0.01703534659370867 }, { "epoch": 0.13603048959249486, "grad_norm": 3.759886324853259, "learning_rate": 1.9038686987104336e-05, "loss": 8.469820022583008, "step": 232, "token_acc": 0.01721747320951697 }, { "epoch": 0.1366168279097039, "grad_norm": 2.8283854072183128, "learning_rate": 1.9120750293083237e-05, "loss": 8.443784713745117, "step": 233, "token_acc": 0.019059061761727978 }, { "epoch": 0.13720316622691292, "grad_norm": 3.954198915207869, "learning_rate": 1.920281359906213e-05, "loss": 8.47078800201416, "step": 234, "token_acc": 0.01827121258631942 }, { "epoch": 0.13778950454412195, "grad_norm": 4.480390519163684, "learning_rate": 1.928487690504103e-05, "loss": 8.386969566345215, "step": 235, "token_acc": 0.01862292796845987 }, { "epoch": 0.13837584286133098, "grad_norm": 2.4160714716488725, "learning_rate": 1.9366940211019928e-05, "loss": 8.319723129272461, "step": 236, "token_acc": 0.018424398087135287 }, { "epoch": 0.13896218117854, "grad_norm": 4.574217363915171, "learning_rate": 1.944900351699883e-05, "loss": 8.428250312805176, "step": 237, "token_acc": 0.01719873272495711 }, { "epoch": 0.13954851949574903, "grad_norm": 3.142113642341402, "learning_rate": 1.9531066822977724e-05, "loss": 8.369216918945312, "step": 238, "token_acc": 0.020373488364952074 }, { "epoch": 0.14013485781295806, "grad_norm": 6.991475948022251, "learning_rate": 1.961313012895662e-05, "loss": 8.391242980957031, "step": 239, "token_acc": 0.01896038375651311 }, { "epoch": 0.14072119613016712, "grad_norm": 4.36073806027156, "learning_rate": 1.969519343493552e-05, "loss": 8.310012817382812, "step": 240, "token_acc": 0.0186510248740758 }, { "epoch": 0.14130753444737615, "grad_norm": 6.04086116037777, "learning_rate": 1.9777256740914418e-05, "loss": 8.3284912109375, "step": 241, "token_acc": 0.020519232382607094 }, { "epoch": 0.14189387276458518, "grad_norm": 4.783184744366244, "learning_rate": 1.985932004689332e-05, "loss": 8.355376243591309, "step": 242, "token_acc": 0.01939424037023927 }, { "epoch": 0.1424802110817942, "grad_norm": 5.292141128220087, "learning_rate": 1.9941383352872213e-05, "loss": 8.244965553283691, "step": 243, "token_acc": 0.01935396271504026 }, { "epoch": 0.14306654939900323, "grad_norm": 3.5960840951276247, "learning_rate": 2.002344665885111e-05, "loss": 8.289033889770508, "step": 244, "token_acc": 0.019544080687080436 }, { "epoch": 0.14365288771621226, "grad_norm": 7.075470761558914, "learning_rate": 2.010550996483001e-05, "loss": 8.371967315673828, "step": 245, "token_acc": 0.019690230526722086 }, { "epoch": 0.1442392260334213, "grad_norm": 6.061144658817595, "learning_rate": 2.018757327080891e-05, "loss": 8.17292594909668, "step": 246, "token_acc": 0.02201123450576521 }, { "epoch": 0.14482556435063032, "grad_norm": 5.3686971169578825, "learning_rate": 2.0269636576787805e-05, "loss": 8.156320571899414, "step": 247, "token_acc": 0.020546058744945212 }, { "epoch": 0.14541190266783935, "grad_norm": 5.813999207393693, "learning_rate": 2.0351699882766703e-05, "loss": 8.236780166625977, "step": 248, "token_acc": 0.01902194979507667 }, { "epoch": 0.14599824098504838, "grad_norm": 5.754271220090866, "learning_rate": 2.04337631887456e-05, "loss": 8.23939323425293, "step": 249, "token_acc": 0.020121951219512196 }, { "epoch": 0.1465845793022574, "grad_norm": 6.106856803911396, "learning_rate": 2.05158264947245e-05, "loss": 8.162118911743164, "step": 250, "token_acc": 0.020759554231215255 }, { "epoch": 0.14717091761946643, "grad_norm": 3.4240663409771495, "learning_rate": 2.05978898007034e-05, "loss": 8.219804763793945, "step": 251, "token_acc": 0.019182313256929203 }, { "epoch": 0.14775725593667546, "grad_norm": 4.822865595006713, "learning_rate": 2.0679953106682295e-05, "loss": 8.139307022094727, "step": 252, "token_acc": 0.020586942415171748 }, { "epoch": 0.1483435942538845, "grad_norm": 6.702502171739605, "learning_rate": 2.0762016412661193e-05, "loss": 8.039722442626953, "step": 253, "token_acc": 0.02168764768616298 }, { "epoch": 0.14892993257109352, "grad_norm": 3.9043752850950275, "learning_rate": 2.084407971864009e-05, "loss": 8.088377952575684, "step": 254, "token_acc": 0.021260662721157533 }, { "epoch": 0.14951627088830255, "grad_norm": 4.284181847635292, "learning_rate": 2.0926143024618993e-05, "loss": 8.146215438842773, "step": 255, "token_acc": 0.021590979493216034 }, { "epoch": 0.15010260920551158, "grad_norm": 3.491380624884426, "learning_rate": 2.1008206330597887e-05, "loss": 8.097631454467773, "step": 256, "token_acc": 0.022211920316846115 }, { "epoch": 0.1506889475227206, "grad_norm": 11.848809112024556, "learning_rate": 2.1090269636576785e-05, "loss": 8.041440963745117, "step": 257, "token_acc": 0.021933909901265505 }, { "epoch": 0.15127528583992964, "grad_norm": 9.172985930552384, "learning_rate": 2.1172332942555683e-05, "loss": 8.039102554321289, "step": 258, "token_acc": 0.022924036929887474 }, { "epoch": 0.15186162415713866, "grad_norm": 9.194445948985182, "learning_rate": 2.1254396248534585e-05, "loss": 8.055174827575684, "step": 259, "token_acc": 0.024041792857532135 }, { "epoch": 0.1524479624743477, "grad_norm": 6.509250229713062, "learning_rate": 2.133645955451348e-05, "loss": 7.9800825119018555, "step": 260, "token_acc": 0.022898969817458883 }, { "epoch": 0.15303430079155672, "grad_norm": 10.140065009616492, "learning_rate": 2.1418522860492377e-05, "loss": 8.078319549560547, "step": 261, "token_acc": 0.020740872961138922 }, { "epoch": 0.15362063910876575, "grad_norm": 4.3058758575889104, "learning_rate": 2.1500586166471275e-05, "loss": 7.975379467010498, "step": 262, "token_acc": 0.023266629666173325 }, { "epoch": 0.15420697742597478, "grad_norm": 12.060044818859536, "learning_rate": 2.1582649472450173e-05, "loss": 8.065200805664062, "step": 263, "token_acc": 0.024049699087555814 }, { "epoch": 0.1547933157431838, "grad_norm": 10.333809573113374, "learning_rate": 2.1664712778429075e-05, "loss": 7.982080459594727, "step": 264, "token_acc": 0.024507800390212303 }, { "epoch": 0.15537965406039284, "grad_norm": 6.81295307370986, "learning_rate": 2.174677608440797e-05, "loss": 7.957369327545166, "step": 265, "token_acc": 0.024173902818712345 }, { "epoch": 0.15596599237760186, "grad_norm": 6.357761566520793, "learning_rate": 2.1828839390386867e-05, "loss": 7.925359725952148, "step": 266, "token_acc": 0.02534466714108088 }, { "epoch": 0.1565523306948109, "grad_norm": 6.6801064329722815, "learning_rate": 2.1910902696365765e-05, "loss": 7.931022644042969, "step": 267, "token_acc": 0.024822094433108313 }, { "epoch": 0.15713866901201995, "grad_norm": 6.844260065703394, "learning_rate": 2.1992966002344666e-05, "loss": 7.986782073974609, "step": 268, "token_acc": 0.0230308800145842 }, { "epoch": 0.15772500732922898, "grad_norm": 4.084529110818654, "learning_rate": 2.207502930832356e-05, "loss": 7.826689720153809, "step": 269, "token_acc": 0.02608825362662419 }, { "epoch": 0.158311345646438, "grad_norm": 6.692001490056697, "learning_rate": 2.215709261430246e-05, "loss": 7.939952850341797, "step": 270, "token_acc": 0.024584668915782988 }, { "epoch": 0.15889768396364704, "grad_norm": 3.795413463681977, "learning_rate": 2.2239155920281357e-05, "loss": 7.781843185424805, "step": 271, "token_acc": 0.02768234655302416 }, { "epoch": 0.15948402228085606, "grad_norm": 8.508992899379706, "learning_rate": 2.232121922626026e-05, "loss": 7.877396106719971, "step": 272, "token_acc": 0.024171704610924712 }, { "epoch": 0.1600703605980651, "grad_norm": 5.624666565897748, "learning_rate": 2.2403282532239156e-05, "loss": 7.861850261688232, "step": 273, "token_acc": 0.024692416742819932 }, { "epoch": 0.16065669891527412, "grad_norm": 7.457730027433706, "learning_rate": 2.248534583821805e-05, "loss": 7.903068542480469, "step": 274, "token_acc": 0.025924767043416543 }, { "epoch": 0.16124303723248315, "grad_norm": 5.799148446749172, "learning_rate": 2.256740914419695e-05, "loss": 7.890822887420654, "step": 275, "token_acc": 0.025598293273243197 }, { "epoch": 0.16182937554969218, "grad_norm": 6.838781032891721, "learning_rate": 2.2649472450175847e-05, "loss": 7.878671646118164, "step": 276, "token_acc": 0.024970285908336925 }, { "epoch": 0.1624157138669012, "grad_norm": 5.351665955003442, "learning_rate": 2.273153575615475e-05, "loss": 7.777127265930176, "step": 277, "token_acc": 0.02718523177169251 }, { "epoch": 0.16300205218411024, "grad_norm": 5.548791419889535, "learning_rate": 2.2813599062133643e-05, "loss": 7.721002101898193, "step": 278, "token_acc": 0.028221501044071933 }, { "epoch": 0.16358839050131926, "grad_norm": 3.7459441569023872, "learning_rate": 2.289566236811254e-05, "loss": 7.737344741821289, "step": 279, "token_acc": 0.02737406035831494 }, { "epoch": 0.1641747288185283, "grad_norm": 5.710591638749095, "learning_rate": 2.297772567409144e-05, "loss": 7.6797027587890625, "step": 280, "token_acc": 0.028434778283599086 }, { "epoch": 0.16476106713573732, "grad_norm": 2.8138814454535974, "learning_rate": 2.305978898007034e-05, "loss": 7.641755104064941, "step": 281, "token_acc": 0.03108317759822257 }, { "epoch": 0.16534740545294635, "grad_norm": 5.5663994352506245, "learning_rate": 2.3141852286049238e-05, "loss": 7.639070510864258, "step": 282, "token_acc": 0.03185563805553715 }, { "epoch": 0.16593374377015538, "grad_norm": 3.7226460692409256, "learning_rate": 2.3223915592028133e-05, "loss": 7.719721794128418, "step": 283, "token_acc": 0.02765388973791182 }, { "epoch": 0.1665200820873644, "grad_norm": 5.714925507398263, "learning_rate": 2.330597889800703e-05, "loss": 7.6826629638671875, "step": 284, "token_acc": 0.02806624473945453 }, { "epoch": 0.16710642040457344, "grad_norm": 4.2376831842455776, "learning_rate": 2.3388042203985932e-05, "loss": 7.5432844161987305, "step": 285, "token_acc": 0.03027291593041454 }, { "epoch": 0.16769275872178246, "grad_norm": 3.0580963196406143, "learning_rate": 2.347010550996483e-05, "loss": 7.636653423309326, "step": 286, "token_acc": 0.03028059676938421 }, { "epoch": 0.1682790970389915, "grad_norm": 5.686907198101258, "learning_rate": 2.3552168815943725e-05, "loss": 7.544497489929199, "step": 287, "token_acc": 0.03165042220222115 }, { "epoch": 0.16886543535620052, "grad_norm": 5.610947848626733, "learning_rate": 2.3634232121922623e-05, "loss": 7.56590461730957, "step": 288, "token_acc": 0.030103740895081017 }, { "epoch": 0.16945177367340955, "grad_norm": 3.708955920099665, "learning_rate": 2.371629542790152e-05, "loss": 7.51408576965332, "step": 289, "token_acc": 0.03282895792118912 }, { "epoch": 0.17003811199061858, "grad_norm": 5.868576732270801, "learning_rate": 2.3798358733880422e-05, "loss": 7.476420879364014, "step": 290, "token_acc": 0.033566991508519974 }, { "epoch": 0.1706244503078276, "grad_norm": 6.098114302318514, "learning_rate": 2.3880422039859317e-05, "loss": 7.493291854858398, "step": 291, "token_acc": 0.033254375744228626 }, { "epoch": 0.17121078862503664, "grad_norm": 2.5394875645795953, "learning_rate": 2.3962485345838215e-05, "loss": 7.340261459350586, "step": 292, "token_acc": 0.03699549835248983 }, { "epoch": 0.17179712694224566, "grad_norm": 10.201615528266453, "learning_rate": 2.4044548651817113e-05, "loss": 7.498136520385742, "step": 293, "token_acc": 0.03217249717796288 }, { "epoch": 0.1723834652594547, "grad_norm": 6.85334552310493, "learning_rate": 2.4126611957796014e-05, "loss": 7.466062545776367, "step": 294, "token_acc": 0.03243098225425411 }, { "epoch": 0.17296980357666372, "grad_norm": 10.365558575423215, "learning_rate": 2.4208675263774912e-05, "loss": 7.455526351928711, "step": 295, "token_acc": 0.03454595606762255 }, { "epoch": 0.17355614189387278, "grad_norm": 9.3288213244638, "learning_rate": 2.4290738569753807e-05, "loss": 7.399315357208252, "step": 296, "token_acc": 0.037120165306501564 }, { "epoch": 0.1741424802110818, "grad_norm": 4.293712622406385, "learning_rate": 2.4372801875732705e-05, "loss": 7.3079681396484375, "step": 297, "token_acc": 0.03697229640419735 }, { "epoch": 0.17472881852829084, "grad_norm": 6.908889227251877, "learning_rate": 2.4454865181711603e-05, "loss": 7.377861976623535, "step": 298, "token_acc": 0.034878200022312913 }, { "epoch": 0.17531515684549986, "grad_norm": 5.979617318144951, "learning_rate": 2.4536928487690504e-05, "loss": 7.199686050415039, "step": 299, "token_acc": 0.03668531704301895 }, { "epoch": 0.1759014951627089, "grad_norm": 4.574131899597824, "learning_rate": 2.46189917936694e-05, "loss": 7.230719566345215, "step": 300, "token_acc": 0.038200730262956034 }, { "epoch": 0.17648783347991792, "grad_norm": 4.954127023170958, "learning_rate": 2.4701055099648297e-05, "loss": 7.380388259887695, "step": 301, "token_acc": 0.03557278409577408 }, { "epoch": 0.17707417179712695, "grad_norm": 6.066811010061318, "learning_rate": 2.4783118405627195e-05, "loss": 7.241059303283691, "step": 302, "token_acc": 0.03938199338987775 }, { "epoch": 0.17766051011433598, "grad_norm": 4.50315139186192, "learning_rate": 2.4865181711606096e-05, "loss": 7.194522380828857, "step": 303, "token_acc": 0.04079912776458071 }, { "epoch": 0.178246848431545, "grad_norm": 8.445854335168255, "learning_rate": 2.4947245017584994e-05, "loss": 7.182653427124023, "step": 304, "token_acc": 0.04091390896674692 }, { "epoch": 0.17883318674875404, "grad_norm": 3.3369167266364856, "learning_rate": 2.502930832356389e-05, "loss": 7.139228820800781, "step": 305, "token_acc": 0.04075027757435449 }, { "epoch": 0.17941952506596306, "grad_norm": 11.262799050853033, "learning_rate": 2.5111371629542786e-05, "loss": 7.123529434204102, "step": 306, "token_acc": 0.03972182503574678 }, { "epoch": 0.1800058633831721, "grad_norm": 7.866751245262843, "learning_rate": 2.5193434935521688e-05, "loss": 7.234379291534424, "step": 307, "token_acc": 0.039482702349869454 }, { "epoch": 0.18059220170038112, "grad_norm": 9.196175260009392, "learning_rate": 2.5275498241500586e-05, "loss": 7.211536407470703, "step": 308, "token_acc": 0.040580688893984716 }, { "epoch": 0.18117854001759015, "grad_norm": 6.922032685757308, "learning_rate": 2.535756154747948e-05, "loss": 7.189626693725586, "step": 309, "token_acc": 0.04212458921968454 }, { "epoch": 0.18176487833479918, "grad_norm": 8.383231703396527, "learning_rate": 2.543962485345838e-05, "loss": 7.084568977355957, "step": 310, "token_acc": 0.04413164237465217 }, { "epoch": 0.1823512166520082, "grad_norm": 6.779468807751188, "learning_rate": 2.5521688159437276e-05, "loss": 7.10654354095459, "step": 311, "token_acc": 0.04092412734538333 }, { "epoch": 0.18293755496921724, "grad_norm": 5.594294123579753, "learning_rate": 2.5603751465416178e-05, "loss": 6.957389831542969, "step": 312, "token_acc": 0.04682504123984278 }, { "epoch": 0.18352389328642627, "grad_norm": 5.268304731052235, "learning_rate": 2.5685814771395076e-05, "loss": 7.018155574798584, "step": 313, "token_acc": 0.04502595830806588 }, { "epoch": 0.1841102316036353, "grad_norm": 4.243679147352299, "learning_rate": 2.576787807737397e-05, "loss": 6.998994827270508, "step": 314, "token_acc": 0.04605518946455452 }, { "epoch": 0.18469656992084432, "grad_norm": 5.069872560490284, "learning_rate": 2.584994138335287e-05, "loss": 6.891481399536133, "step": 315, "token_acc": 0.050647868043256154 }, { "epoch": 0.18528290823805335, "grad_norm": 5.966297941491455, "learning_rate": 2.593200468933177e-05, "loss": 6.871021270751953, "step": 316, "token_acc": 0.0521790462501925 }, { "epoch": 0.18586924655526238, "grad_norm": 6.114684395876698, "learning_rate": 2.6014067995310668e-05, "loss": 6.890596389770508, "step": 317, "token_acc": 0.0506723860479895 }, { "epoch": 0.1864555848724714, "grad_norm": 5.512745573353237, "learning_rate": 2.6096131301289562e-05, "loss": 6.8679633140563965, "step": 318, "token_acc": 0.051609993312215836 }, { "epoch": 0.18704192318968044, "grad_norm": 7.477696147298044, "learning_rate": 2.617819460726846e-05, "loss": 6.932560920715332, "step": 319, "token_acc": 0.049224795280894844 }, { "epoch": 0.18762826150688947, "grad_norm": 2.9716454164041948, "learning_rate": 2.626025791324736e-05, "loss": 6.843957901000977, "step": 320, "token_acc": 0.0517449452757845 }, { "epoch": 0.1882145998240985, "grad_norm": 10.569004721389046, "learning_rate": 2.634232121922626e-05, "loss": 6.895000457763672, "step": 321, "token_acc": 0.04795219022188314 }, { "epoch": 0.18880093814130752, "grad_norm": 5.598411440654056, "learning_rate": 2.6424384525205158e-05, "loss": 6.817564964294434, "step": 322, "token_acc": 0.0520371671412045 }, { "epoch": 0.18938727645851655, "grad_norm": 9.834785046839244, "learning_rate": 2.6506447831184052e-05, "loss": 6.821771621704102, "step": 323, "token_acc": 0.05322270779234488 }, { "epoch": 0.18997361477572558, "grad_norm": 7.058848236846204, "learning_rate": 2.658851113716295e-05, "loss": 6.7929582595825195, "step": 324, "token_acc": 0.05393500134120877 }, { "epoch": 0.19055995309293464, "grad_norm": 6.6178315405620065, "learning_rate": 2.667057444314185e-05, "loss": 6.666149139404297, "step": 325, "token_acc": 0.056866230515177654 }, { "epoch": 0.19114629141014366, "grad_norm": 7.00483924683113, "learning_rate": 2.675263774912075e-05, "loss": 6.666651725769043, "step": 326, "token_acc": 0.05690540037547512 }, { "epoch": 0.1917326297273527, "grad_norm": 5.079934705961767, "learning_rate": 2.6834701055099644e-05, "loss": 6.710475921630859, "step": 327, "token_acc": 0.05638290431795751 }, { "epoch": 0.19231896804456172, "grad_norm": 7.277159360110486, "learning_rate": 2.6916764361078542e-05, "loss": 6.624210357666016, "step": 328, "token_acc": 0.06208354033103861 }, { "epoch": 0.19290530636177075, "grad_norm": 6.8930766517698965, "learning_rate": 2.6998827667057443e-05, "loss": 6.631856918334961, "step": 329, "token_acc": 0.05746466339686679 }, { "epoch": 0.19349164467897978, "grad_norm": 10.611747699040464, "learning_rate": 2.708089097303634e-05, "loss": 6.647503852844238, "step": 330, "token_acc": 0.05955072394054548 }, { "epoch": 0.1940779829961888, "grad_norm": 5.910850637883799, "learning_rate": 2.7162954279015236e-05, "loss": 6.596729278564453, "step": 331, "token_acc": 0.061496661220145535 }, { "epoch": 0.19466432131339784, "grad_norm": 10.37649063178904, "learning_rate": 2.7245017584994134e-05, "loss": 6.716401100158691, "step": 332, "token_acc": 0.05534771432103188 }, { "epoch": 0.19525065963060687, "grad_norm": 8.205825164860919, "learning_rate": 2.7327080890973035e-05, "loss": 6.575899600982666, "step": 333, "token_acc": 0.06262224770822368 }, { "epoch": 0.1958369979478159, "grad_norm": 6.738555686603056, "learning_rate": 2.7409144196951933e-05, "loss": 6.6320271492004395, "step": 334, "token_acc": 0.05921731618499614 }, { "epoch": 0.19642333626502492, "grad_norm": 7.608194715912897, "learning_rate": 2.749120750293083e-05, "loss": 6.515623092651367, "step": 335, "token_acc": 0.06527202512637102 }, { "epoch": 0.19700967458223395, "grad_norm": 7.247446421640857, "learning_rate": 2.7573270808909726e-05, "loss": 6.562398910522461, "step": 336, "token_acc": 0.062424494533715603 }, { "epoch": 0.19759601289944298, "grad_norm": 5.647851292128922, "learning_rate": 2.7655334114888624e-05, "loss": 6.436605453491211, "step": 337, "token_acc": 0.07144093387787155 }, { "epoch": 0.198182351216652, "grad_norm": 6.546820186865682, "learning_rate": 2.7737397420867525e-05, "loss": 6.478397846221924, "step": 338, "token_acc": 0.06827630154860767 }, { "epoch": 0.19876868953386104, "grad_norm": 3.3966243818453035, "learning_rate": 2.7819460726846423e-05, "loss": 6.496657848358154, "step": 339, "token_acc": 0.06569103307490173 }, { "epoch": 0.19935502785107007, "grad_norm": 8.922646670057471, "learning_rate": 2.7901524032825318e-05, "loss": 6.423449993133545, "step": 340, "token_acc": 0.06927440159012095 }, { "epoch": 0.1999413661682791, "grad_norm": 5.990033084623942, "learning_rate": 2.7983587338804216e-05, "loss": 6.516849994659424, "step": 341, "token_acc": 0.06816905490842558 }, { "epoch": 0.20052770448548812, "grad_norm": 6.336726318293391, "learning_rate": 2.8065650644783117e-05, "loss": 6.381641387939453, "step": 342, "token_acc": 0.0721427706513254 }, { "epoch": 0.20111404280269715, "grad_norm": 6.29280806854031, "learning_rate": 2.8147713950762015e-05, "loss": 6.475069999694824, "step": 343, "token_acc": 0.06888381798184752 }, { "epoch": 0.20170038111990618, "grad_norm": 6.709775694827274, "learning_rate": 2.8229777256740913e-05, "loss": 6.38390588760376, "step": 344, "token_acc": 0.06849427428402091 }, { "epoch": 0.2022867194371152, "grad_norm": 6.787138640691426, "learning_rate": 2.8311840562719808e-05, "loss": 6.440901279449463, "step": 345, "token_acc": 0.06869756266783159 }, { "epoch": 0.20287305775432424, "grad_norm": 8.186447429958918, "learning_rate": 2.8393903868698706e-05, "loss": 6.4044084548950195, "step": 346, "token_acc": 0.06835312836502146 }, { "epoch": 0.20345939607153327, "grad_norm": 5.239185760055508, "learning_rate": 2.8475967174677607e-05, "loss": 6.290832042694092, "step": 347, "token_acc": 0.07705955315507686 }, { "epoch": 0.2040457343887423, "grad_norm": 2.6794803391264366, "learning_rate": 2.8558030480656505e-05, "loss": 6.281411647796631, "step": 348, "token_acc": 0.07904037878030076 }, { "epoch": 0.20463207270595132, "grad_norm": 9.189856019533304, "learning_rate": 2.86400937866354e-05, "loss": 6.379359722137451, "step": 349, "token_acc": 0.07438556255190483 }, { "epoch": 0.20521841102316035, "grad_norm": 5.985019382753672, "learning_rate": 2.8722157092614298e-05, "loss": 6.338669300079346, "step": 350, "token_acc": 0.07450111772138301 }, { "epoch": 0.20580474934036938, "grad_norm": 8.27772835875237, "learning_rate": 2.88042203985932e-05, "loss": 6.249000549316406, "step": 351, "token_acc": 0.07807650273224044 }, { "epoch": 0.2063910876575784, "grad_norm": 4.968068790964617, "learning_rate": 2.8886283704572097e-05, "loss": 6.238777160644531, "step": 352, "token_acc": 0.07617823458105995 }, { "epoch": 0.20697742597478747, "grad_norm": 7.353257839321968, "learning_rate": 2.8968347010550995e-05, "loss": 6.330348014831543, "step": 353, "token_acc": 0.07336546316393351 }, { "epoch": 0.2075637642919965, "grad_norm": 3.966773831948239, "learning_rate": 2.905041031652989e-05, "loss": 6.253887176513672, "step": 354, "token_acc": 0.07955167491332954 }, { "epoch": 0.20815010260920552, "grad_norm": 8.548288866727106, "learning_rate": 2.913247362250879e-05, "loss": 6.187613010406494, "step": 355, "token_acc": 0.08292580982236154 }, { "epoch": 0.20873644092641455, "grad_norm": 6.2221681668528195, "learning_rate": 2.921453692848769e-05, "loss": 6.29707670211792, "step": 356, "token_acc": 0.07706845373118851 }, { "epoch": 0.20932277924362358, "grad_norm": 5.175913773085352, "learning_rate": 2.9296600234466587e-05, "loss": 6.221586227416992, "step": 357, "token_acc": 0.07976189547023824 }, { "epoch": 0.2099091175608326, "grad_norm": 7.011518564505431, "learning_rate": 2.937866354044548e-05, "loss": 6.142030715942383, "step": 358, "token_acc": 0.08658054366246258 }, { "epoch": 0.21049545587804164, "grad_norm": 5.761545102540839, "learning_rate": 2.946072684642438e-05, "loss": 6.1075592041015625, "step": 359, "token_acc": 0.08501039490152336 }, { "epoch": 0.21108179419525067, "grad_norm": 5.250445543210576, "learning_rate": 2.954279015240328e-05, "loss": 6.084338188171387, "step": 360, "token_acc": 0.09044136552678385 }, { "epoch": 0.2116681325124597, "grad_norm": 5.389902101025379, "learning_rate": 2.962485345838218e-05, "loss": 6.151660919189453, "step": 361, "token_acc": 0.08483443484859904 }, { "epoch": 0.21225447082966872, "grad_norm": 4.914817189225502, "learning_rate": 2.9706916764361077e-05, "loss": 6.1269402503967285, "step": 362, "token_acc": 0.08605764599599587 }, { "epoch": 0.21284080914687775, "grad_norm": 6.574694364726248, "learning_rate": 2.978898007033997e-05, "loss": 6.026739120483398, "step": 363, "token_acc": 0.08999350058494736 }, { "epoch": 0.21342714746408678, "grad_norm": 7.7171852478008285, "learning_rate": 2.9871043376318873e-05, "loss": 6.097996234893799, "step": 364, "token_acc": 0.0869426784567914 }, { "epoch": 0.2140134857812958, "grad_norm": 4.509688260607066, "learning_rate": 2.995310668229777e-05, "loss": 5.972391128540039, "step": 365, "token_acc": 0.09577491295756092 }, { "epoch": 0.21459982409850484, "grad_norm": 5.458653278006507, "learning_rate": 3.003516998827667e-05, "loss": 5.933320045471191, "step": 366, "token_acc": 0.09642404934035861 }, { "epoch": 0.21518616241571387, "grad_norm": 5.683848252394637, "learning_rate": 3.0117233294255564e-05, "loss": 5.965089797973633, "step": 367, "token_acc": 0.09715046809164607 }, { "epoch": 0.2157725007329229, "grad_norm": 5.5727047201970805, "learning_rate": 3.0199296600234465e-05, "loss": 5.945783615112305, "step": 368, "token_acc": 0.0934168092457757 }, { "epoch": 0.21635883905013192, "grad_norm": 7.993806798397042, "learning_rate": 3.0281359906213363e-05, "loss": 5.8814473152160645, "step": 369, "token_acc": 0.10355702275535555 }, { "epoch": 0.21694517736734095, "grad_norm": 4.242216756776699, "learning_rate": 3.036342321219226e-05, "loss": 5.863864421844482, "step": 370, "token_acc": 0.10104954264262676 }, { "epoch": 0.21753151568454998, "grad_norm": 7.12559426922248, "learning_rate": 3.0445486518171155e-05, "loss": 5.851903438568115, "step": 371, "token_acc": 0.0988907776975681 }, { "epoch": 0.218117854001759, "grad_norm": 2.562175766758614, "learning_rate": 3.052754982415006e-05, "loss": 5.892111301422119, "step": 372, "token_acc": 0.10034013155282627 }, { "epoch": 0.21870419231896804, "grad_norm": 8.906073138903627, "learning_rate": 3.0609613130128955e-05, "loss": 5.915294647216797, "step": 373, "token_acc": 0.09728479833643808 }, { "epoch": 0.21929053063617707, "grad_norm": 5.76900127275279, "learning_rate": 3.069167643610785e-05, "loss": 5.888400077819824, "step": 374, "token_acc": 0.09916352260368695 }, { "epoch": 0.2198768689533861, "grad_norm": 7.010054252869409, "learning_rate": 3.077373974208675e-05, "loss": 5.904424667358398, "step": 375, "token_acc": 0.09839418428646998 }, { "epoch": 0.22046320727059512, "grad_norm": 5.5435753806715065, "learning_rate": 3.085580304806565e-05, "loss": 5.814133167266846, "step": 376, "token_acc": 0.10243825909607918 }, { "epoch": 0.22104954558780415, "grad_norm": 4.364383601823679, "learning_rate": 3.093786635404455e-05, "loss": 5.871279716491699, "step": 377, "token_acc": 0.10116971763000714 }, { "epoch": 0.22163588390501318, "grad_norm": 8.100806956559703, "learning_rate": 3.1019929660023445e-05, "loss": 5.793034553527832, "step": 378, "token_acc": 0.10408344137827652 }, { "epoch": 0.2222222222222222, "grad_norm": 6.049077745274884, "learning_rate": 3.110199296600234e-05, "loss": 5.801364898681641, "step": 379, "token_acc": 0.10839996459445904 }, { "epoch": 0.22280856053943124, "grad_norm": 3.386272956144011, "learning_rate": 3.118405627198124e-05, "loss": 5.733031272888184, "step": 380, "token_acc": 0.1094655165123261 }, { "epoch": 0.2233948988566403, "grad_norm": 9.456867599845966, "learning_rate": 3.126611957796014e-05, "loss": 5.719071388244629, "step": 381, "token_acc": 0.1083124224094667 }, { "epoch": 0.22398123717384932, "grad_norm": 4.330384081141098, "learning_rate": 3.134818288393904e-05, "loss": 5.669560432434082, "step": 382, "token_acc": 0.11199709669339958 }, { "epoch": 0.22456757549105835, "grad_norm": 9.978848251326847, "learning_rate": 3.1430246189917935e-05, "loss": 5.727608680725098, "step": 383, "token_acc": 0.10876805574724167 }, { "epoch": 0.22515391380826738, "grad_norm": 6.950342593424276, "learning_rate": 3.151230949589683e-05, "loss": 5.697427749633789, "step": 384, "token_acc": 0.11248477563676398 }, { "epoch": 0.2257402521254764, "grad_norm": 6.479830826986394, "learning_rate": 3.159437280187573e-05, "loss": 5.694333076477051, "step": 385, "token_acc": 0.10902044049902239 }, { "epoch": 0.22632659044268544, "grad_norm": 6.388302184794283, "learning_rate": 3.167643610785463e-05, "loss": 5.715522766113281, "step": 386, "token_acc": 0.11104647859506346 }, { "epoch": 0.22691292875989447, "grad_norm": 4.9447274118658235, "learning_rate": 3.1758499413833527e-05, "loss": 5.690276145935059, "step": 387, "token_acc": 0.11082315420356849 }, { "epoch": 0.2274992670771035, "grad_norm": 6.825288287594065, "learning_rate": 3.1840562719812425e-05, "loss": 5.664138317108154, "step": 388, "token_acc": 0.11341331572703575 }, { "epoch": 0.22808560539431252, "grad_norm": 5.728998858379683, "learning_rate": 3.192262602579132e-05, "loss": 5.697914123535156, "step": 389, "token_acc": 0.1109388931511394 }, { "epoch": 0.22867194371152155, "grad_norm": 3.3265138383471538, "learning_rate": 3.200468933177022e-05, "loss": 5.575888156890869, "step": 390, "token_acc": 0.1199025003352537 }, { "epoch": 0.22925828202873058, "grad_norm": 10.459675562249998, "learning_rate": 3.208675263774912e-05, "loss": 5.606986045837402, "step": 391, "token_acc": 0.12034323599185881 }, { "epoch": 0.2298446203459396, "grad_norm": 4.868130814850591, "learning_rate": 3.2168815943728016e-05, "loss": 5.64790678024292, "step": 392, "token_acc": 0.11590617389799782 }, { "epoch": 0.23043095866314864, "grad_norm": 12.768720719385545, "learning_rate": 3.2250879249706914e-05, "loss": 5.650926113128662, "step": 393, "token_acc": 0.110319839414737 }, { "epoch": 0.23101729698035767, "grad_norm": 7.090300637662734, "learning_rate": 3.233294255568581e-05, "loss": 5.5357208251953125, "step": 394, "token_acc": 0.11874065864793143 }, { "epoch": 0.2316036352975667, "grad_norm": 11.974542948234529, "learning_rate": 3.241500586166471e-05, "loss": 5.676987648010254, "step": 395, "token_acc": 0.11116802605828967 }, { "epoch": 0.23218997361477572, "grad_norm": 9.550487573974742, "learning_rate": 3.249706916764361e-05, "loss": 5.605356216430664, "step": 396, "token_acc": 0.11959896507115136 }, { "epoch": 0.23277631193198475, "grad_norm": 7.752182120700802, "learning_rate": 3.2579132473622506e-05, "loss": 5.620063781738281, "step": 397, "token_acc": 0.1139806145100941 }, { "epoch": 0.23336265024919378, "grad_norm": 6.901103737347299, "learning_rate": 3.2661195779601404e-05, "loss": 5.656468391418457, "step": 398, "token_acc": 0.10943400215099794 }, { "epoch": 0.2339489885664028, "grad_norm": 4.86173992333123, "learning_rate": 3.27432590855803e-05, "loss": 5.540590286254883, "step": 399, "token_acc": 0.12205815544498297 }, { "epoch": 0.23453532688361184, "grad_norm": 4.37658279856578, "learning_rate": 3.28253223915592e-05, "loss": 5.512296199798584, "step": 400, "token_acc": 0.12221467534382174 }, { "epoch": 0.23512166520082087, "grad_norm": 7.610321730055148, "learning_rate": 3.29073856975381e-05, "loss": 5.504565715789795, "step": 401, "token_acc": 0.12283261098064342 }, { "epoch": 0.2357080035180299, "grad_norm": 4.834560929092718, "learning_rate": 3.2989449003516996e-05, "loss": 5.471881866455078, "step": 402, "token_acc": 0.12622801565050978 }, { "epoch": 0.23629434183523892, "grad_norm": 6.4863374990144305, "learning_rate": 3.3071512309495894e-05, "loss": 5.448960304260254, "step": 403, "token_acc": 0.1272094616334707 }, { "epoch": 0.23688068015244795, "grad_norm": 6.376514340996239, "learning_rate": 3.315357561547479e-05, "loss": 5.4967474937438965, "step": 404, "token_acc": 0.12428196786789786 }, { "epoch": 0.23746701846965698, "grad_norm": 4.603992644435482, "learning_rate": 3.323563892145369e-05, "loss": 5.3730878829956055, "step": 405, "token_acc": 0.13343197910041368 }, { "epoch": 0.238053356786866, "grad_norm": 7.644842595241106, "learning_rate": 3.331770222743259e-05, "loss": 5.453896999359131, "step": 406, "token_acc": 0.12670379307884033 }, { "epoch": 0.23863969510407504, "grad_norm": 4.423087088557715, "learning_rate": 3.3399765533411486e-05, "loss": 5.409916877746582, "step": 407, "token_acc": 0.1314650683821998 }, { "epoch": 0.23922603342128407, "grad_norm": 6.152294869594081, "learning_rate": 3.3481828839390384e-05, "loss": 5.451051712036133, "step": 408, "token_acc": 0.12499536573610648 }, { "epoch": 0.23981237173849312, "grad_norm": 5.832180776655737, "learning_rate": 3.356389214536928e-05, "loss": 5.37488317489624, "step": 409, "token_acc": 0.13363264297599303 }, { "epoch": 0.24039871005570215, "grad_norm": 4.519219135834628, "learning_rate": 3.364595545134818e-05, "loss": 5.371809959411621, "step": 410, "token_acc": 0.13427762499290158 }, { "epoch": 0.24098504837291118, "grad_norm": 3.9662349757634425, "learning_rate": 3.372801875732708e-05, "loss": 5.233335494995117, "step": 411, "token_acc": 0.14384437187752938 }, { "epoch": 0.2415713866901202, "grad_norm": 6.175410218180845, "learning_rate": 3.3810082063305976e-05, "loss": 5.339487075805664, "step": 412, "token_acc": 0.1353152362682007 }, { "epoch": 0.24215772500732924, "grad_norm": 5.734904973769652, "learning_rate": 3.3892145369284874e-05, "loss": 5.325626850128174, "step": 413, "token_acc": 0.13544106760771907 }, { "epoch": 0.24274406332453827, "grad_norm": 6.748808978080904, "learning_rate": 3.397420867526377e-05, "loss": 5.316834449768066, "step": 414, "token_acc": 0.13883175702324893 }, { "epoch": 0.2433304016417473, "grad_norm": 3.733223510404855, "learning_rate": 3.405627198124267e-05, "loss": 5.31654167175293, "step": 415, "token_acc": 0.1347342103381333 }, { "epoch": 0.24391673995895632, "grad_norm": 5.68343905444421, "learning_rate": 3.413833528722157e-05, "loss": 5.376072883605957, "step": 416, "token_acc": 0.12966109854763955 }, { "epoch": 0.24450307827616535, "grad_norm": 5.587575454541941, "learning_rate": 3.4220398593200466e-05, "loss": 5.255402088165283, "step": 417, "token_acc": 0.14047008413348763 }, { "epoch": 0.24508941659337438, "grad_norm": 4.8150777335325765, "learning_rate": 3.4302461899179364e-05, "loss": 5.2430009841918945, "step": 418, "token_acc": 0.14188434890249677 }, { "epoch": 0.2456757549105834, "grad_norm": 6.71383271808846, "learning_rate": 3.438452520515826e-05, "loss": 5.282718658447266, "step": 419, "token_acc": 0.13470143134912269 }, { "epoch": 0.24626209322779244, "grad_norm": 8.138263407057881, "learning_rate": 3.446658851113716e-05, "loss": 5.304866790771484, "step": 420, "token_acc": 0.13565958635386735 }, { "epoch": 0.24684843154500147, "grad_norm": 5.212040391716807, "learning_rate": 3.454865181711606e-05, "loss": 5.295950889587402, "step": 421, "token_acc": 0.13749651700378462 }, { "epoch": 0.2474347698622105, "grad_norm": 7.165890640559476, "learning_rate": 3.4630715123094956e-05, "loss": 5.2838006019592285, "step": 422, "token_acc": 0.1374077461069823 }, { "epoch": 0.24802110817941952, "grad_norm": 4.556210119101287, "learning_rate": 3.4712778429073854e-05, "loss": 5.276226997375488, "step": 423, "token_acc": 0.13886151588216794 }, { "epoch": 0.24860744649662855, "grad_norm": 6.7196136739153465, "learning_rate": 3.479484173505275e-05, "loss": 5.141275405883789, "step": 424, "token_acc": 0.14905236454897539 }, { "epoch": 0.24919378481383758, "grad_norm": 4.5244675263915655, "learning_rate": 3.487690504103165e-05, "loss": 5.1395721435546875, "step": 425, "token_acc": 0.1501625551889078 }, { "epoch": 0.2497801231310466, "grad_norm": 4.305755272978437, "learning_rate": 3.495896834701055e-05, "loss": 5.161777496337891, "step": 426, "token_acc": 0.14801628329407618 }, { "epoch": 0.25036646144825564, "grad_norm": 5.658256696784875, "learning_rate": 3.5041031652989446e-05, "loss": 5.184259414672852, "step": 427, "token_acc": 0.14238316969091688 }, { "epoch": 0.2509527997654647, "grad_norm": 4.972662889303137, "learning_rate": 3.5123094958968344e-05, "loss": 5.185224533081055, "step": 428, "token_acc": 0.14612326306684267 }, { "epoch": 0.2515391380826737, "grad_norm": 5.705291829231124, "learning_rate": 3.520515826494724e-05, "loss": 5.1912007331848145, "step": 429, "token_acc": 0.1475584146309046 }, { "epoch": 0.25212547639988275, "grad_norm": 6.659758070748966, "learning_rate": 3.528722157092614e-05, "loss": 5.212825775146484, "step": 430, "token_acc": 0.1454485756755106 }, { "epoch": 0.25271181471709175, "grad_norm": 5.268090333457292, "learning_rate": 3.536928487690503e-05, "loss": 5.073171615600586, "step": 431, "token_acc": 0.15389058861718796 }, { "epoch": 0.2532981530343008, "grad_norm": 5.398514606098428, "learning_rate": 3.5451348182883936e-05, "loss": 5.135335922241211, "step": 432, "token_acc": 0.14954449788394764 }, { "epoch": 0.2538844913515098, "grad_norm": 6.6646015579131195, "learning_rate": 3.5533411488862834e-05, "loss": 5.15054178237915, "step": 433, "token_acc": 0.14666734936828765 }, { "epoch": 0.25447082966871887, "grad_norm": 3.559878286621562, "learning_rate": 3.561547479484173e-05, "loss": 5.091745376586914, "step": 434, "token_acc": 0.15107027747548854 }, { "epoch": 0.25505716798592787, "grad_norm": 8.224332800984234, "learning_rate": 3.569753810082063e-05, "loss": 5.09508752822876, "step": 435, "token_acc": 0.14848039694076193 }, { "epoch": 0.2556435063031369, "grad_norm": 4.4820530093887765, "learning_rate": 3.577960140679953e-05, "loss": 5.1070709228515625, "step": 436, "token_acc": 0.14960491990600006 }, { "epoch": 0.2562298446203459, "grad_norm": 5.916449641785716, "learning_rate": 3.5861664712778426e-05, "loss": 5.155655860900879, "step": 437, "token_acc": 0.14843973358464194 }, { "epoch": 0.256816182937555, "grad_norm": 5.6588710286094654, "learning_rate": 3.5943728018757324e-05, "loss": 5.150594234466553, "step": 438, "token_acc": 0.14711758852548973 }, { "epoch": 0.257402521254764, "grad_norm": 5.807443482602914, "learning_rate": 3.602579132473622e-05, "loss": 5.041116714477539, "step": 439, "token_acc": 0.15865883257256277 }, { "epoch": 0.25798885957197304, "grad_norm": 3.066935402216836, "learning_rate": 3.610785463071511e-05, "loss": 5.028489589691162, "step": 440, "token_acc": 0.1578670890679425 }, { "epoch": 0.25857519788918204, "grad_norm": 7.454903739282551, "learning_rate": 3.618991793669402e-05, "loss": 5.084744930267334, "step": 441, "token_acc": 0.15124455327694783 }, { "epoch": 0.2591615362063911, "grad_norm": 4.855878422769349, "learning_rate": 3.6271981242672916e-05, "loss": 5.067684173583984, "step": 442, "token_acc": 0.15359699438766833 }, { "epoch": 0.2597478745236001, "grad_norm": 5.250694288037539, "learning_rate": 3.6354044548651814e-05, "loss": 5.074548244476318, "step": 443, "token_acc": 0.14843909845686656 }, { "epoch": 0.26033421284080915, "grad_norm": 6.064142847715136, "learning_rate": 3.643610785463071e-05, "loss": 5.060888290405273, "step": 444, "token_acc": 0.15394387372898669 }, { "epoch": 0.26092055115801815, "grad_norm": 4.8974284209180645, "learning_rate": 3.651817116060961e-05, "loss": 5.02075719833374, "step": 445, "token_acc": 0.15538345224901023 }, { "epoch": 0.2615068894752272, "grad_norm": 4.8504137942003664, "learning_rate": 3.660023446658851e-05, "loss": 5.036099433898926, "step": 446, "token_acc": 0.15459575586157864 }, { "epoch": 0.2620932277924362, "grad_norm": 2.9613863602936634, "learning_rate": 3.6682297772567406e-05, "loss": 4.953916549682617, "step": 447, "token_acc": 0.1619095717957014 }, { "epoch": 0.26267956610964527, "grad_norm": 7.017908950738959, "learning_rate": 3.676436107854631e-05, "loss": 5.088875770568848, "step": 448, "token_acc": 0.14751002443953068 }, { "epoch": 0.26326590442685427, "grad_norm": 4.387576226004965, "learning_rate": 3.68464243845252e-05, "loss": 4.966022491455078, "step": 449, "token_acc": 0.16129543176731456 }, { "epoch": 0.2638522427440633, "grad_norm": 5.305816905622403, "learning_rate": 3.69284876905041e-05, "loss": 5.052915096282959, "step": 450, "token_acc": 0.15338949940870916 }, { "epoch": 0.2644385810612723, "grad_norm": 6.035451566160448, "learning_rate": 3.7010550996483e-05, "loss": 5.048089027404785, "step": 451, "token_acc": 0.15400516375832743 }, { "epoch": 0.2650249193784814, "grad_norm": 3.5121201704922425, "learning_rate": 3.7092614302461896e-05, "loss": 5.022364139556885, "step": 452, "token_acc": 0.15661915875225468 }, { "epoch": 0.26561125769569044, "grad_norm": 6.088430150586162, "learning_rate": 3.7174677608440794e-05, "loss": 4.956811904907227, "step": 453, "token_acc": 0.1606962224884909 }, { "epoch": 0.26619759601289944, "grad_norm": 3.9191383038118106, "learning_rate": 3.725674091441969e-05, "loss": 4.872303009033203, "step": 454, "token_acc": 0.16753443106820165 }, { "epoch": 0.2667839343301085, "grad_norm": 6.961416911227017, "learning_rate": 3.733880422039859e-05, "loss": 5.049225330352783, "step": 455, "token_acc": 0.1523634816120787 }, { "epoch": 0.2673702726473175, "grad_norm": 4.778722044033276, "learning_rate": 3.742086752637749e-05, "loss": 4.90943717956543, "step": 456, "token_acc": 0.16553806617410857 }, { "epoch": 0.26795661096452655, "grad_norm": 4.692481899166697, "learning_rate": 3.750293083235639e-05, "loss": 4.986898422241211, "step": 457, "token_acc": 0.16026830837085362 }, { "epoch": 0.26854294928173555, "grad_norm": 4.580578061397562, "learning_rate": 3.7584994138335283e-05, "loss": 4.964858055114746, "step": 458, "token_acc": 0.15767877457075585 }, { "epoch": 0.2691292875989446, "grad_norm": 3.8708301193200363, "learning_rate": 3.766705744431418e-05, "loss": 4.976293087005615, "step": 459, "token_acc": 0.15764734617093534 }, { "epoch": 0.2697156259161536, "grad_norm": 5.22549963399129, "learning_rate": 3.774912075029308e-05, "loss": 4.969941139221191, "step": 460, "token_acc": 0.15727248148494413 }, { "epoch": 0.27030196423336267, "grad_norm": 3.9221587888364593, "learning_rate": 3.783118405627198e-05, "loss": 4.986678600311279, "step": 461, "token_acc": 0.1555507480943758 }, { "epoch": 0.27088830255057167, "grad_norm": 5.76792278190766, "learning_rate": 3.7913247362250875e-05, "loss": 4.886974811553955, "step": 462, "token_acc": 0.16748608313640906 }, { "epoch": 0.2714746408677807, "grad_norm": 2.927549106959559, "learning_rate": 3.799531066822977e-05, "loss": 4.819583415985107, "step": 463, "token_acc": 0.1731849634498167 }, { "epoch": 0.2720609791849897, "grad_norm": 4.311879057406091, "learning_rate": 3.807737397420867e-05, "loss": 4.862274646759033, "step": 464, "token_acc": 0.16724169814809922 }, { "epoch": 0.2726473175021988, "grad_norm": 7.9268557112795035, "learning_rate": 3.815943728018757e-05, "loss": 4.993745803833008, "step": 465, "token_acc": 0.15440605642022304 }, { "epoch": 0.2732336558194078, "grad_norm": 5.174726235951455, "learning_rate": 3.8241500586166474e-05, "loss": 4.876766204833984, "step": 466, "token_acc": 0.16630642965378786 }, { "epoch": 0.27381999413661684, "grad_norm": 5.324514735526076, "learning_rate": 3.8323563892145365e-05, "loss": 4.9098286628723145, "step": 467, "token_acc": 0.1622337877160335 }, { "epoch": 0.27440633245382584, "grad_norm": 3.8230161864156154, "learning_rate": 3.840562719812426e-05, "loss": 4.8760504722595215, "step": 468, "token_acc": 0.1670110063483716 }, { "epoch": 0.2749926707710349, "grad_norm": 6.531581457449913, "learning_rate": 3.848769050410316e-05, "loss": 4.856128215789795, "step": 469, "token_acc": 0.16428578978521902 }, { "epoch": 0.2755790090882439, "grad_norm": 3.8723758469892884, "learning_rate": 3.856975381008206e-05, "loss": 4.870810508728027, "step": 470, "token_acc": 0.16378886984870017 }, { "epoch": 0.27616534740545295, "grad_norm": 6.060564796565729, "learning_rate": 3.865181711606096e-05, "loss": 4.866532325744629, "step": 471, "token_acc": 0.16861252224979537 }, { "epoch": 0.27675168572266196, "grad_norm": 4.158868558541277, "learning_rate": 3.8733880422039855e-05, "loss": 4.863700866699219, "step": 472, "token_acc": 0.16568546753942434 }, { "epoch": 0.277338024039871, "grad_norm": 6.890178880254814, "learning_rate": 3.881594372801875e-05, "loss": 4.900069236755371, "step": 473, "token_acc": 0.16240871789593653 }, { "epoch": 0.27792436235708, "grad_norm": 6.075839627985804, "learning_rate": 3.889800703399766e-05, "loss": 4.827624320983887, "step": 474, "token_acc": 0.1690576413176927 }, { "epoch": 0.27851070067428907, "grad_norm": 3.7536917237957907, "learning_rate": 3.8980070339976556e-05, "loss": 4.7210211753845215, "step": 475, "token_acc": 0.1784072418205659 }, { "epoch": 0.27909703899149807, "grad_norm": 7.846119632939475, "learning_rate": 3.906213364595545e-05, "loss": 4.902039527893066, "step": 476, "token_acc": 0.1612549185392345 }, { "epoch": 0.2796833773087071, "grad_norm": 4.604159514257903, "learning_rate": 3.9144196951934345e-05, "loss": 4.826611518859863, "step": 477, "token_acc": 0.16720481964997705 }, { "epoch": 0.2802697156259161, "grad_norm": 6.571972386929722, "learning_rate": 3.922626025791324e-05, "loss": 4.8582658767700195, "step": 478, "token_acc": 0.17041208929617874 }, { "epoch": 0.2808560539431252, "grad_norm": 4.595181361235622, "learning_rate": 3.930832356389214e-05, "loss": 4.912710666656494, "step": 479, "token_acc": 0.16197342217131086 }, { "epoch": 0.28144239226033424, "grad_norm": 4.940192232931624, "learning_rate": 3.939038686987104e-05, "loss": 4.774998664855957, "step": 480, "token_acc": 0.17084471156987 }, { "epoch": 0.28202873057754324, "grad_norm": 5.112579533634725, "learning_rate": 3.947245017584994e-05, "loss": 4.776060581207275, "step": 481, "token_acc": 0.1709170510515206 }, { "epoch": 0.2826150688947523, "grad_norm": 5.966988350355773, "learning_rate": 3.9554513481828835e-05, "loss": 4.813092231750488, "step": 482, "token_acc": 0.16952886977239423 }, { "epoch": 0.2832014072119613, "grad_norm": 5.007591138618022, "learning_rate": 3.963657678780774e-05, "loss": 4.825842380523682, "step": 483, "token_acc": 0.16916373643173827 }, { "epoch": 0.28378774552917035, "grad_norm": 5.051898317509681, "learning_rate": 3.971864009378664e-05, "loss": 4.794732093811035, "step": 484, "token_acc": 0.1692357963091669 }, { "epoch": 0.28437408384637936, "grad_norm": 4.711763343865885, "learning_rate": 3.980070339976553e-05, "loss": 4.788509368896484, "step": 485, "token_acc": 0.17035854400166878 }, { "epoch": 0.2849604221635884, "grad_norm": 3.819670135340478, "learning_rate": 3.988276670574443e-05, "loss": 4.711777687072754, "step": 486, "token_acc": 0.17715314858534165 }, { "epoch": 0.2855467604807974, "grad_norm": 5.970907743815383, "learning_rate": 3.9964830011723325e-05, "loss": 4.755890846252441, "step": 487, "token_acc": 0.17389933774114566 }, { "epoch": 0.28613309879800647, "grad_norm": 4.508949535865645, "learning_rate": 4.004689331770222e-05, "loss": 4.786577224731445, "step": 488, "token_acc": 0.17178006323067477 }, { "epoch": 0.28671943711521547, "grad_norm": 4.7039836772607275, "learning_rate": 4.012895662368112e-05, "loss": 4.661779880523682, "step": 489, "token_acc": 0.18148803993658003 }, { "epoch": 0.2873057754324245, "grad_norm": 4.099877234508298, "learning_rate": 4.021101992966002e-05, "loss": 4.757030010223389, "step": 490, "token_acc": 0.17181380069671318 }, { "epoch": 0.2878921137496335, "grad_norm": 7.878111921252635, "learning_rate": 4.029308323563892e-05, "loss": 4.677789688110352, "step": 491, "token_acc": 0.17885842897538257 }, { "epoch": 0.2884784520668426, "grad_norm": 3.991116361814418, "learning_rate": 4.037514654161782e-05, "loss": 4.732224464416504, "step": 492, "token_acc": 0.17598534656474948 }, { "epoch": 0.2890647903840516, "grad_norm": 8.77967181210225, "learning_rate": 4.045720984759672e-05, "loss": 4.756728172302246, "step": 493, "token_acc": 0.1733485132601125 }, { "epoch": 0.28965112870126064, "grad_norm": 5.715927704378967, "learning_rate": 4.053927315357561e-05, "loss": 4.751640796661377, "step": 494, "token_acc": 0.17318351413720964 }, { "epoch": 0.29023746701846964, "grad_norm": 6.802560523270176, "learning_rate": 4.062133645955451e-05, "loss": 4.760959625244141, "step": 495, "token_acc": 0.17220478182843135 }, { "epoch": 0.2908238053356787, "grad_norm": 5.072369745747379, "learning_rate": 4.070339976553341e-05, "loss": 4.716753959655762, "step": 496, "token_acc": 0.17656106375866967 }, { "epoch": 0.2914101436528877, "grad_norm": 4.90117491775021, "learning_rate": 4.0785463071512305e-05, "loss": 4.752071380615234, "step": 497, "token_acc": 0.17300147353210046 }, { "epoch": 0.29199648197009676, "grad_norm": 5.4957393353858865, "learning_rate": 4.08675263774912e-05, "loss": 4.681450843811035, "step": 498, "token_acc": 0.1782429481868255 }, { "epoch": 0.29258282028730576, "grad_norm": 4.95462142170705, "learning_rate": 4.09495896834701e-05, "loss": 4.659943580627441, "step": 499, "token_acc": 0.18024463667727314 }, { "epoch": 0.2931691586045148, "grad_norm": 5.955133078258152, "learning_rate": 4.1031652989449e-05, "loss": 4.7830939292907715, "step": 500, "token_acc": 0.1678144186512178 }, { "epoch": 0.2937554969217238, "grad_norm": 6.031623149406746, "learning_rate": 4.1113716295427904e-05, "loss": 4.656826972961426, "step": 501, "token_acc": 0.18212388155719533 }, { "epoch": 0.29434183523893287, "grad_norm": 3.9467654836948056, "learning_rate": 4.11957796014068e-05, "loss": 4.635341644287109, "step": 502, "token_acc": 0.18288904750082755 }, { "epoch": 0.29492817355614187, "grad_norm": 5.270988018328669, "learning_rate": 4.127784290738569e-05, "loss": 4.578634262084961, "step": 503, "token_acc": 0.1883495461407736 }, { "epoch": 0.2955145118733509, "grad_norm": 4.955821900675812, "learning_rate": 4.135990621336459e-05, "loss": 4.6846466064453125, "step": 504, "token_acc": 0.17669373603992516 }, { "epoch": 0.2961008501905599, "grad_norm": 4.350351258889933, "learning_rate": 4.144196951934349e-05, "loss": 4.686916351318359, "step": 505, "token_acc": 0.17451957112149413 }, { "epoch": 0.296687188507769, "grad_norm": 6.779972490933298, "learning_rate": 4.152403282532239e-05, "loss": 4.664619445800781, "step": 506, "token_acc": 0.1783146322912501 }, { "epoch": 0.297273526824978, "grad_norm": 2.516193002793484, "learning_rate": 4.1606096131301285e-05, "loss": 4.657482147216797, "step": 507, "token_acc": 0.1795607850559665 }, { "epoch": 0.29785986514218704, "grad_norm": 7.917869431286497, "learning_rate": 4.168815943728018e-05, "loss": 4.618627548217773, "step": 508, "token_acc": 0.18406706493287173 }, { "epoch": 0.2984462034593961, "grad_norm": 5.401828678481543, "learning_rate": 4.177022274325909e-05, "loss": 4.700881004333496, "step": 509, "token_acc": 0.17571589831333073 }, { "epoch": 0.2990325417766051, "grad_norm": 5.612374293805915, "learning_rate": 4.1852286049237985e-05, "loss": 4.602940559387207, "step": 510, "token_acc": 0.1833063605970912 }, { "epoch": 0.29961888009381415, "grad_norm": 4.44346370741346, "learning_rate": 4.1934349355216877e-05, "loss": 4.622890472412109, "step": 511, "token_acc": 0.18342795420628047 }, { "epoch": 0.30020521841102316, "grad_norm": 4.394793460960435, "learning_rate": 4.2016412661195775e-05, "loss": 4.701310157775879, "step": 512, "token_acc": 0.17294349686525656 }, { "epoch": 0.3007915567282322, "grad_norm": 5.12559901698279, "learning_rate": 4.209847596717467e-05, "loss": 4.669729232788086, "step": 513, "token_acc": 0.17563004865451648 }, { "epoch": 0.3013778950454412, "grad_norm": 5.981152478181236, "learning_rate": 4.218053927315357e-05, "loss": 4.613595008850098, "step": 514, "token_acc": 0.18036203514838425 }, { "epoch": 0.30196423336265027, "grad_norm": 5.965352563229646, "learning_rate": 4.226260257913247e-05, "loss": 4.624350070953369, "step": 515, "token_acc": 0.182394285055882 }, { "epoch": 0.30255057167985927, "grad_norm": 5.374282132665988, "learning_rate": 4.2344665885111366e-05, "loss": 4.681499481201172, "step": 516, "token_acc": 0.1765534802978332 }, { "epoch": 0.3031369099970683, "grad_norm": 5.051166520187232, "learning_rate": 4.2426729191090264e-05, "loss": 4.583155632019043, "step": 517, "token_acc": 0.1848408302787974 }, { "epoch": 0.3037232483142773, "grad_norm": 4.354903076724776, "learning_rate": 4.250879249706917e-05, "loss": 4.5662055015563965, "step": 518, "token_acc": 0.18501617616364016 }, { "epoch": 0.3043095866314864, "grad_norm": 7.254814879247495, "learning_rate": 4.259085580304807e-05, "loss": 4.63272762298584, "step": 519, "token_acc": 0.1814322888287 }, { "epoch": 0.3048959249486954, "grad_norm": 2.8344317826714165, "learning_rate": 4.267291910902696e-05, "loss": 4.566251277923584, "step": 520, "token_acc": 0.18252042917412659 }, { "epoch": 0.30548226326590444, "grad_norm": 5.286420691375452, "learning_rate": 4.2754982415005856e-05, "loss": 4.647183895111084, "step": 521, "token_acc": 0.1760656428817472 }, { "epoch": 0.30606860158311344, "grad_norm": 5.141743028250384, "learning_rate": 4.2837045720984754e-05, "loss": 4.534359931945801, "step": 522, "token_acc": 0.18841153549645742 }, { "epoch": 0.3066549399003225, "grad_norm": 6.712464526146822, "learning_rate": 4.291910902696365e-05, "loss": 4.575827598571777, "step": 523, "token_acc": 0.18434154351395732 }, { "epoch": 0.3072412782175315, "grad_norm": 4.5108593646587565, "learning_rate": 4.300117233294255e-05, "loss": 4.6136322021484375, "step": 524, "token_acc": 0.17909760470212055 }, { "epoch": 0.30782761653474056, "grad_norm": 6.074280889169607, "learning_rate": 4.308323563892145e-05, "loss": 4.533474922180176, "step": 525, "token_acc": 0.18856864834808396 }, { "epoch": 0.30841395485194956, "grad_norm": 5.075177379537212, "learning_rate": 4.3165298944900346e-05, "loss": 4.533350944519043, "step": 526, "token_acc": 0.18758292288398976 }, { "epoch": 0.3090002931691586, "grad_norm": 7.125022653072245, "learning_rate": 4.324736225087925e-05, "loss": 4.567195892333984, "step": 527, "token_acc": 0.18469439295092674 }, { "epoch": 0.3095866314863676, "grad_norm": 4.373813287264896, "learning_rate": 4.332942555685815e-05, "loss": 4.537093162536621, "step": 528, "token_acc": 0.18826839480491772 }, { "epoch": 0.31017296980357667, "grad_norm": 6.839596206891016, "learning_rate": 4.341148886283704e-05, "loss": 4.5922441482543945, "step": 529, "token_acc": 0.18332998719777224 }, { "epoch": 0.31075930812078567, "grad_norm": 4.636625620323953, "learning_rate": 4.349355216881594e-05, "loss": 4.512372016906738, "step": 530, "token_acc": 0.189996297527451 }, { "epoch": 0.3113456464379947, "grad_norm": 6.939024922502758, "learning_rate": 4.3575615474794836e-05, "loss": 4.534167289733887, "step": 531, "token_acc": 0.18478207539455394 }, { "epoch": 0.31193198475520373, "grad_norm": 4.9190237347534485, "learning_rate": 4.3657678780773734e-05, "loss": 4.477829933166504, "step": 532, "token_acc": 0.19221272755661292 }, { "epoch": 0.3125183230724128, "grad_norm": 5.385586585954992, "learning_rate": 4.373974208675263e-05, "loss": 4.456300258636475, "step": 533, "token_acc": 0.19495484514091363 }, { "epoch": 0.3131046613896218, "grad_norm": 5.711207057589939, "learning_rate": 4.382180539273153e-05, "loss": 4.460087776184082, "step": 534, "token_acc": 0.1928616727257543 }, { "epoch": 0.31369099970683084, "grad_norm": 5.8989313206787575, "learning_rate": 4.390386869871043e-05, "loss": 4.484177112579346, "step": 535, "token_acc": 0.19025959854577973 }, { "epoch": 0.3142773380240399, "grad_norm": 5.239531672071058, "learning_rate": 4.398593200468933e-05, "loss": 4.5287322998046875, "step": 536, "token_acc": 0.1855750170686579 }, { "epoch": 0.3148636763412489, "grad_norm": 3.5463464554853017, "learning_rate": 4.406799531066823e-05, "loss": 4.522578239440918, "step": 537, "token_acc": 0.18565694349335923 }, { "epoch": 0.31545001465845796, "grad_norm": 6.604086194843378, "learning_rate": 4.415005861664712e-05, "loss": 4.484546661376953, "step": 538, "token_acc": 0.18996912184597098 }, { "epoch": 0.31603635297566696, "grad_norm": 4.745285705276692, "learning_rate": 4.423212192262602e-05, "loss": 4.475639343261719, "step": 539, "token_acc": 0.19128830680128026 }, { "epoch": 0.316622691292876, "grad_norm": 5.8346735401001455, "learning_rate": 4.431418522860492e-05, "loss": 4.5008158683776855, "step": 540, "token_acc": 0.18859267707981203 }, { "epoch": 0.317209029610085, "grad_norm": 6.547702205302202, "learning_rate": 4.4396248534583816e-05, "loss": 4.480155944824219, "step": 541, "token_acc": 0.18974612414742165 }, { "epoch": 0.31779536792729407, "grad_norm": 5.743626658794127, "learning_rate": 4.4478311840562714e-05, "loss": 4.537848472595215, "step": 542, "token_acc": 0.18700443453092222 }, { "epoch": 0.31838170624450307, "grad_norm": 5.314244724847178, "learning_rate": 4.456037514654161e-05, "loss": 4.522566795349121, "step": 543, "token_acc": 0.18432651737298222 }, { "epoch": 0.3189680445617121, "grad_norm": 6.041757818787196, "learning_rate": 4.464243845252052e-05, "loss": 4.5613322257995605, "step": 544, "token_acc": 0.18117134597051096 }, { "epoch": 0.31955438287892113, "grad_norm": 6.632733398775879, "learning_rate": 4.4724501758499415e-05, "loss": 4.425638198852539, "step": 545, "token_acc": 0.1966081197365259 }, { "epoch": 0.3201407211961302, "grad_norm": 4.355380606979025, "learning_rate": 4.480656506447831e-05, "loss": 4.463742256164551, "step": 546, "token_acc": 0.18941737298914316 }, { "epoch": 0.3207270595133392, "grad_norm": 7.659555627912166, "learning_rate": 4.4888628370457204e-05, "loss": 4.510584831237793, "step": 547, "token_acc": 0.18727835778965388 }, { "epoch": 0.32131339783054824, "grad_norm": 5.773509503241845, "learning_rate": 4.49706916764361e-05, "loss": 4.535423278808594, "step": 548, "token_acc": 0.1858875768036705 }, { "epoch": 0.32189973614775724, "grad_norm": 8.836138949870497, "learning_rate": 4.5052754982415e-05, "loss": 4.49858283996582, "step": 549, "token_acc": 0.18657096585336183 }, { "epoch": 0.3224860744649663, "grad_norm": 5.27196626205651, "learning_rate": 4.51348182883939e-05, "loss": 4.419318199157715, "step": 550, "token_acc": 0.1946249987204815 }, { "epoch": 0.3230724127821753, "grad_norm": 8.282159121276031, "learning_rate": 4.5216881594372796e-05, "loss": 4.496129035949707, "step": 551, "token_acc": 0.18838829197948492 }, { "epoch": 0.32365875109938436, "grad_norm": 5.993117978987693, "learning_rate": 4.5298944900351694e-05, "loss": 4.461013317108154, "step": 552, "token_acc": 0.19080874737371645 }, { "epoch": 0.32424508941659336, "grad_norm": 3.948394316134522, "learning_rate": 4.53810082063306e-05, "loss": 4.479466438293457, "step": 553, "token_acc": 0.18935493018214392 }, { "epoch": 0.3248314277338024, "grad_norm": 7.689630909084427, "learning_rate": 4.54630715123095e-05, "loss": 4.415638446807861, "step": 554, "token_acc": 0.19777227514533213 }, { "epoch": 0.3254177660510114, "grad_norm": 7.320774738127758, "learning_rate": 4.5545134818288395e-05, "loss": 4.428081512451172, "step": 555, "token_acc": 0.1960862680807083 }, { "epoch": 0.32600410436822047, "grad_norm": 5.862654536263259, "learning_rate": 4.5627198124267286e-05, "loss": 4.470279693603516, "step": 556, "token_acc": 0.19028585239148738 }, { "epoch": 0.32659044268542947, "grad_norm": 5.105823546914807, "learning_rate": 4.5709261430246184e-05, "loss": 4.5024919509887695, "step": 557, "token_acc": 0.18545226403110623 }, { "epoch": 0.32717678100263853, "grad_norm": 6.028710401444009, "learning_rate": 4.579132473622508e-05, "loss": 4.416274070739746, "step": 558, "token_acc": 0.19277732950657256 }, { "epoch": 0.32776311931984753, "grad_norm": 6.3098463850094895, "learning_rate": 4.587338804220398e-05, "loss": 4.432454586029053, "step": 559, "token_acc": 0.19296357743454878 }, { "epoch": 0.3283494576370566, "grad_norm": 7.7506743787126995, "learning_rate": 4.595545134818288e-05, "loss": 4.461402416229248, "step": 560, "token_acc": 0.1898100580759658 }, { "epoch": 0.3289357959542656, "grad_norm": 6.557894074424091, "learning_rate": 4.6037514654161776e-05, "loss": 4.4386186599731445, "step": 561, "token_acc": 0.19209386902311817 }, { "epoch": 0.32952213427147464, "grad_norm": 4.1822001242563545, "learning_rate": 4.611957796014068e-05, "loss": 4.367791175842285, "step": 562, "token_acc": 0.1983296095052554 }, { "epoch": 0.33010847258868364, "grad_norm": 8.359275481722483, "learning_rate": 4.620164126611958e-05, "loss": 4.379465579986572, "step": 563, "token_acc": 0.20002784036130603 }, { "epoch": 0.3306948109058927, "grad_norm": 4.150650938335347, "learning_rate": 4.6283704572098477e-05, "loss": 4.4019670486450195, "step": 564, "token_acc": 0.19714070930455738 }, { "epoch": 0.33128114922310176, "grad_norm": 9.504457061160851, "learning_rate": 4.636576787807737e-05, "loss": 4.430449962615967, "step": 565, "token_acc": 0.19324560284998296 }, { "epoch": 0.33186748754031076, "grad_norm": 5.658739697814227, "learning_rate": 4.6447831184056266e-05, "loss": 4.451925754547119, "step": 566, "token_acc": 0.18910567831583347 }, { "epoch": 0.3324538258575198, "grad_norm": 8.887793535710317, "learning_rate": 4.6529894490035164e-05, "loss": 4.482825756072998, "step": 567, "token_acc": 0.18872363186921615 }, { "epoch": 0.3330401641747288, "grad_norm": 5.937473436789054, "learning_rate": 4.661195779601406e-05, "loss": 4.4818830490112305, "step": 568, "token_acc": 0.18827303548086388 }, { "epoch": 0.33362650249193787, "grad_norm": 6.223251974775681, "learning_rate": 4.669402110199296e-05, "loss": 4.42918586730957, "step": 569, "token_acc": 0.1919210409897335 }, { "epoch": 0.33421284080914687, "grad_norm": 7.002097879881412, "learning_rate": 4.6776084407971864e-05, "loss": 4.486233711242676, "step": 570, "token_acc": 0.18689357666161005 }, { "epoch": 0.33479917912635593, "grad_norm": 5.47675686619503, "learning_rate": 4.685814771395076e-05, "loss": 4.3734259605407715, "step": 571, "token_acc": 0.19837908801202714 }, { "epoch": 0.33538551744356493, "grad_norm": 6.0266905736728456, "learning_rate": 4.694021101992966e-05, "loss": 4.396583557128906, "step": 572, "token_acc": 0.19571154613434238 }, { "epoch": 0.335971855760774, "grad_norm": 4.658813691796653, "learning_rate": 4.702227432590856e-05, "loss": 4.367709159851074, "step": 573, "token_acc": 0.19878226157488507 }, { "epoch": 0.336558194077983, "grad_norm": 9.395882672486985, "learning_rate": 4.710433763188745e-05, "loss": 4.435266494750977, "step": 574, "token_acc": 0.190687181739285 }, { "epoch": 0.33714453239519204, "grad_norm": 3.4531224434636214, "learning_rate": 4.718640093786635e-05, "loss": 4.372501850128174, "step": 575, "token_acc": 0.19843487692040238 }, { "epoch": 0.33773087071240104, "grad_norm": 8.846575320080282, "learning_rate": 4.7268464243845246e-05, "loss": 4.401895523071289, "step": 576, "token_acc": 0.194584976189257 }, { "epoch": 0.3383172090296101, "grad_norm": 6.34762391674699, "learning_rate": 4.7350527549824144e-05, "loss": 4.460208892822266, "step": 577, "token_acc": 0.19122330644088487 }, { "epoch": 0.3389035473468191, "grad_norm": 4.431158013832074, "learning_rate": 4.743259085580304e-05, "loss": 4.46501350402832, "step": 578, "token_acc": 0.18966632660678295 }, { "epoch": 0.33948988566402816, "grad_norm": 4.882452446056982, "learning_rate": 4.7514654161781946e-05, "loss": 4.391036033630371, "step": 579, "token_acc": 0.19597744853808838 }, { "epoch": 0.34007622398123716, "grad_norm": 7.674952597614779, "learning_rate": 4.7596717467760844e-05, "loss": 4.347477436065674, "step": 580, "token_acc": 0.199366841142713 }, { "epoch": 0.3406625622984462, "grad_norm": 5.030967460889975, "learning_rate": 4.767878077373974e-05, "loss": 4.418288707733154, "step": 581, "token_acc": 0.1916214551902067 }, { "epoch": 0.3412489006156552, "grad_norm": 8.896203828070119, "learning_rate": 4.7760844079718633e-05, "loss": 4.416926383972168, "step": 582, "token_acc": 0.19323235494166274 }, { "epoch": 0.34183523893286427, "grad_norm": 4.489306072034783, "learning_rate": 4.784290738569753e-05, "loss": 4.426893711090088, "step": 583, "token_acc": 0.19094187577937521 }, { "epoch": 0.3424215772500733, "grad_norm": 9.204048802736533, "learning_rate": 4.792497069167643e-05, "loss": 4.451262474060059, "step": 584, "token_acc": 0.19010222448275044 }, { "epoch": 0.34300791556728233, "grad_norm": 6.859814676939098, "learning_rate": 4.800703399765533e-05, "loss": 4.46299409866333, "step": 585, "token_acc": 0.1906721463748978 }, { "epoch": 0.34359425388449133, "grad_norm": 7.533827152115777, "learning_rate": 4.8089097303634225e-05, "loss": 4.417018890380859, "step": 586, "token_acc": 0.19287514915878398 }, { "epoch": 0.3441805922017004, "grad_norm": 8.172277786659507, "learning_rate": 4.817116060961312e-05, "loss": 4.394757270812988, "step": 587, "token_acc": 0.194650138726811 }, { "epoch": 0.3447669305189094, "grad_norm": 6.956038937526932, "learning_rate": 4.825322391559203e-05, "loss": 4.359362602233887, "step": 588, "token_acc": 0.19735680202209507 }, { "epoch": 0.34535326883611844, "grad_norm": 5.1733747985879965, "learning_rate": 4.8335287221570926e-05, "loss": 4.36881160736084, "step": 589, "token_acc": 0.19789089071170593 }, { "epoch": 0.34593960715332744, "grad_norm": 5.18686725818335, "learning_rate": 4.8417350527549824e-05, "loss": 4.352592945098877, "step": 590, "token_acc": 0.19756552140773018 }, { "epoch": 0.3465259454705365, "grad_norm": 4.241528137874688, "learning_rate": 4.8499413833528715e-05, "loss": 4.409492492675781, "step": 591, "token_acc": 0.19087769867423396 }, { "epoch": 0.34711228378774556, "grad_norm": 7.187543154768812, "learning_rate": 4.858147713950761e-05, "loss": 4.281979084014893, "step": 592, "token_acc": 0.20348683013078864 }, { "epoch": 0.34769862210495456, "grad_norm": 6.731449993214299, "learning_rate": 4.866354044548651e-05, "loss": 4.329821586608887, "step": 593, "token_acc": 0.19997488869939156 }, { "epoch": 0.3482849604221636, "grad_norm": 8.403755192686713, "learning_rate": 4.874560375146541e-05, "loss": 4.358314514160156, "step": 594, "token_acc": 0.19530622213118629 }, { "epoch": 0.3488712987393726, "grad_norm": 4.528393967952365, "learning_rate": 4.882766705744431e-05, "loss": 4.301444053649902, "step": 595, "token_acc": 0.2015556768558952 }, { "epoch": 0.34945763705658167, "grad_norm": 6.966055045560965, "learning_rate": 4.8909730363423205e-05, "loss": 4.321412563323975, "step": 596, "token_acc": 0.20148697102244575 }, { "epoch": 0.3500439753737907, "grad_norm": 6.881316858375219, "learning_rate": 4.899179366940211e-05, "loss": 4.366338729858398, "step": 597, "token_acc": 0.19566463460364056 }, { "epoch": 0.35063031369099973, "grad_norm": 6.83300977581268, "learning_rate": 4.907385697538101e-05, "loss": 4.357544898986816, "step": 598, "token_acc": 0.19502726805186804 }, { "epoch": 0.35121665200820873, "grad_norm": 5.489291699937008, "learning_rate": 4.9155920281359906e-05, "loss": 4.353487968444824, "step": 599, "token_acc": 0.1956040154089252 }, { "epoch": 0.3518029903254178, "grad_norm": 5.748467794890245, "learning_rate": 4.92379835873388e-05, "loss": 4.32010555267334, "step": 600, "token_acc": 0.2004268590420086 }, { "epoch": 0.3523893286426268, "grad_norm": 6.335321485462031, "learning_rate": 4.9320046893317695e-05, "loss": 4.313159942626953, "step": 601, "token_acc": 0.19927321984448468 }, { "epoch": 0.35297566695983584, "grad_norm": 3.7368206365732513, "learning_rate": 4.940211019929659e-05, "loss": 4.370244026184082, "step": 602, "token_acc": 0.1923649152165671 }, { "epoch": 0.35356200527704484, "grad_norm": 10.44467872139782, "learning_rate": 4.948417350527549e-05, "loss": 4.313765525817871, "step": 603, "token_acc": 0.2034349048115739 }, { "epoch": 0.3541483435942539, "grad_norm": 5.3176314880898525, "learning_rate": 4.956623681125439e-05, "loss": 4.321287155151367, "step": 604, "token_acc": 0.1991299567368597 }, { "epoch": 0.3547346819114629, "grad_norm": 6.233879510796788, "learning_rate": 4.9648300117233294e-05, "loss": 4.298736572265625, "step": 605, "token_acc": 0.2031674786067473 }, { "epoch": 0.35532102022867196, "grad_norm": 7.207066786392458, "learning_rate": 4.973036342321219e-05, "loss": 4.318865776062012, "step": 606, "token_acc": 0.2011544449929746 }, { "epoch": 0.35590735854588096, "grad_norm": 4.997552499651344, "learning_rate": 4.981242672919109e-05, "loss": 4.333331108093262, "step": 607, "token_acc": 0.20019160321000282 }, { "epoch": 0.35649369686309, "grad_norm": 6.810892953065391, "learning_rate": 4.989449003516999e-05, "loss": 4.368122577667236, "step": 608, "token_acc": 0.19231795850302202 }, { "epoch": 0.357080035180299, "grad_norm": 5.789893708176766, "learning_rate": 4.997655334114888e-05, "loss": 4.321147441864014, "step": 609, "token_acc": 0.20054164323664614 }, { "epoch": 0.35766637349750807, "grad_norm": 6.566545651045261, "learning_rate": 5.005861664712778e-05, "loss": 4.36564826965332, "step": 610, "token_acc": 0.19370404294186577 }, { "epoch": 0.3582527118147171, "grad_norm": 7.439063988575006, "learning_rate": 5.0140679953106675e-05, "loss": 4.338534832000732, "step": 611, "token_acc": 0.19844616204690832 }, { "epoch": 0.35883905013192613, "grad_norm": 6.6669879988847836, "learning_rate": 5.022274325908557e-05, "loss": 4.325567722320557, "step": 612, "token_acc": 0.20045966886629357 }, { "epoch": 0.35942538844913513, "grad_norm": 6.687994302475722, "learning_rate": 5.030480656506447e-05, "loss": 4.332146644592285, "step": 613, "token_acc": 0.20098085616412073 }, { "epoch": 0.3600117267663442, "grad_norm": 4.4555993465093575, "learning_rate": 5.0386869871043376e-05, "loss": 4.381261348724365, "step": 614, "token_acc": 0.19281257281930456 }, { "epoch": 0.3605980650835532, "grad_norm": 6.884530804038673, "learning_rate": 5.0468933177022274e-05, "loss": 4.287456035614014, "step": 615, "token_acc": 0.20258283154667886 }, { "epoch": 0.36118440340076224, "grad_norm": 3.9426626333463277, "learning_rate": 5.055099648300117e-05, "loss": 4.228626728057861, "step": 616, "token_acc": 0.2080618053673082 }, { "epoch": 0.36177074171797124, "grad_norm": 8.468973017835788, "learning_rate": 5.063305978898007e-05, "loss": 4.2621941566467285, "step": 617, "token_acc": 0.2061359786583569 }, { "epoch": 0.3623570800351803, "grad_norm": 4.7226266501157035, "learning_rate": 5.071512309495896e-05, "loss": 4.298932075500488, "step": 618, "token_acc": 0.2024293547476061 }, { "epoch": 0.3629434183523893, "grad_norm": 10.247823179573718, "learning_rate": 5.079718640093786e-05, "loss": 4.3769917488098145, "step": 619, "token_acc": 0.19332201201963772 }, { "epoch": 0.36352975666959836, "grad_norm": 7.731050579633973, "learning_rate": 5.087924970691676e-05, "loss": 4.340371131896973, "step": 620, "token_acc": 0.19514768754954323 }, { "epoch": 0.3641160949868074, "grad_norm": 4.962868099430327, "learning_rate": 5.0961313012895655e-05, "loss": 4.297729969024658, "step": 621, "token_acc": 0.20349455427657048 }, { "epoch": 0.3647024333040164, "grad_norm": 10.806352539050675, "learning_rate": 5.104337631887455e-05, "loss": 4.274240016937256, "step": 622, "token_acc": 0.20278523034523077 }, { "epoch": 0.36528877162122547, "grad_norm": 6.19575035906615, "learning_rate": 5.112543962485346e-05, "loss": 4.358561038970947, "step": 623, "token_acc": 0.19831221376490557 }, { "epoch": 0.3658751099384345, "grad_norm": 11.352687956646522, "learning_rate": 5.1207502930832356e-05, "loss": 4.337742805480957, "step": 624, "token_acc": 0.19651950879442884 }, { "epoch": 0.36646144825564353, "grad_norm": 7.693508242233182, "learning_rate": 5.1289566236811254e-05, "loss": 4.280763626098633, "step": 625, "token_acc": 0.20407976138465936 }, { "epoch": 0.36704778657285253, "grad_norm": 9.064200943271722, "learning_rate": 5.137162954279015e-05, "loss": 4.325865745544434, "step": 626, "token_acc": 0.19913288080717417 }, { "epoch": 0.3676341248900616, "grad_norm": 7.5082242443432365, "learning_rate": 5.145369284876904e-05, "loss": 4.3048906326293945, "step": 627, "token_acc": 0.20137878828841008 }, { "epoch": 0.3682204632072706, "grad_norm": 5.117586923589491, "learning_rate": 5.153575615474794e-05, "loss": 4.274219512939453, "step": 628, "token_acc": 0.20411066283684523 }, { "epoch": 0.36880680152447964, "grad_norm": 6.3038731305539, "learning_rate": 5.161781946072684e-05, "loss": 4.3289899826049805, "step": 629, "token_acc": 0.19631616427817716 }, { "epoch": 0.36939313984168864, "grad_norm": 7.05866879671453, "learning_rate": 5.169988276670574e-05, "loss": 4.268229961395264, "step": 630, "token_acc": 0.20327354622866284 }, { "epoch": 0.3699794781588977, "grad_norm": 5.071653610171233, "learning_rate": 5.1781946072684635e-05, "loss": 4.28590202331543, "step": 631, "token_acc": 0.20130190173080048 }, { "epoch": 0.3705658164761067, "grad_norm": 8.016254781251437, "learning_rate": 5.186400937866354e-05, "loss": 4.256338596343994, "step": 632, "token_acc": 0.20127530091078086 }, { "epoch": 0.37115215479331576, "grad_norm": 5.064988963622655, "learning_rate": 5.194607268464244e-05, "loss": 4.262134075164795, "step": 633, "token_acc": 0.20192980295889149 }, { "epoch": 0.37173849311052476, "grad_norm": 6.070005089696539, "learning_rate": 5.2028135990621335e-05, "loss": 4.247917175292969, "step": 634, "token_acc": 0.20496268898838493 }, { "epoch": 0.3723248314277338, "grad_norm": 7.035437886556663, "learning_rate": 5.211019929660023e-05, "loss": 4.255850791931152, "step": 635, "token_acc": 0.20356832027850305 }, { "epoch": 0.3729111697449428, "grad_norm": 6.117971571153342, "learning_rate": 5.2192262602579125e-05, "loss": 4.260165214538574, "step": 636, "token_acc": 0.2047510152180155 }, { "epoch": 0.3734975080621519, "grad_norm": 12.060227109146973, "learning_rate": 5.227432590855802e-05, "loss": 4.253174781799316, "step": 637, "token_acc": 0.20350842740904004 }, { "epoch": 0.3740838463793609, "grad_norm": 3.949708660184968, "learning_rate": 5.235638921453692e-05, "loss": 4.26906156539917, "step": 638, "token_acc": 0.20094818658619423 }, { "epoch": 0.37467018469656993, "grad_norm": 8.524892019361486, "learning_rate": 5.243845252051582e-05, "loss": 4.257997989654541, "step": 639, "token_acc": 0.20150783862929297 }, { "epoch": 0.37525652301377893, "grad_norm": 7.325621817381045, "learning_rate": 5.252051582649472e-05, "loss": 4.324463367462158, "step": 640, "token_acc": 0.1951068522124921 }, { "epoch": 0.375842861330988, "grad_norm": 6.6332324928320485, "learning_rate": 5.260257913247362e-05, "loss": 4.255081653594971, "step": 641, "token_acc": 0.20332773542111807 }, { "epoch": 0.376429199648197, "grad_norm": 7.372021097396333, "learning_rate": 5.268464243845252e-05, "loss": 4.265812397003174, "step": 642, "token_acc": 0.20216091155635685 }, { "epoch": 0.37701553796540604, "grad_norm": 4.968326969691676, "learning_rate": 5.276670574443142e-05, "loss": 4.176780700683594, "step": 643, "token_acc": 0.21011885574053468 }, { "epoch": 0.37760187628261505, "grad_norm": 5.468141668006452, "learning_rate": 5.2848769050410315e-05, "loss": 4.208514213562012, "step": 644, "token_acc": 0.2071975231073194 }, { "epoch": 0.3781882145998241, "grad_norm": 5.699032656341187, "learning_rate": 5.2930832356389206e-05, "loss": 4.256397247314453, "step": 645, "token_acc": 0.20213619130941965 }, { "epoch": 0.3787745529170331, "grad_norm": 7.695058660013026, "learning_rate": 5.3012895662368104e-05, "loss": 4.333415508270264, "step": 646, "token_acc": 0.19477001440625644 }, { "epoch": 0.37936089123424216, "grad_norm": 5.342643623289217, "learning_rate": 5.3094958968347e-05, "loss": 4.234793663024902, "step": 647, "token_acc": 0.20469448023915765 }, { "epoch": 0.37994722955145116, "grad_norm": 9.50816321744696, "learning_rate": 5.31770222743259e-05, "loss": 4.23974609375, "step": 648, "token_acc": 0.20590203865223747 }, { "epoch": 0.3805335678686602, "grad_norm": 5.2672766197213825, "learning_rate": 5.3259085580304805e-05, "loss": 4.329718589782715, "step": 649, "token_acc": 0.19572213440159827 }, { "epoch": 0.3811199061858693, "grad_norm": 9.291106055736, "learning_rate": 5.33411488862837e-05, "loss": 4.272111415863037, "step": 650, "token_acc": 0.19994436259105475 }, { "epoch": 0.3817062445030783, "grad_norm": 6.0741692853216955, "learning_rate": 5.34232121922626e-05, "loss": 4.19622802734375, "step": 651, "token_acc": 0.20987739410787173 }, { "epoch": 0.38229258282028733, "grad_norm": 5.533836567441832, "learning_rate": 5.35052754982415e-05, "loss": 4.302114486694336, "step": 652, "token_acc": 0.19659415260826232 }, { "epoch": 0.38287892113749633, "grad_norm": 8.553847965407476, "learning_rate": 5.35873388042204e-05, "loss": 4.287426471710205, "step": 653, "token_acc": 0.1989726405714917 }, { "epoch": 0.3834652594547054, "grad_norm": 4.4105232895566955, "learning_rate": 5.366940211019929e-05, "loss": 4.216601848602295, "step": 654, "token_acc": 0.20663760948393198 }, { "epoch": 0.3840515977719144, "grad_norm": 9.113729704927863, "learning_rate": 5.3751465416178186e-05, "loss": 4.238519191741943, "step": 655, "token_acc": 0.2031450603686175 }, { "epoch": 0.38463793608912344, "grad_norm": 5.348400808975778, "learning_rate": 5.3833528722157084e-05, "loss": 4.238986968994141, "step": 656, "token_acc": 0.20360236938589654 }, { "epoch": 0.38522427440633245, "grad_norm": 6.764859900160404, "learning_rate": 5.391559202813598e-05, "loss": 4.250058174133301, "step": 657, "token_acc": 0.2034909072254854 }, { "epoch": 0.3858106127235415, "grad_norm": 4.256537835605083, "learning_rate": 5.399765533411489e-05, "loss": 4.2425761222839355, "step": 658, "token_acc": 0.20315329264004284 }, { "epoch": 0.3863969510407505, "grad_norm": 7.047299068881363, "learning_rate": 5.4079718640093785e-05, "loss": 4.261916160583496, "step": 659, "token_acc": 0.20068792978959793 }, { "epoch": 0.38698328935795956, "grad_norm": 4.769386620516134, "learning_rate": 5.416178194607268e-05, "loss": 4.211328506469727, "step": 660, "token_acc": 0.20559451571854195 }, { "epoch": 0.38756962767516856, "grad_norm": 6.098845066360906, "learning_rate": 5.424384525205158e-05, "loss": 4.264926910400391, "step": 661, "token_acc": 0.20190850527195128 }, { "epoch": 0.3881559659923776, "grad_norm": 4.837044068288691, "learning_rate": 5.432590855803047e-05, "loss": 4.256516933441162, "step": 662, "token_acc": 0.20202830876988181 }, { "epoch": 0.3887423043095866, "grad_norm": 6.70982110798902, "learning_rate": 5.440797186400937e-05, "loss": 4.154748916625977, "step": 663, "token_acc": 0.21118555815459436 }, { "epoch": 0.3893286426267957, "grad_norm": 7.461742114936183, "learning_rate": 5.449003516998827e-05, "loss": 4.203394889831543, "step": 664, "token_acc": 0.20551437753333893 }, { "epoch": 0.3899149809440047, "grad_norm": 4.652825065191578, "learning_rate": 5.4572098475967166e-05, "loss": 4.2032647132873535, "step": 665, "token_acc": 0.20763021202619733 }, { "epoch": 0.39050131926121373, "grad_norm": 8.540272248397173, "learning_rate": 5.465416178194607e-05, "loss": 4.235311508178711, "step": 666, "token_acc": 0.20420387010641627 }, { "epoch": 0.39108765757842273, "grad_norm": 4.2331679056094265, "learning_rate": 5.473622508792497e-05, "loss": 4.260921478271484, "step": 667, "token_acc": 0.20075133197919878 }, { "epoch": 0.3916739958956318, "grad_norm": 6.576679074719142, "learning_rate": 5.481828839390387e-05, "loss": 4.147568702697754, "step": 668, "token_acc": 0.21375431565410727 }, { "epoch": 0.3922603342128408, "grad_norm": 4.80209191626738, "learning_rate": 5.4900351699882765e-05, "loss": 4.245433807373047, "step": 669, "token_acc": 0.20293622955242924 }, { "epoch": 0.39284667253004985, "grad_norm": 7.736238777858105, "learning_rate": 5.498241500586166e-05, "loss": 4.2774505615234375, "step": 670, "token_acc": 0.19808356935840904 }, { "epoch": 0.39343301084725885, "grad_norm": 8.579043790049862, "learning_rate": 5.5064478311840554e-05, "loss": 4.195842266082764, "step": 671, "token_acc": 0.20660908415556695 }, { "epoch": 0.3940193491644679, "grad_norm": 2.705451206256234, "learning_rate": 5.514654161781945e-05, "loss": 4.185248374938965, "step": 672, "token_acc": 0.20905620646553288 }, { "epoch": 0.3946056874816769, "grad_norm": 11.431402440916541, "learning_rate": 5.522860492379835e-05, "loss": 4.206908226013184, "step": 673, "token_acc": 0.20610974220385236 }, { "epoch": 0.39519202579888596, "grad_norm": 5.285335074542644, "learning_rate": 5.531066822977725e-05, "loss": 4.256257057189941, "step": 674, "token_acc": 0.20515903548467027 }, { "epoch": 0.39577836411609496, "grad_norm": 8.987733859903123, "learning_rate": 5.539273153575615e-05, "loss": 4.28242301940918, "step": 675, "token_acc": 0.20150640555637023 }, { "epoch": 0.396364702433304, "grad_norm": 6.249525903352653, "learning_rate": 5.547479484173505e-05, "loss": 4.207487106323242, "step": 676, "token_acc": 0.20964671472919927 }, { "epoch": 0.3969510407505131, "grad_norm": 5.0884343977261555, "learning_rate": 5.555685814771395e-05, "loss": 4.203760147094727, "step": 677, "token_acc": 0.20659704165577983 }, { "epoch": 0.3975373790677221, "grad_norm": 6.560956502373544, "learning_rate": 5.563892145369285e-05, "loss": 4.175498962402344, "step": 678, "token_acc": 0.2076709570113264 }, { "epoch": 0.39812371738493113, "grad_norm": 3.21155361480233, "learning_rate": 5.5720984759671745e-05, "loss": 4.247404098510742, "step": 679, "token_acc": 0.2023355250410138 }, { "epoch": 0.39871005570214013, "grad_norm": 6.000142226397825, "learning_rate": 5.5803048065650636e-05, "loss": 4.17470645904541, "step": 680, "token_acc": 0.2079712739351207 }, { "epoch": 0.3992963940193492, "grad_norm": 5.163044478363358, "learning_rate": 5.5885111371629534e-05, "loss": 4.334084987640381, "step": 681, "token_acc": 0.19304747956385798 }, { "epoch": 0.3998827323365582, "grad_norm": 5.775827465594905, "learning_rate": 5.596717467760843e-05, "loss": 4.2259931564331055, "step": 682, "token_acc": 0.20339967663744066 }, { "epoch": 0.40046907065376725, "grad_norm": 5.469309809611569, "learning_rate": 5.604923798358733e-05, "loss": 4.236342906951904, "step": 683, "token_acc": 0.20294456159865285 }, { "epoch": 0.40105540897097625, "grad_norm": 6.1674367504282115, "learning_rate": 5.6131301289566235e-05, "loss": 4.219335079193115, "step": 684, "token_acc": 0.20542775465816773 }, { "epoch": 0.4016417472881853, "grad_norm": 4.261188541523941, "learning_rate": 5.621336459554513e-05, "loss": 4.162544250488281, "step": 685, "token_acc": 0.21062752469296167 }, { "epoch": 0.4022280856053943, "grad_norm": 6.12411599441717, "learning_rate": 5.629542790152403e-05, "loss": 4.204689025878906, "step": 686, "token_acc": 0.2051478187277743 }, { "epoch": 0.40281442392260336, "grad_norm": 3.808443114456628, "learning_rate": 5.637749120750293e-05, "loss": 4.190600395202637, "step": 687, "token_acc": 0.20551752781527305 }, { "epoch": 0.40340076223981236, "grad_norm": 6.082695933506258, "learning_rate": 5.6459554513481827e-05, "loss": 4.199838638305664, "step": 688, "token_acc": 0.20602594579908987 }, { "epoch": 0.4039871005570214, "grad_norm": 5.498151956905652, "learning_rate": 5.654161781946072e-05, "loss": 4.201581954956055, "step": 689, "token_acc": 0.20699494128131288 }, { "epoch": 0.4045734388742304, "grad_norm": 5.769371998160246, "learning_rate": 5.6623681125439616e-05, "loss": 4.167081832885742, "step": 690, "token_acc": 0.20919080611007174 }, { "epoch": 0.4051597771914395, "grad_norm": 4.8127302195745525, "learning_rate": 5.6705744431418514e-05, "loss": 4.205621719360352, "step": 691, "token_acc": 0.205567081604426 }, { "epoch": 0.4057461155086485, "grad_norm": 4.711062674769272, "learning_rate": 5.678780773739741e-05, "loss": 4.227962017059326, "step": 692, "token_acc": 0.2018501459617256 }, { "epoch": 0.40633245382585753, "grad_norm": 6.382862690871602, "learning_rate": 5.6869871043376316e-05, "loss": 4.214358329772949, "step": 693, "token_acc": 0.20356120810979939 }, { "epoch": 0.40691879214306653, "grad_norm": 3.7484614915595973, "learning_rate": 5.6951934349355214e-05, "loss": 4.232317924499512, "step": 694, "token_acc": 0.2041442127991581 }, { "epoch": 0.4075051304602756, "grad_norm": 5.007036338560286, "learning_rate": 5.703399765533411e-05, "loss": 4.104582786560059, "step": 695, "token_acc": 0.21457855845784782 }, { "epoch": 0.4080914687774846, "grad_norm": 5.231535436627986, "learning_rate": 5.711606096131301e-05, "loss": 4.158563613891602, "step": 696, "token_acc": 0.2091188954339574 }, { "epoch": 0.40867780709469365, "grad_norm": 6.681352838638222, "learning_rate": 5.719812426729191e-05, "loss": 4.135995864868164, "step": 697, "token_acc": 0.21215096186999424 }, { "epoch": 0.40926414541190265, "grad_norm": 4.323274110154445, "learning_rate": 5.72801875732708e-05, "loss": 4.179586887359619, "step": 698, "token_acc": 0.20552419570244235 }, { "epoch": 0.4098504837291117, "grad_norm": 6.85420066927111, "learning_rate": 5.73622508792497e-05, "loss": 4.090293884277344, "step": 699, "token_acc": 0.21603218320659723 }, { "epoch": 0.4104368220463207, "grad_norm": 3.4930822274653766, "learning_rate": 5.7444314185228596e-05, "loss": 4.1796956062316895, "step": 700, "token_acc": 0.20468234788397927 }, { "epoch": 0.41102316036352976, "grad_norm": 8.955607941594574, "learning_rate": 5.75263774912075e-05, "loss": 4.1959004402160645, "step": 701, "token_acc": 0.20585980010592858 }, { "epoch": 0.41160949868073876, "grad_norm": 5.488655143025368, "learning_rate": 5.76084407971864e-05, "loss": 4.216902256011963, "step": 702, "token_acc": 0.20418105061764338 }, { "epoch": 0.4121958369979478, "grad_norm": 6.5665338828052775, "learning_rate": 5.7690504103165296e-05, "loss": 4.160950660705566, "step": 703, "token_acc": 0.20898828272647096 }, { "epoch": 0.4127821753151568, "grad_norm": 4.827311090143434, "learning_rate": 5.7772567409144194e-05, "loss": 4.191999912261963, "step": 704, "token_acc": 0.2044706303762662 }, { "epoch": 0.4133685136323659, "grad_norm": 5.340756247730568, "learning_rate": 5.785463071512309e-05, "loss": 4.205822944641113, "step": 705, "token_acc": 0.20248091304279758 }, { "epoch": 0.41395485194957493, "grad_norm": 6.400983175884303, "learning_rate": 5.793669402110199e-05, "loss": 4.137466907501221, "step": 706, "token_acc": 0.2097769155150084 }, { "epoch": 0.41454119026678393, "grad_norm": 6.701433086702472, "learning_rate": 5.801875732708088e-05, "loss": 4.122692584991455, "step": 707, "token_acc": 0.21178068848845258 }, { "epoch": 0.415127528583993, "grad_norm": 3.724613052876181, "learning_rate": 5.810082063305978e-05, "loss": 4.093698501586914, "step": 708, "token_acc": 0.21447327281832024 }, { "epoch": 0.415713866901202, "grad_norm": 7.013574370295882, "learning_rate": 5.818288393903868e-05, "loss": 4.14458703994751, "step": 709, "token_acc": 0.2128691778309926 }, { "epoch": 0.41630020521841105, "grad_norm": 4.433594166391518, "learning_rate": 5.826494724501758e-05, "loss": 4.145415306091309, "step": 710, "token_acc": 0.2111799756552615 }, { "epoch": 0.41688654353562005, "grad_norm": 8.562807618790616, "learning_rate": 5.834701055099648e-05, "loss": 4.145606994628906, "step": 711, "token_acc": 0.20886160618505178 }, { "epoch": 0.4174728818528291, "grad_norm": 4.71640556750639, "learning_rate": 5.842907385697538e-05, "loss": 4.19354248046875, "step": 712, "token_acc": 0.2035779857954017 }, { "epoch": 0.4180592201700381, "grad_norm": 5.065617495900307, "learning_rate": 5.8511137162954276e-05, "loss": 4.19216251373291, "step": 713, "token_acc": 0.2055668735366375 }, { "epoch": 0.41864555848724716, "grad_norm": 5.549692737992384, "learning_rate": 5.8593200468933174e-05, "loss": 4.141023635864258, "step": 714, "token_acc": 0.2081619019753553 }, { "epoch": 0.41923189680445616, "grad_norm": 5.114101547372383, "learning_rate": 5.867526377491207e-05, "loss": 4.121971130371094, "step": 715, "token_acc": 0.2113343052803417 }, { "epoch": 0.4198182351216652, "grad_norm": 5.579266668043171, "learning_rate": 5.875732708089096e-05, "loss": 4.0913543701171875, "step": 716, "token_acc": 0.21559033885629417 }, { "epoch": 0.4204045734388742, "grad_norm": 4.694940547126315, "learning_rate": 5.883939038686986e-05, "loss": 4.136467933654785, "step": 717, "token_acc": 0.2098276066076417 }, { "epoch": 0.4209909117560833, "grad_norm": 6.340555808406843, "learning_rate": 5.892145369284876e-05, "loss": 4.15889835357666, "step": 718, "token_acc": 0.20600056624092758 }, { "epoch": 0.4215772500732923, "grad_norm": 5.213191570752816, "learning_rate": 5.9003516998827664e-05, "loss": 4.099459648132324, "step": 719, "token_acc": 0.2142941638823681 }, { "epoch": 0.42216358839050133, "grad_norm": 6.278967182497455, "learning_rate": 5.908558030480656e-05, "loss": 4.174129486083984, "step": 720, "token_acc": 0.20631060544328553 }, { "epoch": 0.42274992670771033, "grad_norm": 3.709784921330175, "learning_rate": 5.916764361078546e-05, "loss": 4.042458534240723, "step": 721, "token_acc": 0.21835256529561387 }, { "epoch": 0.4233362650249194, "grad_norm": 5.586953604909075, "learning_rate": 5.924970691676436e-05, "loss": 4.143787860870361, "step": 722, "token_acc": 0.20750663498293112 }, { "epoch": 0.4239226033421284, "grad_norm": 4.56629087830607, "learning_rate": 5.9331770222743256e-05, "loss": 4.133569240570068, "step": 723, "token_acc": 0.21038678435699398 }, { "epoch": 0.42450894165933745, "grad_norm": 4.036103425279676, "learning_rate": 5.9413833528722154e-05, "loss": 4.041327476501465, "step": 724, "token_acc": 0.21928318147847795 }, { "epoch": 0.42509527997654645, "grad_norm": 6.767492736506062, "learning_rate": 5.9495896834701045e-05, "loss": 4.161279678344727, "step": 725, "token_acc": 0.20512148042465697 }, { "epoch": 0.4256816182937555, "grad_norm": 5.065260523826766, "learning_rate": 5.957796014067994e-05, "loss": 4.168001651763916, "step": 726, "token_acc": 0.20429929458020712 }, { "epoch": 0.4262679566109645, "grad_norm": 5.531995663876098, "learning_rate": 5.966002344665884e-05, "loss": 4.085506916046143, "step": 727, "token_acc": 0.2132195431706141 }, { "epoch": 0.42685429492817356, "grad_norm": 6.537170283925281, "learning_rate": 5.9742086752637746e-05, "loss": 4.111567497253418, "step": 728, "token_acc": 0.2124138106830145 }, { "epoch": 0.42744063324538256, "grad_norm": 3.926897959552154, "learning_rate": 5.9824150058616644e-05, "loss": 4.084894180297852, "step": 729, "token_acc": 0.21293772447408926 }, { "epoch": 0.4280269715625916, "grad_norm": 7.690922528933665, "learning_rate": 5.990621336459554e-05, "loss": 4.120464324951172, "step": 730, "token_acc": 0.21219283546098913 }, { "epoch": 0.4286133098798006, "grad_norm": 3.8756421542125663, "learning_rate": 5.998827667057444e-05, "loss": 4.135234355926514, "step": 731, "token_acc": 0.20832390745501286 }, { "epoch": 0.4291996481970097, "grad_norm": 7.445468777171998, "learning_rate": 6.007033997655334e-05, "loss": 4.1082563400268555, "step": 732, "token_acc": 0.21236238617953557 }, { "epoch": 0.42978598651421873, "grad_norm": 3.820678732926357, "learning_rate": 6.0152403282532236e-05, "loss": 4.090451717376709, "step": 733, "token_acc": 0.21370788934399346 }, { "epoch": 0.43037232483142773, "grad_norm": 5.710618810201329, "learning_rate": 6.023446658851113e-05, "loss": 4.08793830871582, "step": 734, "token_acc": 0.21196567528739338 }, { "epoch": 0.4309586631486368, "grad_norm": 5.392673999942736, "learning_rate": 6.0316529894490025e-05, "loss": 4.105639457702637, "step": 735, "token_acc": 0.2126338972870157 }, { "epoch": 0.4315450014658458, "grad_norm": 4.971434822494723, "learning_rate": 6.039859320046893e-05, "loss": 4.056774139404297, "step": 736, "token_acc": 0.21927300915111567 }, { "epoch": 0.43213133978305485, "grad_norm": 6.004185320984899, "learning_rate": 6.048065650644783e-05, "loss": 4.115375518798828, "step": 737, "token_acc": 0.2123379002489912 }, { "epoch": 0.43271767810026385, "grad_norm": 4.8816387426639976, "learning_rate": 6.0562719812426726e-05, "loss": 4.111758708953857, "step": 738, "token_acc": 0.2102325605920395 }, { "epoch": 0.4333040164174729, "grad_norm": 6.426680953167651, "learning_rate": 6.0644783118405624e-05, "loss": 4.107548713684082, "step": 739, "token_acc": 0.21164426724969454 }, { "epoch": 0.4338903547346819, "grad_norm": 4.226173696181125, "learning_rate": 6.072684642438452e-05, "loss": 4.060413837432861, "step": 740, "token_acc": 0.2172739256525178 }, { "epoch": 0.43447669305189096, "grad_norm": 7.618018312018148, "learning_rate": 6.080890973036342e-05, "loss": 4.167675018310547, "step": 741, "token_acc": 0.20574997066885672 }, { "epoch": 0.43506303136909996, "grad_norm": 3.7076965117358864, "learning_rate": 6.089097303634231e-05, "loss": 4.0514020919799805, "step": 742, "token_acc": 0.21745280754978163 }, { "epoch": 0.435649369686309, "grad_norm": 6.403849352684368, "learning_rate": 6.097303634232121e-05, "loss": 4.10659122467041, "step": 743, "token_acc": 0.2127811103285217 }, { "epoch": 0.436235708003518, "grad_norm": 4.9477706004904825, "learning_rate": 6.105509964830011e-05, "loss": 4.063077449798584, "step": 744, "token_acc": 0.2155527225439876 }, { "epoch": 0.4368220463207271, "grad_norm": 5.70651725041729, "learning_rate": 6.113716295427901e-05, "loss": 4.0971269607543945, "step": 745, "token_acc": 0.21257264007560953 }, { "epoch": 0.4374083846379361, "grad_norm": 8.201175989736933, "learning_rate": 6.121922626025791e-05, "loss": 4.1412858963012695, "step": 746, "token_acc": 0.20583142301571436 }, { "epoch": 0.43799472295514513, "grad_norm": 4.315864901252941, "learning_rate": 6.130128956623681e-05, "loss": 4.070844650268555, "step": 747, "token_acc": 0.2150485116493961 }, { "epoch": 0.43858106127235413, "grad_norm": 6.678621435289561, "learning_rate": 6.13833528722157e-05, "loss": 4.155675411224365, "step": 748, "token_acc": 0.2047260192436092 }, { "epoch": 0.4391673995895632, "grad_norm": 4.288474693404013, "learning_rate": 6.14654161781946e-05, "loss": 4.099294662475586, "step": 749, "token_acc": 0.21298414635729773 }, { "epoch": 0.4397537379067722, "grad_norm": 4.825265843377594, "learning_rate": 6.15474794841735e-05, "loss": 4.111228942871094, "step": 750, "token_acc": 0.21178295227946528 }, { "epoch": 0.44034007622398125, "grad_norm": 6.647912642882177, "learning_rate": 6.16295427901524e-05, "loss": 4.127750396728516, "step": 751, "token_acc": 0.2068447357818744 }, { "epoch": 0.44092641454119025, "grad_norm": 6.61583689021098, "learning_rate": 6.17116060961313e-05, "loss": 4.114223480224609, "step": 752, "token_acc": 0.20892537923285154 }, { "epoch": 0.4415127528583993, "grad_norm": 4.480109992919529, "learning_rate": 6.17936694021102e-05, "loss": 4.079311847686768, "step": 753, "token_acc": 0.21533014157792182 }, { "epoch": 0.4420990911756083, "grad_norm": 6.888122259844035, "learning_rate": 6.18757327080891e-05, "loss": 4.046022891998291, "step": 754, "token_acc": 0.21643093730627871 }, { "epoch": 0.44268542949281736, "grad_norm": 2.9024707980486264, "learning_rate": 6.195779601406799e-05, "loss": 4.097957611083984, "step": 755, "token_acc": 0.21023586934017105 }, { "epoch": 0.44327176781002636, "grad_norm": 7.1683543663645, "learning_rate": 6.203985932004689e-05, "loss": 4.111137390136719, "step": 756, "token_acc": 0.21366301584842798 }, { "epoch": 0.4438581061272354, "grad_norm": 5.228513188408095, "learning_rate": 6.212192262602579e-05, "loss": 4.120720863342285, "step": 757, "token_acc": 0.20936401430820153 }, { "epoch": 0.4444444444444444, "grad_norm": 6.695585981623289, "learning_rate": 6.220398593200469e-05, "loss": 4.129217624664307, "step": 758, "token_acc": 0.20681253133879324 }, { "epoch": 0.4450307827616535, "grad_norm": 4.440925726101526, "learning_rate": 6.228604923798358e-05, "loss": 4.070681095123291, "step": 759, "token_acc": 0.21466587207607432 }, { "epoch": 0.4456171210788625, "grad_norm": 4.8071648886887886, "learning_rate": 6.236811254396248e-05, "loss": 4.104011058807373, "step": 760, "token_acc": 0.21046202895250318 }, { "epoch": 0.44620345939607153, "grad_norm": 5.303969375265496, "learning_rate": 6.245017584994138e-05, "loss": 4.077654838562012, "step": 761, "token_acc": 0.21338909925718733 }, { "epoch": 0.4467897977132806, "grad_norm": 4.2044613693246795, "learning_rate": 6.253223915592028e-05, "loss": 4.057465553283691, "step": 762, "token_acc": 0.214498190518995 }, { "epoch": 0.4473761360304896, "grad_norm": 5.908054074818977, "learning_rate": 6.261430246189918e-05, "loss": 4.061487197875977, "step": 763, "token_acc": 0.21570638765708755 }, { "epoch": 0.44796247434769865, "grad_norm": 6.021553993864116, "learning_rate": 6.269636576787807e-05, "loss": 4.042322158813477, "step": 764, "token_acc": 0.21750293562730066 }, { "epoch": 0.44854881266490765, "grad_norm": 3.63104793768578, "learning_rate": 6.277842907385697e-05, "loss": 4.083230972290039, "step": 765, "token_acc": 0.21296144776764037 }, { "epoch": 0.4491351509821167, "grad_norm": 7.472090332995207, "learning_rate": 6.286049237983587e-05, "loss": 4.041102886199951, "step": 766, "token_acc": 0.21816202405949708 }, { "epoch": 0.4497214892993257, "grad_norm": 4.0288501598641115, "learning_rate": 6.294255568581477e-05, "loss": 4.090574264526367, "step": 767, "token_acc": 0.2127265497188395 }, { "epoch": 0.45030782761653476, "grad_norm": 7.074433521578454, "learning_rate": 6.302461899179367e-05, "loss": 4.0869951248168945, "step": 768, "token_acc": 0.21064017051419362 }, { "epoch": 0.45089416593374376, "grad_norm": 4.16343708212507, "learning_rate": 6.310668229777256e-05, "loss": 4.129717826843262, "step": 769, "token_acc": 0.20743499842522964 }, { "epoch": 0.4514805042509528, "grad_norm": 5.087280517204008, "learning_rate": 6.318874560375146e-05, "loss": 4.055534839630127, "step": 770, "token_acc": 0.21638147863370388 }, { "epoch": 0.4520668425681618, "grad_norm": 4.912551123275517, "learning_rate": 6.327080890973036e-05, "loss": 4.04734992980957, "step": 771, "token_acc": 0.21546405803586352 }, { "epoch": 0.4526531808853709, "grad_norm": 6.012292623593857, "learning_rate": 6.335287221570926e-05, "loss": 4.0179877281188965, "step": 772, "token_acc": 0.22023658505645058 }, { "epoch": 0.4532395192025799, "grad_norm": 3.618287335602584, "learning_rate": 6.343493552168816e-05, "loss": 4.069624900817871, "step": 773, "token_acc": 0.2154240159494792 }, { "epoch": 0.45382585751978893, "grad_norm": 6.118985284309547, "learning_rate": 6.351699882766705e-05, "loss": 4.104743480682373, "step": 774, "token_acc": 0.2114108018788139 }, { "epoch": 0.45441219583699793, "grad_norm": 4.964537114713395, "learning_rate": 6.359906213364595e-05, "loss": 4.118830680847168, "step": 775, "token_acc": 0.20860223547533474 }, { "epoch": 0.454998534154207, "grad_norm": 4.17806497393013, "learning_rate": 6.368112543962485e-05, "loss": 4.059308052062988, "step": 776, "token_acc": 0.21496380446775845 }, { "epoch": 0.455584872471416, "grad_norm": 6.644856595094865, "learning_rate": 6.376318874560375e-05, "loss": 4.054924011230469, "step": 777, "token_acc": 0.21495542103249865 }, { "epoch": 0.45617121078862505, "grad_norm": 3.240986415940476, "learning_rate": 6.384525205158265e-05, "loss": 4.056221008300781, "step": 778, "token_acc": 0.21613459645935396 }, { "epoch": 0.45675754910583405, "grad_norm": 7.305868384031052, "learning_rate": 6.392731535756154e-05, "loss": 4.071720123291016, "step": 779, "token_acc": 0.21205278553559848 }, { "epoch": 0.4573438874230431, "grad_norm": 6.1282390778907745, "learning_rate": 6.400937866354044e-05, "loss": 4.075984477996826, "step": 780, "token_acc": 0.2104517708277205 }, { "epoch": 0.4579302257402521, "grad_norm": 4.092787147119571, "learning_rate": 6.409144196951934e-05, "loss": 4.057436943054199, "step": 781, "token_acc": 0.21637315836440124 }, { "epoch": 0.45851656405746116, "grad_norm": 5.960452762804376, "learning_rate": 6.417350527549824e-05, "loss": 4.043334484100342, "step": 782, "token_acc": 0.21579216397883813 }, { "epoch": 0.45910290237467016, "grad_norm": 3.5712552289131394, "learning_rate": 6.425556858147713e-05, "loss": 4.0843186378479, "step": 783, "token_acc": 0.21234321468166292 }, { "epoch": 0.4596892406918792, "grad_norm": 5.729685171301163, "learning_rate": 6.433763188745603e-05, "loss": 4.095596790313721, "step": 784, "token_acc": 0.2111191831874677 }, { "epoch": 0.4602755790090882, "grad_norm": 3.3307347079350316, "learning_rate": 6.441969519343493e-05, "loss": 4.078511714935303, "step": 785, "token_acc": 0.21295191160050822 }, { "epoch": 0.4608619173262973, "grad_norm": 4.240882934666269, "learning_rate": 6.450175849941383e-05, "loss": 4.046321868896484, "step": 786, "token_acc": 0.21484360771580366 }, { "epoch": 0.4614482556435063, "grad_norm": 4.699549985134434, "learning_rate": 6.458382180539273e-05, "loss": 4.104073524475098, "step": 787, "token_acc": 0.2084332714556068 }, { "epoch": 0.46203459396071533, "grad_norm": 5.481704256863831, "learning_rate": 6.466588511137162e-05, "loss": 4.05832576751709, "step": 788, "token_acc": 0.2149072259261436 }, { "epoch": 0.46262093227792433, "grad_norm": 2.817924037726149, "learning_rate": 6.474794841735052e-05, "loss": 4.053046703338623, "step": 789, "token_acc": 0.21451936812902425 }, { "epoch": 0.4632072705951334, "grad_norm": 5.691301326157076, "learning_rate": 6.483001172332942e-05, "loss": 4.066107273101807, "step": 790, "token_acc": 0.21437340442471967 }, { "epoch": 0.46379360891234245, "grad_norm": 3.378327341464904, "learning_rate": 6.491207502930832e-05, "loss": 4.1215620040893555, "step": 791, "token_acc": 0.20711647764477853 }, { "epoch": 0.46437994722955145, "grad_norm": 4.460702793020244, "learning_rate": 6.499413833528722e-05, "loss": 4.027914047241211, "step": 792, "token_acc": 0.21829832775919733 }, { "epoch": 0.4649662855467605, "grad_norm": 5.464669142349539, "learning_rate": 6.507620164126611e-05, "loss": 4.102588653564453, "step": 793, "token_acc": 0.21014650393452708 }, { "epoch": 0.4655526238639695, "grad_norm": 4.448583527409169, "learning_rate": 6.515826494724501e-05, "loss": 4.055959224700928, "step": 794, "token_acc": 0.2134151901042458 }, { "epoch": 0.46613896218117856, "grad_norm": 5.1349201607474715, "learning_rate": 6.524032825322391e-05, "loss": 4.034310817718506, "step": 795, "token_acc": 0.21668292853346244 }, { "epoch": 0.46672530049838756, "grad_norm": 4.5210699795669695, "learning_rate": 6.532239155920281e-05, "loss": 4.062445163726807, "step": 796, "token_acc": 0.21575042959527235 }, { "epoch": 0.4673116388155966, "grad_norm": 4.811041422453679, "learning_rate": 6.54044548651817e-05, "loss": 4.036968231201172, "step": 797, "token_acc": 0.21730169991917522 }, { "epoch": 0.4678979771328056, "grad_norm": 4.68037780882389, "learning_rate": 6.54865181711606e-05, "loss": 3.966832399368286, "step": 798, "token_acc": 0.22272376882524278 }, { "epoch": 0.4684843154500147, "grad_norm": 3.832528698351509, "learning_rate": 6.55685814771395e-05, "loss": 4.03481388092041, "step": 799, "token_acc": 0.21444375256693915 }, { "epoch": 0.4690706537672237, "grad_norm": 5.085002612796332, "learning_rate": 6.56506447831184e-05, "loss": 4.040844440460205, "step": 800, "token_acc": 0.21513813918991306 }, { "epoch": 0.46965699208443273, "grad_norm": 5.159114561565329, "learning_rate": 6.57327080890973e-05, "loss": 4.041948318481445, "step": 801, "token_acc": 0.21609032413414847 }, { "epoch": 0.47024333040164173, "grad_norm": 3.5855464618679904, "learning_rate": 6.58147713950762e-05, "loss": 4.031923770904541, "step": 802, "token_acc": 0.21435230731986216 }, { "epoch": 0.4708296687188508, "grad_norm": 5.753062004348817, "learning_rate": 6.58968347010551e-05, "loss": 4.0522003173828125, "step": 803, "token_acc": 0.2155308616660226 }, { "epoch": 0.4714160070360598, "grad_norm": 3.394091310811631, "learning_rate": 6.597889800703399e-05, "loss": 4.077088356018066, "step": 804, "token_acc": 0.2103382382542002 }, { "epoch": 0.47200234535326885, "grad_norm": 5.790928520527392, "learning_rate": 6.606096131301289e-05, "loss": 4.04644250869751, "step": 805, "token_acc": 0.21362232295260344 }, { "epoch": 0.47258868367047785, "grad_norm": 2.8794334226316685, "learning_rate": 6.614302461899179e-05, "loss": 3.968930959701538, "step": 806, "token_acc": 0.2215706449841017 }, { "epoch": 0.4731750219876869, "grad_norm": 5.777484653725544, "learning_rate": 6.622508792497069e-05, "loss": 4.076010704040527, "step": 807, "token_acc": 0.2109606115349116 }, { "epoch": 0.4737613603048959, "grad_norm": 4.027667007149946, "learning_rate": 6.630715123094958e-05, "loss": 4.050105571746826, "step": 808, "token_acc": 0.2139537168205648 }, { "epoch": 0.47434769862210496, "grad_norm": 4.694283129384224, "learning_rate": 6.638921453692848e-05, "loss": 4.021165370941162, "step": 809, "token_acc": 0.2166883293459035 }, { "epoch": 0.47493403693931396, "grad_norm": 5.634513434650093, "learning_rate": 6.647127784290738e-05, "loss": 4.004396438598633, "step": 810, "token_acc": 0.21737749348689822 }, { "epoch": 0.475520375256523, "grad_norm": 4.874597054510726, "learning_rate": 6.655334114888628e-05, "loss": 4.058430194854736, "step": 811, "token_acc": 0.21351653327688716 }, { "epoch": 0.476106713573732, "grad_norm": 4.246625045347158, "learning_rate": 6.663540445486518e-05, "loss": 4.004892826080322, "step": 812, "token_acc": 0.21960231944952863 }, { "epoch": 0.4766930518909411, "grad_norm": 3.2497735868631965, "learning_rate": 6.671746776084407e-05, "loss": 3.9837310314178467, "step": 813, "token_acc": 0.21899919476679208 }, { "epoch": 0.4772793902081501, "grad_norm": 5.341321448805372, "learning_rate": 6.679953106682297e-05, "loss": 4.0097270011901855, "step": 814, "token_acc": 0.21651922246906585 }, { "epoch": 0.47786572852535913, "grad_norm": 3.6718191727682497, "learning_rate": 6.688159437280187e-05, "loss": 4.033107757568359, "step": 815, "token_acc": 0.21488122536297774 }, { "epoch": 0.47845206684256814, "grad_norm": 7.305765591297953, "learning_rate": 6.696365767878077e-05, "loss": 4.046302318572998, "step": 816, "token_acc": 0.21270874719641142 }, { "epoch": 0.4790384051597772, "grad_norm": 2.6863816673479763, "learning_rate": 6.704572098475967e-05, "loss": 4.0154900550842285, "step": 817, "token_acc": 0.21801591727074568 }, { "epoch": 0.47962474347698625, "grad_norm": 6.438865388948921, "learning_rate": 6.712778429073856e-05, "loss": 4.00985860824585, "step": 818, "token_acc": 0.21753190755432908 }, { "epoch": 0.48021108179419525, "grad_norm": 4.259731133756506, "learning_rate": 6.720984759671746e-05, "loss": 4.011630058288574, "step": 819, "token_acc": 0.2180763419859866 }, { "epoch": 0.4807974201114043, "grad_norm": 4.7477933044214415, "learning_rate": 6.729191090269636e-05, "loss": 4.006157875061035, "step": 820, "token_acc": 0.2164464293936185 }, { "epoch": 0.4813837584286133, "grad_norm": 3.2981815112000246, "learning_rate": 6.737397420867526e-05, "loss": 4.033952713012695, "step": 821, "token_acc": 0.214432444369123 }, { "epoch": 0.48197009674582236, "grad_norm": 4.612507856748187, "learning_rate": 6.745603751465416e-05, "loss": 4.019974231719971, "step": 822, "token_acc": 0.21820074826408517 }, { "epoch": 0.48255643506303136, "grad_norm": 3.836725240344396, "learning_rate": 6.753810082063305e-05, "loss": 3.980194330215454, "step": 823, "token_acc": 0.22201967517880658 }, { "epoch": 0.4831427733802404, "grad_norm": 4.802984966882959, "learning_rate": 6.762016412661195e-05, "loss": 3.9957339763641357, "step": 824, "token_acc": 0.21802652458568472 }, { "epoch": 0.4837291116974494, "grad_norm": 5.152519858861021, "learning_rate": 6.770222743259085e-05, "loss": 4.057910442352295, "step": 825, "token_acc": 0.21181657164281373 }, { "epoch": 0.4843154500146585, "grad_norm": 4.433238928229318, "learning_rate": 6.778429073856975e-05, "loss": 3.9672060012817383, "step": 826, "token_acc": 0.22388096135746582 }, { "epoch": 0.4849017883318675, "grad_norm": 4.520959270076583, "learning_rate": 6.786635404454865e-05, "loss": 3.940217971801758, "step": 827, "token_acc": 0.22445812261790385 }, { "epoch": 0.48548812664907653, "grad_norm": 6.788972715607243, "learning_rate": 6.794841735052754e-05, "loss": 4.062726020812988, "step": 828, "token_acc": 0.21115039758137993 }, { "epoch": 0.48607446496628554, "grad_norm": 3.0956771067886115, "learning_rate": 6.803048065650644e-05, "loss": 3.9951138496398926, "step": 829, "token_acc": 0.2190778666082576 }, { "epoch": 0.4866608032834946, "grad_norm": 5.696698618129569, "learning_rate": 6.811254396248534e-05, "loss": 3.990934371948242, "step": 830, "token_acc": 0.21920582528774726 }, { "epoch": 0.4872471416007036, "grad_norm": 3.3943526120013923, "learning_rate": 6.819460726846424e-05, "loss": 3.961142063140869, "step": 831, "token_acc": 0.22282085337455385 }, { "epoch": 0.48783347991791265, "grad_norm": 7.034190513970132, "learning_rate": 6.827667057444314e-05, "loss": 4.0496745109558105, "step": 832, "token_acc": 0.21266195417128528 }, { "epoch": 0.48841981823512165, "grad_norm": 3.2203863755841224, "learning_rate": 6.835873388042203e-05, "loss": 3.9616713523864746, "step": 833, "token_acc": 0.22296148152814085 }, { "epoch": 0.4890061565523307, "grad_norm": 4.992999536862018, "learning_rate": 6.844079718640093e-05, "loss": 3.9896860122680664, "step": 834, "token_acc": 0.21847508496621218 }, { "epoch": 0.4895924948695397, "grad_norm": 5.217503769547477, "learning_rate": 6.852286049237983e-05, "loss": 3.9951581954956055, "step": 835, "token_acc": 0.2189320836908455 }, { "epoch": 0.49017883318674876, "grad_norm": 4.802373612412434, "learning_rate": 6.860492379835873e-05, "loss": 3.9440159797668457, "step": 836, "token_acc": 0.22156611107870328 }, { "epoch": 0.49076517150395776, "grad_norm": 4.439798899640177, "learning_rate": 6.868698710433763e-05, "loss": 4.035882472991943, "step": 837, "token_acc": 0.21453252216919874 }, { "epoch": 0.4913515098211668, "grad_norm": 4.45858539117359, "learning_rate": 6.876905041031652e-05, "loss": 3.9638586044311523, "step": 838, "token_acc": 0.22277777777777777 }, { "epoch": 0.4919378481383758, "grad_norm": 3.8991037432482925, "learning_rate": 6.885111371629542e-05, "loss": 3.9965476989746094, "step": 839, "token_acc": 0.21630768664815053 }, { "epoch": 0.4925241864555849, "grad_norm": 5.294590930701878, "learning_rate": 6.893317702227432e-05, "loss": 3.9517555236816406, "step": 840, "token_acc": 0.22350560107807188 }, { "epoch": 0.4931105247727939, "grad_norm": 3.8762030871395527, "learning_rate": 6.901524032825322e-05, "loss": 3.9987030029296875, "step": 841, "token_acc": 0.21646050643745843 }, { "epoch": 0.49369686309000294, "grad_norm": 5.226671133508383, "learning_rate": 6.909730363423212e-05, "loss": 3.972357988357544, "step": 842, "token_acc": 0.22039892121837037 }, { "epoch": 0.49428320140721194, "grad_norm": 5.248159644147203, "learning_rate": 6.917936694021101e-05, "loss": 4.009156227111816, "step": 843, "token_acc": 0.21540408332861163 }, { "epoch": 0.494869539724421, "grad_norm": 3.0625152638377724, "learning_rate": 6.926143024618991e-05, "loss": 3.9520890712738037, "step": 844, "token_acc": 0.22420237200367493 }, { "epoch": 0.49545587804163, "grad_norm": 5.2969751167623995, "learning_rate": 6.934349355216881e-05, "loss": 3.9215798377990723, "step": 845, "token_acc": 0.22605102943293773 }, { "epoch": 0.49604221635883905, "grad_norm": 3.893484568704019, "learning_rate": 6.942555685814771e-05, "loss": 3.9572455883026123, "step": 846, "token_acc": 0.21943391387588967 }, { "epoch": 0.4966285546760481, "grad_norm": 6.524297086600854, "learning_rate": 6.95076201641266e-05, "loss": 4.00451135635376, "step": 847, "token_acc": 0.21761363066397826 }, { "epoch": 0.4972148929932571, "grad_norm": 3.109434153817786, "learning_rate": 6.95896834701055e-05, "loss": 3.995995044708252, "step": 848, "token_acc": 0.2191871889617463 }, { "epoch": 0.49780123131046616, "grad_norm": 5.226130412008868, "learning_rate": 6.96717467760844e-05, "loss": 4.031238555908203, "step": 849, "token_acc": 0.21236272769119485 }, { "epoch": 0.49838756962767516, "grad_norm": 3.7466784467786693, "learning_rate": 6.97538100820633e-05, "loss": 4.011455059051514, "step": 850, "token_acc": 0.21532591334330553 }, { "epoch": 0.4989739079448842, "grad_norm": 7.173364308176404, "learning_rate": 6.98358733880422e-05, "loss": 3.9455344676971436, "step": 851, "token_acc": 0.22367477435893424 }, { "epoch": 0.4995602462620932, "grad_norm": 3.3513706818245423, "learning_rate": 6.99179366940211e-05, "loss": 4.007432460784912, "step": 852, "token_acc": 0.2182759632185081 }, { "epoch": 0.5001465845793023, "grad_norm": 8.6649703776723, "learning_rate": 7e-05, "loss": 3.9926390647888184, "step": 853, "token_acc": 0.2181222064266595 }, { "epoch": 0.5007329228965113, "grad_norm": 4.352675286888637, "learning_rate": 7.008206330597889e-05, "loss": 4.01979398727417, "step": 854, "token_acc": 0.21553492646597247 }, { "epoch": 0.5013192612137203, "grad_norm": 5.711007515364585, "learning_rate": 7.016412661195779e-05, "loss": 4.0128350257873535, "step": 855, "token_acc": 0.21589557635105247 }, { "epoch": 0.5019055995309294, "grad_norm": 3.656445345419733, "learning_rate": 7.024618991793669e-05, "loss": 4.00874137878418, "step": 856, "token_acc": 0.21513901765827978 }, { "epoch": 0.5024919378481384, "grad_norm": 4.057422859025177, "learning_rate": 7.032825322391559e-05, "loss": 3.9652767181396484, "step": 857, "token_acc": 0.2221678747417772 }, { "epoch": 0.5030782761653474, "grad_norm": 3.444897759941114, "learning_rate": 7.041031652989448e-05, "loss": 3.9911670684814453, "step": 858, "token_acc": 0.21638694559265656 }, { "epoch": 0.5036646144825564, "grad_norm": 5.750083563618263, "learning_rate": 7.049237983587338e-05, "loss": 4.0121235847473145, "step": 859, "token_acc": 0.21741492002027107 }, { "epoch": 0.5042509527997655, "grad_norm": 3.4633672135683065, "learning_rate": 7.057444314185228e-05, "loss": 3.987497091293335, "step": 860, "token_acc": 0.2168935219814372 }, { "epoch": 0.5048372911169745, "grad_norm": 4.033214313903988, "learning_rate": 7.065650644783119e-05, "loss": 3.9413931369781494, "step": 861, "token_acc": 0.22270985017071285 }, { "epoch": 0.5054236294341835, "grad_norm": 4.14091094745713, "learning_rate": 7.073856975381006e-05, "loss": 3.9866116046905518, "step": 862, "token_acc": 0.21899040242108717 }, { "epoch": 0.5060099677513925, "grad_norm": 4.440484227663608, "learning_rate": 7.082063305978897e-05, "loss": 3.9983103275299072, "step": 863, "token_acc": 0.2147575009195523 }, { "epoch": 0.5065963060686016, "grad_norm": 3.7214046582832365, "learning_rate": 7.090269636576787e-05, "loss": 4.019493103027344, "step": 864, "token_acc": 0.21327359866638884 }, { "epoch": 0.5071826443858106, "grad_norm": 4.486378213749062, "learning_rate": 7.098475967174677e-05, "loss": 3.9570109844207764, "step": 865, "token_acc": 0.2214040674995458 }, { "epoch": 0.5077689827030196, "grad_norm": 3.8467668693372308, "learning_rate": 7.106682297772567e-05, "loss": 3.9893617630004883, "step": 866, "token_acc": 0.21912038986823898 }, { "epoch": 0.5083553210202286, "grad_norm": 6.146675121610778, "learning_rate": 7.114888628370457e-05, "loss": 3.934000015258789, "step": 867, "token_acc": 0.22261524087744924 }, { "epoch": 0.5089416593374377, "grad_norm": 3.0193792674239983, "learning_rate": 7.123094958968346e-05, "loss": 3.879106283187866, "step": 868, "token_acc": 0.22801801036445501 }, { "epoch": 0.5095279976546467, "grad_norm": 7.929730089888444, "learning_rate": 7.131301289566236e-05, "loss": 4.037492275238037, "step": 869, "token_acc": 0.2142709155614767 }, { "epoch": 0.5101143359718557, "grad_norm": 5.21060678523679, "learning_rate": 7.139507620164126e-05, "loss": 3.9994630813598633, "step": 870, "token_acc": 0.2173769510107253 }, { "epoch": 0.5107006742890647, "grad_norm": 5.409436630914686, "learning_rate": 7.147713950762016e-05, "loss": 4.012307167053223, "step": 871, "token_acc": 0.21459939365400182 }, { "epoch": 0.5112870126062738, "grad_norm": 4.270792580029859, "learning_rate": 7.155920281359906e-05, "loss": 3.9235074520111084, "step": 872, "token_acc": 0.22711250979461303 }, { "epoch": 0.5118733509234829, "grad_norm": 3.721081466540487, "learning_rate": 7.164126611957795e-05, "loss": 4.00079345703125, "step": 873, "token_acc": 0.2172611024664437 }, { "epoch": 0.5124596892406919, "grad_norm": 5.233746634953643, "learning_rate": 7.172332942555685e-05, "loss": 3.932919979095459, "step": 874, "token_acc": 0.22385943166569064 }, { "epoch": 0.513046027557901, "grad_norm": 3.7464398839309943, "learning_rate": 7.180539273153575e-05, "loss": 3.9187636375427246, "step": 875, "token_acc": 0.22386681119372223 }, { "epoch": 0.51363236587511, "grad_norm": 4.442836358862631, "learning_rate": 7.188745603751465e-05, "loss": 3.9152987003326416, "step": 876, "token_acc": 0.2252617148554337 }, { "epoch": 0.514218704192319, "grad_norm": 3.975531814815956, "learning_rate": 7.196951934349355e-05, "loss": 3.9355785846710205, "step": 877, "token_acc": 0.22226019232297545 }, { "epoch": 0.514805042509528, "grad_norm": 4.541750775651291, "learning_rate": 7.205158264947244e-05, "loss": 3.9372718334198, "step": 878, "token_acc": 0.22283512504183875 }, { "epoch": 0.5153913808267371, "grad_norm": 3.878191100706564, "learning_rate": 7.213364595545136e-05, "loss": 3.9745213985443115, "step": 879, "token_acc": 0.2193806413737433 }, { "epoch": 0.5159777191439461, "grad_norm": 5.779776923028234, "learning_rate": 7.221570926143023e-05, "loss": 3.8926379680633545, "step": 880, "token_acc": 0.2300731855641999 }, { "epoch": 0.5165640574611551, "grad_norm": 2.8259193465927885, "learning_rate": 7.229777256740914e-05, "loss": 3.958523750305176, "step": 881, "token_acc": 0.219617717397754 }, { "epoch": 0.5171503957783641, "grad_norm": 5.953905079652252, "learning_rate": 7.237983587338804e-05, "loss": 3.906073570251465, "step": 882, "token_acc": 0.22284724890194899 }, { "epoch": 0.5177367340955732, "grad_norm": 3.424066460604576, "learning_rate": 7.246189917936693e-05, "loss": 3.9637608528137207, "step": 883, "token_acc": 0.21793221029060003 }, { "epoch": 0.5183230724127822, "grad_norm": 4.431494527412378, "learning_rate": 7.254396248534583e-05, "loss": 3.9044528007507324, "step": 884, "token_acc": 0.22711755670245587 }, { "epoch": 0.5189094107299912, "grad_norm": 5.0536939877353975, "learning_rate": 7.262602579132473e-05, "loss": 3.941391944885254, "step": 885, "token_acc": 0.22255097168747523 }, { "epoch": 0.5194957490472002, "grad_norm": 3.0585335755153644, "learning_rate": 7.270808909730363e-05, "loss": 3.953786849975586, "step": 886, "token_acc": 0.2189842872142502 }, { "epoch": 0.5200820873644093, "grad_norm": 3.58658304994689, "learning_rate": 7.279015240328253e-05, "loss": 3.9502034187316895, "step": 887, "token_acc": 0.22080998769047921 }, { "epoch": 0.5206684256816183, "grad_norm": 4.022615966478019, "learning_rate": 7.287221570926142e-05, "loss": 3.869863510131836, "step": 888, "token_acc": 0.23007992897579133 }, { "epoch": 0.5212547639988273, "grad_norm": 4.933834732727239, "learning_rate": 7.295427901524032e-05, "loss": 3.8581924438476562, "step": 889, "token_acc": 0.2322468294128856 }, { "epoch": 0.5218411023160363, "grad_norm": 4.2477553037008855, "learning_rate": 7.303634232121922e-05, "loss": 4.047770023345947, "step": 890, "token_acc": 0.21254070412383255 }, { "epoch": 0.5224274406332454, "grad_norm": 2.993079493007284, "learning_rate": 7.311840562719812e-05, "loss": 3.9408206939697266, "step": 891, "token_acc": 0.22034887392792635 }, { "epoch": 0.5230137789504544, "grad_norm": 4.310028473311131, "learning_rate": 7.320046893317702e-05, "loss": 3.8987066745758057, "step": 892, "token_acc": 0.2246339106518992 }, { "epoch": 0.5236001172676634, "grad_norm": 5.338960084915028, "learning_rate": 7.328253223915591e-05, "loss": 3.980449914932251, "step": 893, "token_acc": 0.2169134098795536 }, { "epoch": 0.5241864555848724, "grad_norm": 3.019209666074229, "learning_rate": 7.336459554513481e-05, "loss": 3.9784231185913086, "step": 894, "token_acc": 0.21574862131759417 }, { "epoch": 0.5247727939020815, "grad_norm": 4.8939575307265555, "learning_rate": 7.344665885111371e-05, "loss": 3.898198127746582, "step": 895, "token_acc": 0.2253466074405409 }, { "epoch": 0.5253591322192905, "grad_norm": 2.8546093667038, "learning_rate": 7.352872215709262e-05, "loss": 3.893986940383911, "step": 896, "token_acc": 0.22656573742095062 }, { "epoch": 0.5259454705364995, "grad_norm": 5.857708422466417, "learning_rate": 7.361078546307152e-05, "loss": 3.9416933059692383, "step": 897, "token_acc": 0.2220271912423012 }, { "epoch": 0.5265318088537085, "grad_norm": 3.4329769467322953, "learning_rate": 7.36928487690504e-05, "loss": 3.989224910736084, "step": 898, "token_acc": 0.21665791512214902 }, { "epoch": 0.5271181471709177, "grad_norm": 5.8285255496060255, "learning_rate": 7.37749120750293e-05, "loss": 3.931288719177246, "step": 899, "token_acc": 0.22245900400513866 }, { "epoch": 0.5277044854881267, "grad_norm": 3.8255927671258037, "learning_rate": 7.38569753810082e-05, "loss": 3.903797149658203, "step": 900, "token_acc": 0.22568931148689367 }, { "epoch": 0.5282908238053357, "grad_norm": 2.3302054348179198, "learning_rate": 7.39390386869871e-05, "loss": 3.8834705352783203, "step": 901, "token_acc": 0.2272481622758921 }, { "epoch": 0.5288771621225447, "grad_norm": 5.917167165962022, "learning_rate": 7.4021101992966e-05, "loss": 3.9495420455932617, "step": 902, "token_acc": 0.22068242166755178 }, { "epoch": 0.5294635004397538, "grad_norm": 2.8763906550265412, "learning_rate": 7.410316529894489e-05, "loss": 4.001081943511963, "step": 903, "token_acc": 0.21539195507272058 }, { "epoch": 0.5300498387569628, "grad_norm": 5.001622948569647, "learning_rate": 7.418522860492379e-05, "loss": 4.000034332275391, "step": 904, "token_acc": 0.21476615409950803 }, { "epoch": 0.5306361770741718, "grad_norm": 3.571959051065895, "learning_rate": 7.426729191090269e-05, "loss": 3.98193359375, "step": 905, "token_acc": 0.2158800052902232 }, { "epoch": 0.5312225153913809, "grad_norm": 4.314495024250772, "learning_rate": 7.434935521688159e-05, "loss": 3.954977035522461, "step": 906, "token_acc": 0.22094877416782938 }, { "epoch": 0.5318088537085899, "grad_norm": 4.760160344986089, "learning_rate": 7.443141852286049e-05, "loss": 3.905460834503174, "step": 907, "token_acc": 0.22663310559750688 }, { "epoch": 0.5323951920257989, "grad_norm": 3.375661926320089, "learning_rate": 7.451348182883938e-05, "loss": 3.933032512664795, "step": 908, "token_acc": 0.2227987739415911 }, { "epoch": 0.5329815303430079, "grad_norm": 4.1356343859429545, "learning_rate": 7.459554513481828e-05, "loss": 3.887661933898926, "step": 909, "token_acc": 0.2270420162551823 }, { "epoch": 0.533567868660217, "grad_norm": 3.366607829254808, "learning_rate": 7.467760844079718e-05, "loss": 3.954080104827881, "step": 910, "token_acc": 0.22044739869615237 }, { "epoch": 0.534154206977426, "grad_norm": 5.549424512471356, "learning_rate": 7.475967174677608e-05, "loss": 4.005929470062256, "step": 911, "token_acc": 0.21477800310888295 }, { "epoch": 0.534740545294635, "grad_norm": 2.600919493494448, "learning_rate": 7.484173505275497e-05, "loss": 3.960184335708618, "step": 912, "token_acc": 0.21650387049878764 }, { "epoch": 0.535326883611844, "grad_norm": 4.502918341638091, "learning_rate": 7.492379835873389e-05, "loss": 3.9495348930358887, "step": 913, "token_acc": 0.221415117582077 }, { "epoch": 0.5359132219290531, "grad_norm": 3.498191326307574, "learning_rate": 7.500586166471278e-05, "loss": 3.9135985374450684, "step": 914, "token_acc": 0.22394118570726196 }, { "epoch": 0.5364995602462621, "grad_norm": 3.593785183875704, "learning_rate": 7.508792497069168e-05, "loss": 3.905601739883423, "step": 915, "token_acc": 0.22396632947065132 }, { "epoch": 0.5370858985634711, "grad_norm": 5.52076900255028, "learning_rate": 7.516998827667057e-05, "loss": 3.892564535140991, "step": 916, "token_acc": 0.22547355833449947 }, { "epoch": 0.5376722368806801, "grad_norm": 3.1674819562606937, "learning_rate": 7.525205158264946e-05, "loss": 3.930121421813965, "step": 917, "token_acc": 0.22316433710118405 }, { "epoch": 0.5382585751978892, "grad_norm": 3.588282321328592, "learning_rate": 7.533411488862836e-05, "loss": 3.8929686546325684, "step": 918, "token_acc": 0.2257181907267007 }, { "epoch": 0.5388449135150982, "grad_norm": 4.956463348598123, "learning_rate": 7.541617819460726e-05, "loss": 3.944598436355591, "step": 919, "token_acc": 0.21990550352147428 }, { "epoch": 0.5394312518323072, "grad_norm": 4.316020753094943, "learning_rate": 7.549824150058616e-05, "loss": 3.9181320667266846, "step": 920, "token_acc": 0.22260710062759603 }, { "epoch": 0.5400175901495162, "grad_norm": 3.840725134227517, "learning_rate": 7.558030480656506e-05, "loss": 3.9240145683288574, "step": 921, "token_acc": 0.2196389186973895 }, { "epoch": 0.5406039284667253, "grad_norm": 3.3237048216771026, "learning_rate": 7.566236811254395e-05, "loss": 3.911754608154297, "step": 922, "token_acc": 0.22117295066930895 }, { "epoch": 0.5411902667839343, "grad_norm": 3.6250707529186417, "learning_rate": 7.574443141852285e-05, "loss": 3.8985207080841064, "step": 923, "token_acc": 0.22436844635132008 }, { "epoch": 0.5417766051011433, "grad_norm": 3.7120036571529282, "learning_rate": 7.582649472450175e-05, "loss": 3.9529762268066406, "step": 924, "token_acc": 0.21985204921583917 }, { "epoch": 0.5423629434183523, "grad_norm": 5.383160656316933, "learning_rate": 7.590855803048065e-05, "loss": 3.9192419052124023, "step": 925, "token_acc": 0.22281855239198808 }, { "epoch": 0.5429492817355615, "grad_norm": 2.0527480363140653, "learning_rate": 7.599062133645955e-05, "loss": 3.891284942626953, "step": 926, "token_acc": 0.2236781882410655 }, { "epoch": 0.5435356200527705, "grad_norm": 3.263266165618146, "learning_rate": 7.607268464243844e-05, "loss": 3.945679187774658, "step": 927, "token_acc": 0.21928222878494386 }, { "epoch": 0.5441219583699795, "grad_norm": 4.234682571124678, "learning_rate": 7.615474794841734e-05, "loss": 3.9352431297302246, "step": 928, "token_acc": 0.2190277792083389 }, { "epoch": 0.5447082966871885, "grad_norm": 2.761705785590396, "learning_rate": 7.623681125439624e-05, "loss": 3.8988049030303955, "step": 929, "token_acc": 0.22568798797944506 }, { "epoch": 0.5452946350043976, "grad_norm": 3.9690195712761156, "learning_rate": 7.631887456037514e-05, "loss": 3.8889317512512207, "step": 930, "token_acc": 0.22487847367103397 }, { "epoch": 0.5458809733216066, "grad_norm": 2.777011776409667, "learning_rate": 7.640093786635405e-05, "loss": 3.9464635848999023, "step": 931, "token_acc": 0.219765797981268 }, { "epoch": 0.5464673116388156, "grad_norm": 3.552419808478299, "learning_rate": 7.648300117233295e-05, "loss": 3.8892805576324463, "step": 932, "token_acc": 0.22564620538391222 }, { "epoch": 0.5470536499560247, "grad_norm": 3.1801561150165143, "learning_rate": 7.656506447831183e-05, "loss": 3.900843620300293, "step": 933, "token_acc": 0.2246993504683916 }, { "epoch": 0.5476399882732337, "grad_norm": 3.968305321540962, "learning_rate": 7.664712778429073e-05, "loss": 3.9216039180755615, "step": 934, "token_acc": 0.22265100542415012 }, { "epoch": 0.5482263265904427, "grad_norm": 2.72321068246262, "learning_rate": 7.672919109026963e-05, "loss": 3.8249402046203613, "step": 935, "token_acc": 0.23361378695773394 }, { "epoch": 0.5488126649076517, "grad_norm": 4.338261740074034, "learning_rate": 7.681125439624853e-05, "loss": 3.9207706451416016, "step": 936, "token_acc": 0.2226716113944028 }, { "epoch": 0.5493990032248608, "grad_norm": 4.395011774319159, "learning_rate": 7.689331770222742e-05, "loss": 3.9280495643615723, "step": 937, "token_acc": 0.22203949096793854 }, { "epoch": 0.5499853415420698, "grad_norm": 3.325537024141444, "learning_rate": 7.697538100820632e-05, "loss": 3.904790163040161, "step": 938, "token_acc": 0.2250743838050709 }, { "epoch": 0.5505716798592788, "grad_norm": 3.9823258684187266, "learning_rate": 7.705744431418522e-05, "loss": 3.8971245288848877, "step": 939, "token_acc": 0.22593879428760585 }, { "epoch": 0.5511580181764878, "grad_norm": 3.944714517481396, "learning_rate": 7.713950762016412e-05, "loss": 3.9638924598693848, "step": 940, "token_acc": 0.21506544130728603 }, { "epoch": 0.5517443564936969, "grad_norm": 3.2877293615052383, "learning_rate": 7.722157092614302e-05, "loss": 3.872718572616577, "step": 941, "token_acc": 0.22773351209689124 }, { "epoch": 0.5523306948109059, "grad_norm": 4.8399644203222065, "learning_rate": 7.730363423212191e-05, "loss": 3.9278793334960938, "step": 942, "token_acc": 0.22157682775712514 }, { "epoch": 0.5529170331281149, "grad_norm": 2.68554868312657, "learning_rate": 7.738569753810081e-05, "loss": 3.8825113773345947, "step": 943, "token_acc": 0.22638048113562748 }, { "epoch": 0.5535033714453239, "grad_norm": 2.736919658190876, "learning_rate": 7.746776084407971e-05, "loss": 3.8771533966064453, "step": 944, "token_acc": 0.22490796693755644 }, { "epoch": 0.554089709762533, "grad_norm": 3.279178671179111, "learning_rate": 7.754982415005861e-05, "loss": 3.896569013595581, "step": 945, "token_acc": 0.22379683409739348 }, { "epoch": 0.554676048079742, "grad_norm": 3.7235756234502313, "learning_rate": 7.76318874560375e-05, "loss": 3.8837943077087402, "step": 946, "token_acc": 0.22533231724199762 }, { "epoch": 0.555262386396951, "grad_norm": 3.1662908855190537, "learning_rate": 7.77139507620164e-05, "loss": 3.8931336402893066, "step": 947, "token_acc": 0.22274568874573683 }, { "epoch": 0.55584872471416, "grad_norm": 5.197714669975523, "learning_rate": 7.779601406799532e-05, "loss": 3.898205280303955, "step": 948, "token_acc": 0.22474386720540732 }, { "epoch": 0.5564350630313691, "grad_norm": 3.554308146990438, "learning_rate": 7.787807737397421e-05, "loss": 3.8957624435424805, "step": 949, "token_acc": 0.2242974035314944 }, { "epoch": 0.5570214013485781, "grad_norm": 3.3562448081353855, "learning_rate": 7.796014067995311e-05, "loss": 3.9175848960876465, "step": 950, "token_acc": 0.2214336955655339 }, { "epoch": 0.5576077396657871, "grad_norm": 3.253693877911425, "learning_rate": 7.8042203985932e-05, "loss": 3.893742561340332, "step": 951, "token_acc": 0.2237078837642612 }, { "epoch": 0.5581940779829961, "grad_norm": 3.415461906251086, "learning_rate": 7.81242672919109e-05, "loss": 3.864027976989746, "step": 952, "token_acc": 0.2274428251762737 }, { "epoch": 0.5587804163002053, "grad_norm": 3.34311488115709, "learning_rate": 7.820633059788979e-05, "loss": 3.9224228858947754, "step": 953, "token_acc": 0.22049115949680975 }, { "epoch": 0.5593667546174143, "grad_norm": 3.55119858655416, "learning_rate": 7.828839390386869e-05, "loss": 3.8433961868286133, "step": 954, "token_acc": 0.22821853817715113 }, { "epoch": 0.5599530929346233, "grad_norm": 3.6379440858234435, "learning_rate": 7.837045720984759e-05, "loss": 3.845787525177002, "step": 955, "token_acc": 0.2277142050326816 }, { "epoch": 0.5605394312518323, "grad_norm": 4.911104202124572, "learning_rate": 7.845252051582649e-05, "loss": 3.793720245361328, "step": 956, "token_acc": 0.23277526493334635 }, { "epoch": 0.5611257695690414, "grad_norm": 3.0046046696018176, "learning_rate": 7.853458382180538e-05, "loss": 3.8267722129821777, "step": 957, "token_acc": 0.23047471819645732 }, { "epoch": 0.5617121078862504, "grad_norm": 4.197325105947007, "learning_rate": 7.861664712778428e-05, "loss": 3.818058967590332, "step": 958, "token_acc": 0.23168694696112738 }, { "epoch": 0.5622984462034594, "grad_norm": 3.4418132419606637, "learning_rate": 7.869871043376318e-05, "loss": 3.8434574604034424, "step": 959, "token_acc": 0.2277691542187773 }, { "epoch": 0.5628847845206685, "grad_norm": 4.028887928871811, "learning_rate": 7.878077373974208e-05, "loss": 3.855584144592285, "step": 960, "token_acc": 0.22577280805302963 }, { "epoch": 0.5634711228378775, "grad_norm": 3.1334857441013084, "learning_rate": 7.886283704572098e-05, "loss": 3.834045886993408, "step": 961, "token_acc": 0.2291132213766788 }, { "epoch": 0.5640574611550865, "grad_norm": 3.4515580992555503, "learning_rate": 7.894490035169987e-05, "loss": 3.851886749267578, "step": 962, "token_acc": 0.22691975619533153 }, { "epoch": 0.5646437994722955, "grad_norm": 4.453338764321934, "learning_rate": 7.902696365767877e-05, "loss": 3.8006951808929443, "step": 963, "token_acc": 0.2355371156799247 }, { "epoch": 0.5652301377895046, "grad_norm": 4.533303865172233, "learning_rate": 7.910902696365767e-05, "loss": 3.811504602432251, "step": 964, "token_acc": 0.22935611183030616 }, { "epoch": 0.5658164761067136, "grad_norm": 3.221479116110373, "learning_rate": 7.919109026963657e-05, "loss": 3.838282346725464, "step": 965, "token_acc": 0.2277996781040797 }, { "epoch": 0.5664028144239226, "grad_norm": 5.345395767924592, "learning_rate": 7.927315357561548e-05, "loss": 3.852888584136963, "step": 966, "token_acc": 0.22853371041270334 }, { "epoch": 0.5669891527411316, "grad_norm": 2.6754339870035375, "learning_rate": 7.935521688159438e-05, "loss": 3.899423122406006, "step": 967, "token_acc": 0.220977424646992 }, { "epoch": 0.5675754910583407, "grad_norm": 8.317987045475705, "learning_rate": 7.943728018757328e-05, "loss": 3.886955738067627, "step": 968, "token_acc": 0.22348943524855572 }, { "epoch": 0.5681618293755497, "grad_norm": 4.085085559551292, "learning_rate": 7.951934349355216e-05, "loss": 3.8788070678710938, "step": 969, "token_acc": 0.22676565486024436 }, { "epoch": 0.5687481676927587, "grad_norm": 7.174908146785812, "learning_rate": 7.960140679953106e-05, "loss": 3.93218994140625, "step": 970, "token_acc": 0.21808026150029122 }, { "epoch": 0.5693345060099677, "grad_norm": 4.421127123008212, "learning_rate": 7.968347010550996e-05, "loss": 3.8866190910339355, "step": 971, "token_acc": 0.22490683014408314 }, { "epoch": 0.5699208443271768, "grad_norm": 5.367228969842158, "learning_rate": 7.976553341148885e-05, "loss": 3.8557634353637695, "step": 972, "token_acc": 0.22785068424902213 }, { "epoch": 0.5705071826443858, "grad_norm": 3.16810781283421, "learning_rate": 7.984759671746775e-05, "loss": 3.9037954807281494, "step": 973, "token_acc": 0.22018437157385987 }, { "epoch": 0.5710935209615948, "grad_norm": 3.8105249774460224, "learning_rate": 7.992966002344665e-05, "loss": 3.8523316383361816, "step": 974, "token_acc": 0.22604303874422266 }, { "epoch": 0.5716798592788038, "grad_norm": 3.4622471120927036, "learning_rate": 8.001172332942555e-05, "loss": 3.8481078147888184, "step": 975, "token_acc": 0.22952883161013612 }, { "epoch": 0.5722661975960129, "grad_norm": 3.639228196611874, "learning_rate": 8.009378663540445e-05, "loss": 3.8009986877441406, "step": 976, "token_acc": 0.233648143500601 }, { "epoch": 0.5728525359132219, "grad_norm": 5.00103382334742, "learning_rate": 8.017584994138334e-05, "loss": 3.814574718475342, "step": 977, "token_acc": 0.23216061226573795 }, { "epoch": 0.5734388742304309, "grad_norm": 3.7071355160869612, "learning_rate": 8.025791324736224e-05, "loss": 3.8253519535064697, "step": 978, "token_acc": 0.2289901702472639 }, { "epoch": 0.5740252125476399, "grad_norm": 4.481567337595048, "learning_rate": 8.033997655334114e-05, "loss": 3.8796417713165283, "step": 979, "token_acc": 0.22221097029321965 }, { "epoch": 0.574611550864849, "grad_norm": 3.237650655814511, "learning_rate": 8.042203985932004e-05, "loss": 3.833104133605957, "step": 980, "token_acc": 0.22838650627615062 }, { "epoch": 0.575197889182058, "grad_norm": 3.2423057510132693, "learning_rate": 8.050410316529894e-05, "loss": 3.889162063598633, "step": 981, "token_acc": 0.22233752924370095 }, { "epoch": 0.575784227499267, "grad_norm": 3.513320179650478, "learning_rate": 8.058616647127783e-05, "loss": 3.827302932739258, "step": 982, "token_acc": 0.2291965557167383 }, { "epoch": 0.576370565816476, "grad_norm": 4.732291375856808, "learning_rate": 8.066822977725675e-05, "loss": 3.8552141189575195, "step": 983, "token_acc": 0.2254483786422735 }, { "epoch": 0.5769569041336852, "grad_norm": 2.5648213631552346, "learning_rate": 8.075029308323564e-05, "loss": 3.7717976570129395, "step": 984, "token_acc": 0.23429037452165366 }, { "epoch": 0.5775432424508942, "grad_norm": 4.971729409570693, "learning_rate": 8.083235638921454e-05, "loss": 3.7867424488067627, "step": 985, "token_acc": 0.23247293877745084 }, { "epoch": 0.5781295807681032, "grad_norm": 3.76378411267868, "learning_rate": 8.091441969519344e-05, "loss": 3.799938678741455, "step": 986, "token_acc": 0.2325908520973163 }, { "epoch": 0.5787159190853123, "grad_norm": 4.084654307778991, "learning_rate": 8.099648300117232e-05, "loss": 3.883906364440918, "step": 987, "token_acc": 0.2216784762400052 }, { "epoch": 0.5793022574025213, "grad_norm": 2.8510927600248688, "learning_rate": 8.107854630715122e-05, "loss": 3.84855318069458, "step": 988, "token_acc": 0.22796355722388453 }, { "epoch": 0.5798885957197303, "grad_norm": 4.9541197346937675, "learning_rate": 8.116060961313012e-05, "loss": 3.819871425628662, "step": 989, "token_acc": 0.22912656693312966 }, { "epoch": 0.5804749340369393, "grad_norm": 3.5064190020491766, "learning_rate": 8.124267291910902e-05, "loss": 3.8603312969207764, "step": 990, "token_acc": 0.22497547606767593 }, { "epoch": 0.5810612723541484, "grad_norm": 3.8543529276605493, "learning_rate": 8.132473622508792e-05, "loss": 3.7963509559631348, "step": 991, "token_acc": 0.231695692497939 }, { "epoch": 0.5816476106713574, "grad_norm": 3.7517608582258704, "learning_rate": 8.140679953106681e-05, "loss": 3.840233087539673, "step": 992, "token_acc": 0.22612567002614864 }, { "epoch": 0.5822339489885664, "grad_norm": 4.125403802049692, "learning_rate": 8.148886283704571e-05, "loss": 3.871232509613037, "step": 993, "token_acc": 0.22211566872113336 }, { "epoch": 0.5828202873057754, "grad_norm": 2.467875132986388, "learning_rate": 8.157092614302461e-05, "loss": 3.798952579498291, "step": 994, "token_acc": 0.23011739313272972 }, { "epoch": 0.5834066256229845, "grad_norm": 4.377110377657691, "learning_rate": 8.165298944900351e-05, "loss": 3.821575164794922, "step": 995, "token_acc": 0.22830792054299254 }, { "epoch": 0.5839929639401935, "grad_norm": 2.493508094623644, "learning_rate": 8.17350527549824e-05, "loss": 3.821772575378418, "step": 996, "token_acc": 0.22838963026013123 }, { "epoch": 0.5845793022574025, "grad_norm": 4.365756314753711, "learning_rate": 8.18171160609613e-05, "loss": 3.812507152557373, "step": 997, "token_acc": 0.22803536725578177 }, { "epoch": 0.5851656405746115, "grad_norm": 3.0145318160243026, "learning_rate": 8.18991793669402e-05, "loss": 3.8375725746154785, "step": 998, "token_acc": 0.22480492448014727 }, { "epoch": 0.5857519788918206, "grad_norm": 3.8988707731223124, "learning_rate": 8.19812426729191e-05, "loss": 3.805799722671509, "step": 999, "token_acc": 0.23037427871729924 }, { "epoch": 0.5863383172090296, "grad_norm": 3.0808158733267352, "learning_rate": 8.2063305978898e-05, "loss": 3.772102117538452, "step": 1000, "token_acc": 0.2322652993316163 }, { "epoch": 0.5869246555262386, "grad_norm": 4.052979513195679, "learning_rate": 8.214536928487691e-05, "loss": 3.7995927333831787, "step": 1001, "token_acc": 0.2302411922692085 }, { "epoch": 0.5875109938434476, "grad_norm": 3.3241052941074765, "learning_rate": 8.222743259085581e-05, "loss": 3.8492989540100098, "step": 1002, "token_acc": 0.22498169750244246 }, { "epoch": 0.5880973321606567, "grad_norm": 4.493207372726703, "learning_rate": 8.23094958968347e-05, "loss": 3.8294248580932617, "step": 1003, "token_acc": 0.22555666439618346 }, { "epoch": 0.5886836704778657, "grad_norm": 3.6574692454369067, "learning_rate": 8.23915592028136e-05, "loss": 3.78488826751709, "step": 1004, "token_acc": 0.23005784369621488 }, { "epoch": 0.5892700087950747, "grad_norm": 4.664427598528099, "learning_rate": 8.247362250879249e-05, "loss": 3.792283535003662, "step": 1005, "token_acc": 0.22856545591744334 }, { "epoch": 0.5898563471122837, "grad_norm": 3.6045116390437184, "learning_rate": 8.255568581477139e-05, "loss": 3.789374828338623, "step": 1006, "token_acc": 0.2289365496062212 }, { "epoch": 0.5904426854294929, "grad_norm": 3.3496260892697896, "learning_rate": 8.263774912075028e-05, "loss": 3.7920684814453125, "step": 1007, "token_acc": 0.2311466633401423 }, { "epoch": 0.5910290237467019, "grad_norm": 3.619648182489753, "learning_rate": 8.271981242672918e-05, "loss": 3.7120020389556885, "step": 1008, "token_acc": 0.2391520909357829 }, { "epoch": 0.5916153620639109, "grad_norm": 4.3879056276709925, "learning_rate": 8.280187573270808e-05, "loss": 3.740029811859131, "step": 1009, "token_acc": 0.2370463118774374 }, { "epoch": 0.5922017003811199, "grad_norm": 4.4660548087970655, "learning_rate": 8.288393903868698e-05, "loss": 3.750211715698242, "step": 1010, "token_acc": 0.23159138124867243 }, { "epoch": 0.592788038698329, "grad_norm": 1.907076142560956, "learning_rate": 8.296600234466588e-05, "loss": 3.7922568321228027, "step": 1011, "token_acc": 0.2287552534633944 }, { "epoch": 0.593374377015538, "grad_norm": 3.7097748134385293, "learning_rate": 8.304806565064477e-05, "loss": 3.7345829010009766, "step": 1012, "token_acc": 0.23655176571661116 }, { "epoch": 0.593960715332747, "grad_norm": 4.163467583377942, "learning_rate": 8.313012895662367e-05, "loss": 3.776966094970703, "step": 1013, "token_acc": 0.23140533798413615 }, { "epoch": 0.594547053649956, "grad_norm": 3.260173078907783, "learning_rate": 8.321219226260257e-05, "loss": 3.791015863418579, "step": 1014, "token_acc": 0.23041021288083502 }, { "epoch": 0.5951333919671651, "grad_norm": 3.514703885640435, "learning_rate": 8.329425556858147e-05, "loss": 3.7367687225341797, "step": 1015, "token_acc": 0.2350947352379351 }, { "epoch": 0.5957197302843741, "grad_norm": 3.4714243345061067, "learning_rate": 8.337631887456037e-05, "loss": 3.77022123336792, "step": 1016, "token_acc": 0.23148969582020457 }, { "epoch": 0.5963060686015831, "grad_norm": 2.877015534546585, "learning_rate": 8.345838218053926e-05, "loss": 3.7913310527801514, "step": 1017, "token_acc": 0.22898245172388473 }, { "epoch": 0.5968924069187922, "grad_norm": 2.5590966237896695, "learning_rate": 8.354044548651817e-05, "loss": 3.755291223526001, "step": 1018, "token_acc": 0.2351382315780457 }, { "epoch": 0.5974787452360012, "grad_norm": 3.9699392669798286, "learning_rate": 8.362250879249707e-05, "loss": 3.7486839294433594, "step": 1019, "token_acc": 0.23506730561853312 }, { "epoch": 0.5980650835532102, "grad_norm": 2.998953762342674, "learning_rate": 8.370457209847597e-05, "loss": 3.736001968383789, "step": 1020, "token_acc": 0.23651671096162474 }, { "epoch": 0.5986514218704192, "grad_norm": 4.237700727176671, "learning_rate": 8.378663540445487e-05, "loss": 3.7611141204833984, "step": 1021, "token_acc": 0.23102844855157065 }, { "epoch": 0.5992377601876283, "grad_norm": 3.5538029873037194, "learning_rate": 8.386869871043375e-05, "loss": 3.7258243560791016, "step": 1022, "token_acc": 0.23802262653530862 }, { "epoch": 0.5998240985048373, "grad_norm": 2.3206302334963826, "learning_rate": 8.395076201641265e-05, "loss": 3.7062408924102783, "step": 1023, "token_acc": 0.23824927140494132 }, { "epoch": 0.6004104368220463, "grad_norm": 4.5421253566708115, "learning_rate": 8.403282532239155e-05, "loss": 3.711066722869873, "step": 1024, "token_acc": 0.23627117253824387 }, { "epoch": 0.6009967751392553, "grad_norm": 2.803887573222275, "learning_rate": 8.411488862837045e-05, "loss": 3.706165313720703, "step": 1025, "token_acc": 0.23566379868385534 }, { "epoch": 0.6015831134564644, "grad_norm": 4.259402169832166, "learning_rate": 8.419695193434935e-05, "loss": 3.725330352783203, "step": 1026, "token_acc": 0.23529890468161507 }, { "epoch": 0.6021694517736734, "grad_norm": 2.7303569657284914, "learning_rate": 8.427901524032824e-05, "loss": 3.7342751026153564, "step": 1027, "token_acc": 0.23525420062992553 }, { "epoch": 0.6027557900908824, "grad_norm": 3.6093176522584356, "learning_rate": 8.436107854630714e-05, "loss": 3.72261381149292, "step": 1028, "token_acc": 0.23667605387035354 }, { "epoch": 0.6033421284080914, "grad_norm": 4.047757923738355, "learning_rate": 8.444314185228604e-05, "loss": 3.767302989959717, "step": 1029, "token_acc": 0.23157634761548143 }, { "epoch": 0.6039284667253005, "grad_norm": 2.3706404937674423, "learning_rate": 8.452520515826494e-05, "loss": 3.712876319885254, "step": 1030, "token_acc": 0.23685148705511302 }, { "epoch": 0.6045148050425095, "grad_norm": 5.332984072327762, "learning_rate": 8.460726846424384e-05, "loss": 3.7768425941467285, "step": 1031, "token_acc": 0.23090280051227924 }, { "epoch": 0.6051011433597185, "grad_norm": 2.998308173797864, "learning_rate": 8.468933177022273e-05, "loss": 3.7286295890808105, "step": 1032, "token_acc": 0.2367908193974169 }, { "epoch": 0.6056874816769275, "grad_norm": 3.6982111907045265, "learning_rate": 8.477139507620163e-05, "loss": 3.7446091175079346, "step": 1033, "token_acc": 0.23353397935003387 }, { "epoch": 0.6062738199941367, "grad_norm": 3.4246905874889504, "learning_rate": 8.485345838218053e-05, "loss": 3.7749390602111816, "step": 1034, "token_acc": 0.23203850390809422 }, { "epoch": 0.6068601583113457, "grad_norm": 3.168977790872297, "learning_rate": 8.493552168815943e-05, "loss": 3.6910877227783203, "step": 1035, "token_acc": 0.23968113780456454 }, { "epoch": 0.6074464966285547, "grad_norm": 3.19579919696364, "learning_rate": 8.501758499413834e-05, "loss": 3.7172834873199463, "step": 1036, "token_acc": 0.2358989617591376 }, { "epoch": 0.6080328349457637, "grad_norm": 2.9278837359029586, "learning_rate": 8.509964830011724e-05, "loss": 3.7255663871765137, "step": 1037, "token_acc": 0.23685890842445825 }, { "epoch": 0.6086191732629728, "grad_norm": 2.5079710963209836, "learning_rate": 8.518171160609613e-05, "loss": 3.6861560344696045, "step": 1038, "token_acc": 0.2407471372647533 }, { "epoch": 0.6092055115801818, "grad_norm": 3.4824479968247255, "learning_rate": 8.526377491207503e-05, "loss": 3.674631118774414, "step": 1039, "token_acc": 0.24154742835605927 }, { "epoch": 0.6097918498973908, "grad_norm": 2.168821207363241, "learning_rate": 8.534583821805392e-05, "loss": 3.6727075576782227, "step": 1040, "token_acc": 0.2419392059420181 }, { "epoch": 0.6103781882145998, "grad_norm": 4.029948416394446, "learning_rate": 8.542790152403281e-05, "loss": 3.7046327590942383, "step": 1041, "token_acc": 0.24004632424777983 }, { "epoch": 0.6109645265318089, "grad_norm": 3.0809701570578287, "learning_rate": 8.550996483001171e-05, "loss": 3.677265167236328, "step": 1042, "token_acc": 0.23810717867889383 }, { "epoch": 0.6115508648490179, "grad_norm": 4.350522018156594, "learning_rate": 8.559202813599061e-05, "loss": 3.7118568420410156, "step": 1043, "token_acc": 0.23546996590920866 }, { "epoch": 0.6121372031662269, "grad_norm": 2.1699610013671586, "learning_rate": 8.567409144196951e-05, "loss": 3.6330552101135254, "step": 1044, "token_acc": 0.2459863761070717 }, { "epoch": 0.612723541483436, "grad_norm": 3.9420042519746405, "learning_rate": 8.575615474794841e-05, "loss": 3.694444417953491, "step": 1045, "token_acc": 0.23935295915034402 }, { "epoch": 0.613309879800645, "grad_norm": 3.3507339284219313, "learning_rate": 8.58382180539273e-05, "loss": 3.69856595993042, "step": 1046, "token_acc": 0.2388438495083775 }, { "epoch": 0.613896218117854, "grad_norm": 3.3542631031758607, "learning_rate": 8.59202813599062e-05, "loss": 3.709279775619507, "step": 1047, "token_acc": 0.23611059995057807 }, { "epoch": 0.614482556435063, "grad_norm": 3.131451202344225, "learning_rate": 8.60023446658851e-05, "loss": 3.6492738723754883, "step": 1048, "token_acc": 0.24169178027171928 }, { "epoch": 0.6150688947522721, "grad_norm": 2.8347374075490013, "learning_rate": 8.6084407971864e-05, "loss": 3.6759862899780273, "step": 1049, "token_acc": 0.2393384545772403 }, { "epoch": 0.6156552330694811, "grad_norm": 2.6071825669225253, "learning_rate": 8.61664712778429e-05, "loss": 3.7384555339813232, "step": 1050, "token_acc": 0.23241476455475205 }, { "epoch": 0.6162415713866901, "grad_norm": 4.567171547211958, "learning_rate": 8.62485345838218e-05, "loss": 3.6436378955841064, "step": 1051, "token_acc": 0.2427529904259451 }, { "epoch": 0.6168279097038991, "grad_norm": 2.487937000712364, "learning_rate": 8.633059788980069e-05, "loss": 3.7416181564331055, "step": 1052, "token_acc": 0.23178649102429008 }, { "epoch": 0.6174142480211082, "grad_norm": 4.010611861245166, "learning_rate": 8.64126611957796e-05, "loss": 3.6738507747650146, "step": 1053, "token_acc": 0.23784842881652812 }, { "epoch": 0.6180005863383172, "grad_norm": 2.174947451428352, "learning_rate": 8.64947245017585e-05, "loss": 3.7118053436279297, "step": 1054, "token_acc": 0.23602486034292383 }, { "epoch": 0.6185869246555262, "grad_norm": 5.193925962315059, "learning_rate": 8.65767878077374e-05, "loss": 3.671924114227295, "step": 1055, "token_acc": 0.2392359741877525 }, { "epoch": 0.6191732629727352, "grad_norm": 2.8666862821282466, "learning_rate": 8.66588511137163e-05, "loss": 3.7156877517700195, "step": 1056, "token_acc": 0.23500763547814602 }, { "epoch": 0.6197596012899443, "grad_norm": 3.3663052715442903, "learning_rate": 8.67409144196952e-05, "loss": 3.6975743770599365, "step": 1057, "token_acc": 0.2385550200113011 }, { "epoch": 0.6203459396071533, "grad_norm": 3.189160920945845, "learning_rate": 8.682297772567408e-05, "loss": 3.6914165019989014, "step": 1058, "token_acc": 0.2393398475752544 }, { "epoch": 0.6209322779243623, "grad_norm": 4.869527161252376, "learning_rate": 8.690504103165298e-05, "loss": 3.6959986686706543, "step": 1059, "token_acc": 0.2385128211952255 }, { "epoch": 0.6215186162415713, "grad_norm": 2.4614450550082374, "learning_rate": 8.698710433763188e-05, "loss": 3.6999077796936035, "step": 1060, "token_acc": 0.23591083264844326 }, { "epoch": 0.6221049545587805, "grad_norm": 4.072375519133194, "learning_rate": 8.706916764361077e-05, "loss": 3.691514015197754, "step": 1061, "token_acc": 0.23823964664293198 }, { "epoch": 0.6226912928759895, "grad_norm": 3.625572014663007, "learning_rate": 8.715123094958967e-05, "loss": 3.700716257095337, "step": 1062, "token_acc": 0.2363547874296083 }, { "epoch": 0.6232776311931985, "grad_norm": 3.383984250362769, "learning_rate": 8.723329425556857e-05, "loss": 3.663546562194824, "step": 1063, "token_acc": 0.24092448517124596 }, { "epoch": 0.6238639695104075, "grad_norm": 4.082003104613677, "learning_rate": 8.731535756154747e-05, "loss": 3.667447090148926, "step": 1064, "token_acc": 0.24112965448722692 }, { "epoch": 0.6244503078276166, "grad_norm": 2.80398553725358, "learning_rate": 8.739742086752637e-05, "loss": 3.7277731895446777, "step": 1065, "token_acc": 0.2339683588444738 }, { "epoch": 0.6250366461448256, "grad_norm": 4.115443600583568, "learning_rate": 8.747948417350526e-05, "loss": 3.7254858016967773, "step": 1066, "token_acc": 0.23352642532377152 }, { "epoch": 0.6256229844620346, "grad_norm": 4.077202625103572, "learning_rate": 8.756154747948416e-05, "loss": 3.7083616256713867, "step": 1067, "token_acc": 0.23424778190723752 }, { "epoch": 0.6262093227792436, "grad_norm": 2.354941417764202, "learning_rate": 8.764361078546306e-05, "loss": 3.706695079803467, "step": 1068, "token_acc": 0.23444976076555024 }, { "epoch": 0.6267956610964527, "grad_norm": 3.93936008108172, "learning_rate": 8.772567409144196e-05, "loss": 3.596928834915161, "step": 1069, "token_acc": 0.24960757578491286 }, { "epoch": 0.6273819994136617, "grad_norm": 3.3987872552203573, "learning_rate": 8.780773739742086e-05, "loss": 3.699096441268921, "step": 1070, "token_acc": 0.2341964866623292 }, { "epoch": 0.6279683377308707, "grad_norm": 3.401667475513786, "learning_rate": 8.788980070339977e-05, "loss": 3.702594757080078, "step": 1071, "token_acc": 0.23767863710057546 }, { "epoch": 0.6285546760480798, "grad_norm": 2.4172271108047236, "learning_rate": 8.797186400937867e-05, "loss": 3.6958084106445312, "step": 1072, "token_acc": 0.2372658244104474 }, { "epoch": 0.6291410143652888, "grad_norm": 4.44721798079579, "learning_rate": 8.805392731535756e-05, "loss": 3.689333915710449, "step": 1073, "token_acc": 0.2372842050338763 }, { "epoch": 0.6297273526824978, "grad_norm": 2.557873645925693, "learning_rate": 8.813599062133646e-05, "loss": 3.706573009490967, "step": 1074, "token_acc": 0.2361011888406514 }, { "epoch": 0.6303136909997068, "grad_norm": 3.110943105471363, "learning_rate": 8.821805392731536e-05, "loss": 3.640707015991211, "step": 1075, "token_acc": 0.2426175805319398 }, { "epoch": 0.6309000293169159, "grad_norm": 2.7776847834192595, "learning_rate": 8.830011723329424e-05, "loss": 3.651592493057251, "step": 1076, "token_acc": 0.24182716049382716 }, { "epoch": 0.6314863676341249, "grad_norm": 3.2471767990688467, "learning_rate": 8.838218053927314e-05, "loss": 3.645282745361328, "step": 1077, "token_acc": 0.2406760135621621 }, { "epoch": 0.6320727059513339, "grad_norm": 2.153274338713322, "learning_rate": 8.846424384525204e-05, "loss": 3.669600486755371, "step": 1078, "token_acc": 0.2407547309852696 }, { "epoch": 0.6326590442685429, "grad_norm": 2.3869554506560204, "learning_rate": 8.854630715123094e-05, "loss": 3.6873092651367188, "step": 1079, "token_acc": 0.23399029643371635 }, { "epoch": 0.633245382585752, "grad_norm": 3.970641996102504, "learning_rate": 8.862837045720984e-05, "loss": 3.6670117378234863, "step": 1080, "token_acc": 0.23933887389433742 }, { "epoch": 0.633831720902961, "grad_norm": 2.564318014878147, "learning_rate": 8.871043376318873e-05, "loss": 3.6191978454589844, "step": 1081, "token_acc": 0.24724905611248535 }, { "epoch": 0.63441805922017, "grad_norm": 2.7879682733894557, "learning_rate": 8.879249706916763e-05, "loss": 3.645559310913086, "step": 1082, "token_acc": 0.24035654033661447 }, { "epoch": 0.635004397537379, "grad_norm": 2.9523661166247086, "learning_rate": 8.887456037514653e-05, "loss": 3.7270026206970215, "step": 1083, "token_acc": 0.23134017627606734 }, { "epoch": 0.6355907358545881, "grad_norm": 2.1803907468983414, "learning_rate": 8.895662368112543e-05, "loss": 3.6534385681152344, "step": 1084, "token_acc": 0.2413595957360481 }, { "epoch": 0.6361770741717971, "grad_norm": 3.888937263374872, "learning_rate": 8.903868698710433e-05, "loss": 3.686830997467041, "step": 1085, "token_acc": 0.2363456723286188 }, { "epoch": 0.6367634124890061, "grad_norm": 2.4595163704192484, "learning_rate": 8.912075029308322e-05, "loss": 3.6888790130615234, "step": 1086, "token_acc": 0.23675515204505942 }, { "epoch": 0.6373497508062151, "grad_norm": 3.422397678688959, "learning_rate": 8.920281359906212e-05, "loss": 3.66855525970459, "step": 1087, "token_acc": 0.23799390409926854 }, { "epoch": 0.6379360891234243, "grad_norm": 3.6720086876870677, "learning_rate": 8.928487690504103e-05, "loss": 3.646956443786621, "step": 1088, "token_acc": 0.24156942845656554 }, { "epoch": 0.6385224274406333, "grad_norm": 2.718551158889029, "learning_rate": 8.936694021101993e-05, "loss": 3.6580452919006348, "step": 1089, "token_acc": 0.24035392798690672 }, { "epoch": 0.6391087657578423, "grad_norm": 3.3331653852882344, "learning_rate": 8.944900351699883e-05, "loss": 3.6371679306030273, "step": 1090, "token_acc": 0.2429623213512343 }, { "epoch": 0.6396951040750513, "grad_norm": 3.066466128395858, "learning_rate": 8.953106682297773e-05, "loss": 3.6113815307617188, "step": 1091, "token_acc": 0.24449353758664338 }, { "epoch": 0.6402814423922604, "grad_norm": 2.8253532638045575, "learning_rate": 8.961313012895663e-05, "loss": 3.621619701385498, "step": 1092, "token_acc": 0.24361911414966894 }, { "epoch": 0.6408677807094694, "grad_norm": 2.9059126345825614, "learning_rate": 8.969519343493551e-05, "loss": 3.697326421737671, "step": 1093, "token_acc": 0.23558159300631373 }, { "epoch": 0.6414541190266784, "grad_norm": 3.1473309626173753, "learning_rate": 8.977725674091441e-05, "loss": 3.62583589553833, "step": 1094, "token_acc": 0.24409304778020194 }, { "epoch": 0.6420404573438874, "grad_norm": 3.8133130825219004, "learning_rate": 8.98593200468933e-05, "loss": 3.582160472869873, "step": 1095, "token_acc": 0.2478891140269089 }, { "epoch": 0.6426267956610965, "grad_norm": 4.464738856390558, "learning_rate": 8.99413833528722e-05, "loss": 3.617539882659912, "step": 1096, "token_acc": 0.24480830064532455 }, { "epoch": 0.6432131339783055, "grad_norm": 2.001651958570203, "learning_rate": 9.00234466588511e-05, "loss": 3.6067187786102295, "step": 1097, "token_acc": 0.2448461301077064 }, { "epoch": 0.6437994722955145, "grad_norm": 6.209760739375893, "learning_rate": 9.010550996483e-05, "loss": 3.6563830375671387, "step": 1098, "token_acc": 0.2384539066365522 }, { "epoch": 0.6443858106127235, "grad_norm": 3.657729168594278, "learning_rate": 9.01875732708089e-05, "loss": 3.690416097640991, "step": 1099, "token_acc": 0.2359796253713858 }, { "epoch": 0.6449721489299326, "grad_norm": 5.177879422812686, "learning_rate": 9.02696365767878e-05, "loss": 3.698824405670166, "step": 1100, "token_acc": 0.23293829803501798 }, { "epoch": 0.6455584872471416, "grad_norm": 2.837210865795638, "learning_rate": 9.03516998827667e-05, "loss": 3.690340995788574, "step": 1101, "token_acc": 0.23765873237674703 }, { "epoch": 0.6461448255643506, "grad_norm": 3.7143206955545582, "learning_rate": 9.043376318874559e-05, "loss": 3.6323437690734863, "step": 1102, "token_acc": 0.24088070982582976 }, { "epoch": 0.6467311638815597, "grad_norm": 3.5384857704439807, "learning_rate": 9.051582649472449e-05, "loss": 3.642756938934326, "step": 1103, "token_acc": 0.24209850968622398 }, { "epoch": 0.6473175021987687, "grad_norm": 2.7745112061890373, "learning_rate": 9.059788980070339e-05, "loss": 3.628681182861328, "step": 1104, "token_acc": 0.24424118976824286 }, { "epoch": 0.6479038405159777, "grad_norm": 3.2377103860365137, "learning_rate": 9.06799531066823e-05, "loss": 3.675654888153076, "step": 1105, "token_acc": 0.2371810456067046 }, { "epoch": 0.6484901788331867, "grad_norm": 2.912580618600177, "learning_rate": 9.07620164126612e-05, "loss": 3.6180553436279297, "step": 1106, "token_acc": 0.24411638399507998 }, { "epoch": 0.6490765171503958, "grad_norm": 2.6859285650232483, "learning_rate": 9.08440797186401e-05, "loss": 3.6335134506225586, "step": 1107, "token_acc": 0.2420051138109348 }, { "epoch": 0.6496628554676048, "grad_norm": 2.850486126272157, "learning_rate": 9.0926143024619e-05, "loss": 3.6664273738861084, "step": 1108, "token_acc": 0.23794996331992613 }, { "epoch": 0.6502491937848138, "grad_norm": 3.4743188561289826, "learning_rate": 9.100820633059789e-05, "loss": 3.5565309524536133, "step": 1109, "token_acc": 0.25057429285230354 }, { "epoch": 0.6508355321020228, "grad_norm": 2.6378148939634944, "learning_rate": 9.109026963657679e-05, "loss": 3.630723476409912, "step": 1110, "token_acc": 0.24331036587451296 }, { "epoch": 0.6514218704192319, "grad_norm": 3.9544118375263633, "learning_rate": 9.117233294255567e-05, "loss": 3.578401565551758, "step": 1111, "token_acc": 0.24591891714304487 }, { "epoch": 0.6520082087364409, "grad_norm": 2.3420689073134304, "learning_rate": 9.125439624853457e-05, "loss": 3.6231515407562256, "step": 1112, "token_acc": 0.24393922816012004 }, { "epoch": 0.6525945470536499, "grad_norm": 3.153669656250764, "learning_rate": 9.133645955451347e-05, "loss": 3.6007745265960693, "step": 1113, "token_acc": 0.2467528801991122 }, { "epoch": 0.6531808853708589, "grad_norm": 2.3708753999240626, "learning_rate": 9.141852286049237e-05, "loss": 3.6407856941223145, "step": 1114, "token_acc": 0.24141487229388625 }, { "epoch": 0.653767223688068, "grad_norm": 3.5101712701699626, "learning_rate": 9.150058616647127e-05, "loss": 3.63830828666687, "step": 1115, "token_acc": 0.24304251569816454 }, { "epoch": 0.6543535620052771, "grad_norm": 1.8514542885456984, "learning_rate": 9.158264947245016e-05, "loss": 3.625324249267578, "step": 1116, "token_acc": 0.24061530865721384 }, { "epoch": 0.6549399003224861, "grad_norm": 3.880143560761657, "learning_rate": 9.166471277842906e-05, "loss": 3.5791852474212646, "step": 1117, "token_acc": 0.24861210741352963 }, { "epoch": 0.6555262386396951, "grad_norm": 2.2666327395386414, "learning_rate": 9.174677608440796e-05, "loss": 3.6924595832824707, "step": 1118, "token_acc": 0.2344515594142895 }, { "epoch": 0.6561125769569042, "grad_norm": 4.079671924939505, "learning_rate": 9.182883939038686e-05, "loss": 3.633694648742676, "step": 1119, "token_acc": 0.24215553762617942 }, { "epoch": 0.6566989152741132, "grad_norm": 3.0725267431622543, "learning_rate": 9.191090269636576e-05, "loss": 3.635021924972534, "step": 1120, "token_acc": 0.24189268107183115 }, { "epoch": 0.6572852535913222, "grad_norm": 2.6338148154108776, "learning_rate": 9.199296600234465e-05, "loss": 3.640258312225342, "step": 1121, "token_acc": 0.24034322631710145 }, { "epoch": 0.6578715919085312, "grad_norm": 2.4462303048005842, "learning_rate": 9.207502930832355e-05, "loss": 3.671175718307495, "step": 1122, "token_acc": 0.23558122381079427 }, { "epoch": 0.6584579302257403, "grad_norm": 3.551281536640821, "learning_rate": 9.215709261430246e-05, "loss": 3.6547622680664062, "step": 1123, "token_acc": 0.23981787185155712 }, { "epoch": 0.6590442685429493, "grad_norm": 2.1500169923634846, "learning_rate": 9.223915592028136e-05, "loss": 3.6179089546203613, "step": 1124, "token_acc": 0.24364431801260097 }, { "epoch": 0.6596306068601583, "grad_norm": 2.911642161670254, "learning_rate": 9.232121922626026e-05, "loss": 3.5438332557678223, "step": 1125, "token_acc": 0.25195284723282996 }, { "epoch": 0.6602169451773673, "grad_norm": 2.915592239384677, "learning_rate": 9.240328253223916e-05, "loss": 3.6207621097564697, "step": 1126, "token_acc": 0.24005256078954773 }, { "epoch": 0.6608032834945764, "grad_norm": 2.662367597552409, "learning_rate": 9.248534583821806e-05, "loss": 3.612668752670288, "step": 1127, "token_acc": 0.24411036949244172 }, { "epoch": 0.6613896218117854, "grad_norm": 3.343971525513835, "learning_rate": 9.256740914419695e-05, "loss": 3.6547317504882812, "step": 1128, "token_acc": 0.23889939802239488 }, { "epoch": 0.6619759601289944, "grad_norm": 2.482227129861332, "learning_rate": 9.264947245017584e-05, "loss": 3.5920207500457764, "step": 1129, "token_acc": 0.2469671720250918 }, { "epoch": 0.6625622984462035, "grad_norm": 2.6387899521644766, "learning_rate": 9.273153575615474e-05, "loss": 3.5902719497680664, "step": 1130, "token_acc": 0.24591198136038445 }, { "epoch": 0.6631486367634125, "grad_norm": 2.1154082629265556, "learning_rate": 9.281359906213363e-05, "loss": 3.6242785453796387, "step": 1131, "token_acc": 0.23954727889839586 }, { "epoch": 0.6637349750806215, "grad_norm": 3.225756091203333, "learning_rate": 9.289566236811253e-05, "loss": 3.6327247619628906, "step": 1132, "token_acc": 0.24043628454452406 }, { "epoch": 0.6643213133978305, "grad_norm": 2.075184478783505, "learning_rate": 9.297772567409143e-05, "loss": 3.6500983238220215, "step": 1133, "token_acc": 0.23965786529897212 }, { "epoch": 0.6649076517150396, "grad_norm": 3.555137517946713, "learning_rate": 9.305978898007033e-05, "loss": 3.646602153778076, "step": 1134, "token_acc": 0.2397526466509015 }, { "epoch": 0.6654939900322486, "grad_norm": 2.2292875747747587, "learning_rate": 9.314185228604923e-05, "loss": 3.5895256996154785, "step": 1135, "token_acc": 0.2457210395750263 }, { "epoch": 0.6660803283494576, "grad_norm": 2.844636266034063, "learning_rate": 9.322391559202812e-05, "loss": 3.5618162155151367, "step": 1136, "token_acc": 0.25125059986080717 }, { "epoch": 0.6666666666666666, "grad_norm": 2.199168991504178, "learning_rate": 9.330597889800702e-05, "loss": 3.610119342803955, "step": 1137, "token_acc": 0.24221313259893504 }, { "epoch": 0.6672530049838757, "grad_norm": 3.1596247675696447, "learning_rate": 9.338804220398592e-05, "loss": 3.584787607192993, "step": 1138, "token_acc": 0.24636447265413924 }, { "epoch": 0.6678393433010847, "grad_norm": 3.7970734713321836, "learning_rate": 9.347010550996482e-05, "loss": 3.6107027530670166, "step": 1139, "token_acc": 0.24291383594160498 }, { "epoch": 0.6684256816182937, "grad_norm": 2.0825958033192054, "learning_rate": 9.355216881594373e-05, "loss": 3.598156690597534, "step": 1140, "token_acc": 0.24665940313834517 }, { "epoch": 0.6690120199355027, "grad_norm": 4.472428704872754, "learning_rate": 9.363423212192263e-05, "loss": 3.615342617034912, "step": 1141, "token_acc": 0.24178383796221145 }, { "epoch": 0.6695983582527119, "grad_norm": 2.202871165158378, "learning_rate": 9.371629542790152e-05, "loss": 3.5906100273132324, "step": 1142, "token_acc": 0.2459319891783013 }, { "epoch": 0.6701846965699209, "grad_norm": 4.179642944134784, "learning_rate": 9.379835873388042e-05, "loss": 3.5967178344726562, "step": 1143, "token_acc": 0.24476220145922725 }, { "epoch": 0.6707710348871299, "grad_norm": 2.7187991648401812, "learning_rate": 9.388042203985932e-05, "loss": 3.634884834289551, "step": 1144, "token_acc": 0.2402460088137016 }, { "epoch": 0.6713573732043389, "grad_norm": 3.5855757949469456, "learning_rate": 9.396248534583822e-05, "loss": 3.632540702819824, "step": 1145, "token_acc": 0.23987249039718053 }, { "epoch": 0.671943711521548, "grad_norm": 2.483847800891102, "learning_rate": 9.404454865181712e-05, "loss": 3.5583815574645996, "step": 1146, "token_acc": 0.2491700905568115 }, { "epoch": 0.672530049838757, "grad_norm": 2.595520530828991, "learning_rate": 9.4126611957796e-05, "loss": 3.5689821243286133, "step": 1147, "token_acc": 0.24784972327771354 }, { "epoch": 0.673116388155966, "grad_norm": 2.242352049434097, "learning_rate": 9.42086752637749e-05, "loss": 3.6121182441711426, "step": 1148, "token_acc": 0.24175938110619294 }, { "epoch": 0.673702726473175, "grad_norm": 2.653874761361489, "learning_rate": 9.42907385697538e-05, "loss": 3.594228982925415, "step": 1149, "token_acc": 0.24437320563911386 }, { "epoch": 0.6742890647903841, "grad_norm": 2.042469062284592, "learning_rate": 9.43728018757327e-05, "loss": 3.6022393703460693, "step": 1150, "token_acc": 0.2442453080843458 }, { "epoch": 0.6748754031075931, "grad_norm": 3.5747253738696525, "learning_rate": 9.445486518171159e-05, "loss": 3.5794405937194824, "step": 1151, "token_acc": 0.2472802502510639 }, { "epoch": 0.6754617414248021, "grad_norm": 2.7483289849776877, "learning_rate": 9.453692848769049e-05, "loss": 3.5764782428741455, "step": 1152, "token_acc": 0.24691186749977323 }, { "epoch": 0.6760480797420111, "grad_norm": 3.105169588977674, "learning_rate": 9.461899179366939e-05, "loss": 3.582529067993164, "step": 1153, "token_acc": 0.2441149865365197 }, { "epoch": 0.6766344180592202, "grad_norm": 2.1664284262066325, "learning_rate": 9.470105509964829e-05, "loss": 3.6183509826660156, "step": 1154, "token_acc": 0.24155947479989962 }, { "epoch": 0.6772207563764292, "grad_norm": 2.5547271519292507, "learning_rate": 9.478311840562719e-05, "loss": 3.5611495971679688, "step": 1155, "token_acc": 0.24853782659793192 }, { "epoch": 0.6778070946936382, "grad_norm": 2.7906522204595188, "learning_rate": 9.486518171160608e-05, "loss": 3.6035265922546387, "step": 1156, "token_acc": 0.24316116439111599 }, { "epoch": 0.6783934330108473, "grad_norm": 3.309156250841553, "learning_rate": 9.494724501758498e-05, "loss": 3.5210158824920654, "step": 1157, "token_acc": 0.25216078986013796 }, { "epoch": 0.6789797713280563, "grad_norm": 2.746292794685027, "learning_rate": 9.502930832356389e-05, "loss": 3.6111483573913574, "step": 1158, "token_acc": 0.24146687769403408 }, { "epoch": 0.6795661096452653, "grad_norm": 1.9844604159645325, "learning_rate": 9.511137162954279e-05, "loss": 3.569340944290161, "step": 1159, "token_acc": 0.24738920625534566 }, { "epoch": 0.6801524479624743, "grad_norm": 2.388014953048503, "learning_rate": 9.519343493552169e-05, "loss": 3.4791207313537598, "step": 1160, "token_acc": 0.2562385535545949 }, { "epoch": 0.6807387862796834, "grad_norm": 2.516515604662825, "learning_rate": 9.527549824150059e-05, "loss": 3.6110122203826904, "step": 1161, "token_acc": 0.24173699832236417 }, { "epoch": 0.6813251245968924, "grad_norm": 4.169876385631033, "learning_rate": 9.535756154747948e-05, "loss": 3.5419564247131348, "step": 1162, "token_acc": 0.250024398888438 }, { "epoch": 0.6819114629141014, "grad_norm": 2.2782455342120365, "learning_rate": 9.543962485345838e-05, "loss": 3.593749761581421, "step": 1163, "token_acc": 0.2437383265828629 }, { "epoch": 0.6824978012313104, "grad_norm": 3.1015729107772585, "learning_rate": 9.552168815943727e-05, "loss": 3.563033103942871, "step": 1164, "token_acc": 0.24950859883596782 }, { "epoch": 0.6830841395485195, "grad_norm": 3.009176898620129, "learning_rate": 9.560375146541616e-05, "loss": 3.5905706882476807, "step": 1165, "token_acc": 0.24329414523129306 }, { "epoch": 0.6836704778657285, "grad_norm": 4.217561168197373, "learning_rate": 9.568581477139506e-05, "loss": 3.584014415740967, "step": 1166, "token_acc": 0.24540054599970068 }, { "epoch": 0.6842568161829375, "grad_norm": 2.3586906505896343, "learning_rate": 9.576787807737396e-05, "loss": 3.6028177738189697, "step": 1167, "token_acc": 0.24410240685207032 }, { "epoch": 0.6848431545001465, "grad_norm": 2.9530525977202484, "learning_rate": 9.584994138335286e-05, "loss": 3.603588581085205, "step": 1168, "token_acc": 0.244469648096893 }, { "epoch": 0.6854294928173557, "grad_norm": 2.5844753375797853, "learning_rate": 9.593200468933176e-05, "loss": 3.5811610221862793, "step": 1169, "token_acc": 0.24398261644751504 }, { "epoch": 0.6860158311345647, "grad_norm": 1.6056034309786322, "learning_rate": 9.601406799531065e-05, "loss": 3.5396506786346436, "step": 1170, "token_acc": 0.2499470680885125 }, { "epoch": 0.6866021694517737, "grad_norm": 3.958588136251107, "learning_rate": 9.609613130128955e-05, "loss": 3.576620101928711, "step": 1171, "token_acc": 0.2452747775339676 }, { "epoch": 0.6871885077689827, "grad_norm": 2.023040570597517, "learning_rate": 9.617819460726845e-05, "loss": 3.5877130031585693, "step": 1172, "token_acc": 0.2438442288680178 }, { "epoch": 0.6877748460861918, "grad_norm": 3.7897590561604555, "learning_rate": 9.626025791324735e-05, "loss": 3.5919458866119385, "step": 1173, "token_acc": 0.242657824933687 }, { "epoch": 0.6883611844034008, "grad_norm": 3.462756709799863, "learning_rate": 9.634232121922625e-05, "loss": 3.609170436859131, "step": 1174, "token_acc": 0.24336571722595535 }, { "epoch": 0.6889475227206098, "grad_norm": 2.643834595176728, "learning_rate": 9.642438452520516e-05, "loss": 3.6074862480163574, "step": 1175, "token_acc": 0.24344418587962727 }, { "epoch": 0.6895338610378188, "grad_norm": 2.71792640940671, "learning_rate": 9.650644783118406e-05, "loss": 3.59952712059021, "step": 1176, "token_acc": 0.2432607021947541 }, { "epoch": 0.6901201993550279, "grad_norm": 3.2931582454606714, "learning_rate": 9.658851113716295e-05, "loss": 3.5765795707702637, "step": 1177, "token_acc": 0.24243539525224111 }, { "epoch": 0.6907065376722369, "grad_norm": 2.7547126705212945, "learning_rate": 9.667057444314185e-05, "loss": 3.6101436614990234, "step": 1178, "token_acc": 0.24072255509428783 }, { "epoch": 0.6912928759894459, "grad_norm": 2.0813176695133775, "learning_rate": 9.675263774912075e-05, "loss": 3.574465751647949, "step": 1179, "token_acc": 0.24785099153576673 }, { "epoch": 0.6918792143066549, "grad_norm": 3.9381562207803946, "learning_rate": 9.683470105509965e-05, "loss": 3.568744659423828, "step": 1180, "token_acc": 0.24604529245024231 }, { "epoch": 0.692465552623864, "grad_norm": 2.107174513486288, "learning_rate": 9.691676436107855e-05, "loss": 3.619206428527832, "step": 1181, "token_acc": 0.24190360623807056 }, { "epoch": 0.693051890941073, "grad_norm": 2.5158419415880204, "learning_rate": 9.699882766705743e-05, "loss": 3.510016441345215, "step": 1182, "token_acc": 0.2509512336330931 }, { "epoch": 0.693638229258282, "grad_norm": 2.223861925486757, "learning_rate": 9.708089097303633e-05, "loss": 3.5318374633789062, "step": 1183, "token_acc": 0.2515877221324717 }, { "epoch": 0.6942245675754911, "grad_norm": 2.7141995108551313, "learning_rate": 9.716295427901523e-05, "loss": 3.520838737487793, "step": 1184, "token_acc": 0.2508289437923046 }, { "epoch": 0.6948109058927001, "grad_norm": 2.6825105202463475, "learning_rate": 9.724501758499412e-05, "loss": 3.5538434982299805, "step": 1185, "token_acc": 0.24910541451988158 }, { "epoch": 0.6953972442099091, "grad_norm": 3.433787000203574, "learning_rate": 9.732708089097302e-05, "loss": 3.543549060821533, "step": 1186, "token_acc": 0.24789602971463331 }, { "epoch": 0.6959835825271181, "grad_norm": 2.4260343764528094, "learning_rate": 9.740914419695192e-05, "loss": 3.581066608428955, "step": 1187, "token_acc": 0.24430200878334418 }, { "epoch": 0.6965699208443272, "grad_norm": 2.4153502188031033, "learning_rate": 9.749120750293082e-05, "loss": 3.5370540618896484, "step": 1188, "token_acc": 0.24838737399121358 }, { "epoch": 0.6971562591615362, "grad_norm": 2.1093712765345507, "learning_rate": 9.757327080890972e-05, "loss": 3.5378026962280273, "step": 1189, "token_acc": 0.2484754570993364 }, { "epoch": 0.6977425974787452, "grad_norm": 2.5279875322057714, "learning_rate": 9.765533411488861e-05, "loss": 3.596057891845703, "step": 1190, "token_acc": 0.24359018913319522 }, { "epoch": 0.6983289357959542, "grad_norm": 2.6419982404441384, "learning_rate": 9.773739742086751e-05, "loss": 3.5669679641723633, "step": 1191, "token_acc": 0.24765664711568092 }, { "epoch": 0.6989152741131633, "grad_norm": 2.7353021714608814, "learning_rate": 9.781946072684641e-05, "loss": 3.5290098190307617, "step": 1192, "token_acc": 0.25008044809589414 }, { "epoch": 0.6995016124303723, "grad_norm": 3.274750141238885, "learning_rate": 9.790152403282532e-05, "loss": 3.564891815185547, "step": 1193, "token_acc": 0.2463891119012119 }, { "epoch": 0.7000879507475813, "grad_norm": 2.0040008550109776, "learning_rate": 9.798358733880422e-05, "loss": 3.515172243118286, "step": 1194, "token_acc": 0.2532676471607525 }, { "epoch": 0.7006742890647903, "grad_norm": 2.4645147825159803, "learning_rate": 9.806565064478312e-05, "loss": 3.580850124359131, "step": 1195, "token_acc": 0.24458582523602967 }, { "epoch": 0.7012606273819995, "grad_norm": 2.0921528252891073, "learning_rate": 9.814771395076202e-05, "loss": 3.540644884109497, "step": 1196, "token_acc": 0.24851149339741 }, { "epoch": 0.7018469656992085, "grad_norm": 4.074097228077286, "learning_rate": 9.822977725674091e-05, "loss": 3.541962146759033, "step": 1197, "token_acc": 0.2475354743317081 }, { "epoch": 0.7024333040164175, "grad_norm": 1.8856338237330241, "learning_rate": 9.831184056271981e-05, "loss": 3.609485626220703, "step": 1198, "token_acc": 0.24053244661613835 }, { "epoch": 0.7030196423336265, "grad_norm": 3.806516590019726, "learning_rate": 9.839390386869871e-05, "loss": 3.6008718013763428, "step": 1199, "token_acc": 0.2434892567240831 }, { "epoch": 0.7036059806508356, "grad_norm": 2.70317909706124, "learning_rate": 9.84759671746776e-05, "loss": 3.606748104095459, "step": 1200, "token_acc": 0.24390290750639343 }, { "epoch": 0.7041923189680446, "grad_norm": 2.4246448090661925, "learning_rate": 9.855803048065649e-05, "loss": 3.601593494415283, "step": 1201, "token_acc": 0.24274322773434961 }, { "epoch": 0.7047786572852536, "grad_norm": 2.052612131005801, "learning_rate": 9.864009378663539e-05, "loss": 3.578190326690674, "step": 1202, "token_acc": 0.24568551631069474 }, { "epoch": 0.7053649956024626, "grad_norm": 3.038200640478679, "learning_rate": 9.872215709261429e-05, "loss": 3.6017799377441406, "step": 1203, "token_acc": 0.24196362810809668 }, { "epoch": 0.7059513339196717, "grad_norm": 2.365673862777938, "learning_rate": 9.880422039859319e-05, "loss": 3.5267391204833984, "step": 1204, "token_acc": 0.2492665991304715 }, { "epoch": 0.7065376722368807, "grad_norm": 2.8864967649728133, "learning_rate": 9.888628370457208e-05, "loss": 3.5487489700317383, "step": 1205, "token_acc": 0.24962579640712307 }, { "epoch": 0.7071240105540897, "grad_norm": 2.481756996527984, "learning_rate": 9.896834701055098e-05, "loss": 3.571739673614502, "step": 1206, "token_acc": 0.24509623120077187 }, { "epoch": 0.7077103488712987, "grad_norm": 2.738989124914833, "learning_rate": 9.905041031652988e-05, "loss": 3.5909862518310547, "step": 1207, "token_acc": 0.2429637688360732 }, { "epoch": 0.7082966871885078, "grad_norm": 2.751069773899159, "learning_rate": 9.913247362250878e-05, "loss": 3.5597996711730957, "step": 1208, "token_acc": 0.24779391355807048 }, { "epoch": 0.7088830255057168, "grad_norm": 2.2558497451786232, "learning_rate": 9.921453692848768e-05, "loss": 3.5285754203796387, "step": 1209, "token_acc": 0.25005577131731854 }, { "epoch": 0.7094693638229258, "grad_norm": 1.6738495355304766, "learning_rate": 9.929660023446659e-05, "loss": 3.5502572059631348, "step": 1210, "token_acc": 0.2475436245892749 }, { "epoch": 0.7100557021401348, "grad_norm": 3.750214077876057, "learning_rate": 9.937866354044549e-05, "loss": 3.5298142433166504, "step": 1211, "token_acc": 0.25007025761124124 }, { "epoch": 0.7106420404573439, "grad_norm": 1.7417864911260688, "learning_rate": 9.946072684642438e-05, "loss": 3.5526413917541504, "step": 1212, "token_acc": 0.24815282722713525 }, { "epoch": 0.7112283787745529, "grad_norm": 3.0864405750735724, "learning_rate": 9.954279015240328e-05, "loss": 3.5368363857269287, "step": 1213, "token_acc": 0.24839768682345328 }, { "epoch": 0.7118147170917619, "grad_norm": 1.9335035028660956, "learning_rate": 9.962485345838218e-05, "loss": 3.5323128700256348, "step": 1214, "token_acc": 0.24854425712614744 }, { "epoch": 0.712401055408971, "grad_norm": 2.188916205146948, "learning_rate": 9.970691676436108e-05, "loss": 3.5288867950439453, "step": 1215, "token_acc": 0.25078088536243853 }, { "epoch": 0.71298739372618, "grad_norm": 1.6033803284467862, "learning_rate": 9.978898007033998e-05, "loss": 3.5163066387176514, "step": 1216, "token_acc": 0.25171774399877617 }, { "epoch": 0.713573732043389, "grad_norm": 2.6779715310208396, "learning_rate": 9.987104337631887e-05, "loss": 3.5640363693237305, "step": 1217, "token_acc": 0.24574018157603114 }, { "epoch": 0.714160070360598, "grad_norm": 3.5850998682601025, "learning_rate": 9.995310668229776e-05, "loss": 3.5692007541656494, "step": 1218, "token_acc": 0.24389504019414532 }, { "epoch": 0.7147464086778071, "grad_norm": 1.859973620386237, "learning_rate": 0.00010003516998827666, "loss": 3.522157669067383, "step": 1219, "token_acc": 0.250383317604168 }, { "epoch": 0.7153327469950161, "grad_norm": 3.590657150733063, "learning_rate": 0.00010011723329425555, "loss": 3.574082374572754, "step": 1220, "token_acc": 0.24495054157438034 }, { "epoch": 0.7159190853122251, "grad_norm": 2.3371002353368784, "learning_rate": 0.00010019929660023445, "loss": 3.554076671600342, "step": 1221, "token_acc": 0.2478048411878794 }, { "epoch": 0.7165054236294341, "grad_norm": 3.037711524732583, "learning_rate": 0.00010028135990621335, "loss": 3.5288844108581543, "step": 1222, "token_acc": 0.2503178489245674 }, { "epoch": 0.7170917619466433, "grad_norm": 2.8460803608924876, "learning_rate": 0.00010036342321219225, "loss": 3.5708634853363037, "step": 1223, "token_acc": 0.24549793853317206 }, { "epoch": 0.7176781002638523, "grad_norm": 2.2697022290538014, "learning_rate": 0.00010044548651817115, "loss": 3.488515853881836, "step": 1224, "token_acc": 0.25668942657576643 }, { "epoch": 0.7182644385810613, "grad_norm": 3.241206908521797, "learning_rate": 0.00010052754982415004, "loss": 3.5712051391601562, "step": 1225, "token_acc": 0.2463248381610347 }, { "epoch": 0.7188507768982703, "grad_norm": 2.104504358295846, "learning_rate": 0.00010060961313012894, "loss": 3.576154947280884, "step": 1226, "token_acc": 0.24413529456441935 }, { "epoch": 0.7194371152154794, "grad_norm": 2.314494942554422, "learning_rate": 0.00010069167643610784, "loss": 3.5392565727233887, "step": 1227, "token_acc": 0.2488117619189226 }, { "epoch": 0.7200234535326884, "grad_norm": 2.6589998889116955, "learning_rate": 0.00010077373974208675, "loss": 3.5736331939697266, "step": 1228, "token_acc": 0.24295954142088783 }, { "epoch": 0.7206097918498974, "grad_norm": 2.2610709655514136, "learning_rate": 0.00010085580304806565, "loss": 3.4810173511505127, "step": 1229, "token_acc": 0.2567681452246103 }, { "epoch": 0.7211961301671064, "grad_norm": 2.671917040299621, "learning_rate": 0.00010093786635404455, "loss": 3.540220260620117, "step": 1230, "token_acc": 0.24977325624471464 }, { "epoch": 0.7217824684843155, "grad_norm": 1.9517242286861856, "learning_rate": 0.00010101992966002345, "loss": 3.545020341873169, "step": 1231, "token_acc": 0.24789976231456437 }, { "epoch": 0.7223688068015245, "grad_norm": 2.845469277029176, "learning_rate": 0.00010110199296600234, "loss": 3.535029649734497, "step": 1232, "token_acc": 0.2502838728670108 }, { "epoch": 0.7229551451187335, "grad_norm": 2.592958351738877, "learning_rate": 0.00010118405627198124, "loss": 3.5664520263671875, "step": 1233, "token_acc": 0.24465025906735752 }, { "epoch": 0.7235414834359425, "grad_norm": 2.9008662370789438, "learning_rate": 0.00010126611957796014, "loss": 3.492316246032715, "step": 1234, "token_acc": 0.25446014527447575 }, { "epoch": 0.7241278217531516, "grad_norm": 1.8629647765058137, "learning_rate": 0.00010134818288393904, "loss": 3.496278762817383, "step": 1235, "token_acc": 0.25511106742677414 }, { "epoch": 0.7247141600703606, "grad_norm": 2.959362238274771, "learning_rate": 0.00010143024618991792, "loss": 3.5446105003356934, "step": 1236, "token_acc": 0.24830599630218944 }, { "epoch": 0.7253004983875696, "grad_norm": 2.527224324267823, "learning_rate": 0.00010151230949589682, "loss": 3.5205023288726807, "step": 1237, "token_acc": 0.25118066852075355 }, { "epoch": 0.7258868367047786, "grad_norm": 2.0121949201708347, "learning_rate": 0.00010159437280187572, "loss": 3.5169951915740967, "step": 1238, "token_acc": 0.24935938748678818 }, { "epoch": 0.7264731750219877, "grad_norm": 2.0104404347405613, "learning_rate": 0.00010167643610785462, "loss": 3.5548434257507324, "step": 1239, "token_acc": 0.24538883708051543 }, { "epoch": 0.7270595133391967, "grad_norm": 2.7040633366424096, "learning_rate": 0.00010175849941383351, "loss": 3.4885833263397217, "step": 1240, "token_acc": 0.2543689936249167 }, { "epoch": 0.7276458516564057, "grad_norm": 2.0061427786516686, "learning_rate": 0.00010184056271981241, "loss": 3.511047124862671, "step": 1241, "token_acc": 0.2537486138566125 }, { "epoch": 0.7282321899736148, "grad_norm": 2.536271553883254, "learning_rate": 0.00010192262602579131, "loss": 3.493194818496704, "step": 1242, "token_acc": 0.2547939780250363 }, { "epoch": 0.7288185282908238, "grad_norm": 1.8317891904897208, "learning_rate": 0.00010200468933177021, "loss": 3.5765206813812256, "step": 1243, "token_acc": 0.2457818989766784 }, { "epoch": 0.7294048666080328, "grad_norm": 2.38526486413872, "learning_rate": 0.0001020867526377491, "loss": 3.48268985748291, "step": 1244, "token_acc": 0.2563839294679975 }, { "epoch": 0.7299912049252418, "grad_norm": 2.5772036161189735, "learning_rate": 0.00010216881594372802, "loss": 3.5784783363342285, "step": 1245, "token_acc": 0.24209169017860563 }, { "epoch": 0.7305775432424509, "grad_norm": 2.6043678173199907, "learning_rate": 0.00010225087924970692, "loss": 3.5291495323181152, "step": 1246, "token_acc": 0.2499946937216114 }, { "epoch": 0.73116388155966, "grad_norm": 3.219606353142494, "learning_rate": 0.00010233294255568581, "loss": 3.5300350189208984, "step": 1247, "token_acc": 0.24908366150415862 }, { "epoch": 0.731750219876869, "grad_norm": 2.1215338027371056, "learning_rate": 0.00010241500586166471, "loss": 3.512089252471924, "step": 1248, "token_acc": 0.25182376873553713 }, { "epoch": 0.732336558194078, "grad_norm": 2.446167725847885, "learning_rate": 0.00010249706916764361, "loss": 3.5400424003601074, "step": 1249, "token_acc": 0.24788967674290488 }, { "epoch": 0.7329228965112871, "grad_norm": 2.809960705904554, "learning_rate": 0.00010257913247362251, "loss": 3.5229735374450684, "step": 1250, "token_acc": 0.25057285909356913 }, { "epoch": 0.7335092348284961, "grad_norm": 2.557896557668373, "learning_rate": 0.0001026611957796014, "loss": 3.512618064880371, "step": 1251, "token_acc": 0.2539301181279022 }, { "epoch": 0.7340955731457051, "grad_norm": 2.455850532467235, "learning_rate": 0.0001027432590855803, "loss": 3.4953341484069824, "step": 1252, "token_acc": 0.2543974456328771 }, { "epoch": 0.7346819114629141, "grad_norm": 2.979672897445438, "learning_rate": 0.00010282532239155919, "loss": 3.5181024074554443, "step": 1253, "token_acc": 0.25044222554294737 }, { "epoch": 0.7352682497801232, "grad_norm": 1.5263870848523355, "learning_rate": 0.00010290738569753809, "loss": 3.5820112228393555, "step": 1254, "token_acc": 0.24260758260436963 }, { "epoch": 0.7358545880973322, "grad_norm": 3.5919981618115844, "learning_rate": 0.00010298944900351698, "loss": 3.552605152130127, "step": 1255, "token_acc": 0.24636127520880016 }, { "epoch": 0.7364409264145412, "grad_norm": 1.6240463326800108, "learning_rate": 0.00010307151230949588, "loss": 3.510108470916748, "step": 1256, "token_acc": 0.2505355545849712 }, { "epoch": 0.7370272647317502, "grad_norm": 3.6007783652019163, "learning_rate": 0.00010315357561547478, "loss": 3.64194393157959, "step": 1257, "token_acc": 0.23846942527069082 }, { "epoch": 0.7376136030489593, "grad_norm": 2.282954206775683, "learning_rate": 0.00010323563892145368, "loss": 3.595787525177002, "step": 1258, "token_acc": 0.24153297223824038 }, { "epoch": 0.7381999413661683, "grad_norm": 2.5223354393119553, "learning_rate": 0.00010331770222743258, "loss": 3.5291693210601807, "step": 1259, "token_acc": 0.248165227976422 }, { "epoch": 0.7387862796833773, "grad_norm": 2.1372816804294863, "learning_rate": 0.00010339976553341147, "loss": 3.5437755584716797, "step": 1260, "token_acc": 0.2475048028426746 }, { "epoch": 0.7393726180005863, "grad_norm": 1.8932817110190208, "learning_rate": 0.00010348182883939037, "loss": 3.5661864280700684, "step": 1261, "token_acc": 0.24571944221632627 }, { "epoch": 0.7399589563177954, "grad_norm": 2.391256696665423, "learning_rate": 0.00010356389214536927, "loss": 3.504255771636963, "step": 1262, "token_acc": 0.25250944679662424 }, { "epoch": 0.7405452946350044, "grad_norm": 2.685601519172893, "learning_rate": 0.00010364595545134818, "loss": 3.5313851833343506, "step": 1263, "token_acc": 0.2481709251006596 }, { "epoch": 0.7411316329522134, "grad_norm": 2.2388382671648754, "learning_rate": 0.00010372801875732708, "loss": 3.486743450164795, "step": 1264, "token_acc": 0.25470351116615525 }, { "epoch": 0.7417179712694224, "grad_norm": 1.9576954399106532, "learning_rate": 0.00010381008206330598, "loss": 3.574458599090576, "step": 1265, "token_acc": 0.24379003402082675 }, { "epoch": 0.7423043095866315, "grad_norm": 2.6172956782483006, "learning_rate": 0.00010389214536928487, "loss": 3.4823784828186035, "step": 1266, "token_acc": 0.2542963539779342 }, { "epoch": 0.7428906479038405, "grad_norm": 2.192499468194125, "learning_rate": 0.00010397420867526377, "loss": 3.4702978134155273, "step": 1267, "token_acc": 0.256539926007245 }, { "epoch": 0.7434769862210495, "grad_norm": 1.9839557932065688, "learning_rate": 0.00010405627198124267, "loss": 3.4872279167175293, "step": 1268, "token_acc": 0.25513607184506226 }, { "epoch": 0.7440633245382586, "grad_norm": 1.7868150723998975, "learning_rate": 0.00010413833528722157, "loss": 3.53373646736145, "step": 1269, "token_acc": 0.24947466697518492 }, { "epoch": 0.7446496628554676, "grad_norm": 2.6476805529637137, "learning_rate": 0.00010422039859320047, "loss": 3.5297203063964844, "step": 1270, "token_acc": 0.24858181780590738 }, { "epoch": 0.7452360011726766, "grad_norm": 2.361338562750735, "learning_rate": 0.00010430246189917935, "loss": 3.4865264892578125, "step": 1271, "token_acc": 0.2543234734315481 }, { "epoch": 0.7458223394898856, "grad_norm": 2.265009490380844, "learning_rate": 0.00010438452520515825, "loss": 3.482454299926758, "step": 1272, "token_acc": 0.25472569876367696 }, { "epoch": 0.7464086778070947, "grad_norm": 1.6720529335710348, "learning_rate": 0.00010446658851113715, "loss": 3.4993371963500977, "step": 1273, "token_acc": 0.2512804262247779 }, { "epoch": 0.7469950161243037, "grad_norm": 1.972450313656642, "learning_rate": 0.00010454865181711605, "loss": 3.448000431060791, "step": 1274, "token_acc": 0.2572244917744068 }, { "epoch": 0.7475813544415127, "grad_norm": 2.3841494510760484, "learning_rate": 0.00010463071512309494, "loss": 3.489889144897461, "step": 1275, "token_acc": 0.2525078088629619 }, { "epoch": 0.7481676927587217, "grad_norm": 2.9279797394009432, "learning_rate": 0.00010471277842907384, "loss": 3.549703359603882, "step": 1276, "token_acc": 0.24451048547155743 }, { "epoch": 0.7487540310759309, "grad_norm": 2.019867216389879, "learning_rate": 0.00010479484173505274, "loss": 3.46688175201416, "step": 1277, "token_acc": 0.2562691099311121 }, { "epoch": 0.7493403693931399, "grad_norm": 2.730985763023011, "learning_rate": 0.00010487690504103164, "loss": 3.519805431365967, "step": 1278, "token_acc": 0.2495800415377097 }, { "epoch": 0.7499267077103489, "grad_norm": 2.082638619791961, "learning_rate": 0.00010495896834701054, "loss": 3.5153207778930664, "step": 1279, "token_acc": 0.249756050579269 }, { "epoch": 0.7505130460275579, "grad_norm": 2.8405793172197704, "learning_rate": 0.00010504103165298945, "loss": 3.492912769317627, "step": 1280, "token_acc": 0.25220687313662454 }, { "epoch": 0.751099384344767, "grad_norm": 1.730288990487118, "learning_rate": 0.00010512309495896834, "loss": 3.481194496154785, "step": 1281, "token_acc": 0.25366853717801674 }, { "epoch": 0.751685722661976, "grad_norm": 3.2119553012188846, "learning_rate": 0.00010520515826494724, "loss": 3.449909210205078, "step": 1282, "token_acc": 0.2580981878989935 }, { "epoch": 0.752272060979185, "grad_norm": 2.742712747612491, "learning_rate": 0.00010528722157092614, "loss": 3.5121009349823, "step": 1283, "token_acc": 0.2505381258869112 }, { "epoch": 0.752858399296394, "grad_norm": 1.4545011188575085, "learning_rate": 0.00010536928487690504, "loss": 3.5439231395721436, "step": 1284, "token_acc": 0.245371196083449 }, { "epoch": 0.7534447376136031, "grad_norm": 2.41673697456245, "learning_rate": 0.00010545134818288394, "loss": 3.5207419395446777, "step": 1285, "token_acc": 0.24917407613587292 }, { "epoch": 0.7540310759308121, "grad_norm": 1.9507894036337186, "learning_rate": 0.00010553341148886283, "loss": 3.4737367630004883, "step": 1286, "token_acc": 0.25532937930478916 }, { "epoch": 0.7546174142480211, "grad_norm": 2.9967319340906027, "learning_rate": 0.00010561547479484173, "loss": 3.564824104309082, "step": 1287, "token_acc": 0.24293510864775616 }, { "epoch": 0.7552037525652301, "grad_norm": 2.300008195888897, "learning_rate": 0.00010569753810082063, "loss": 3.5064244270324707, "step": 1288, "token_acc": 0.25193001798112635 }, { "epoch": 0.7557900908824392, "grad_norm": 2.3517556004978397, "learning_rate": 0.00010577960140679951, "loss": 3.590050220489502, "step": 1289, "token_acc": 0.24096708928711602 }, { "epoch": 0.7563764291996482, "grad_norm": 2.6320760163602963, "learning_rate": 0.00010586166471277841, "loss": 3.470578670501709, "step": 1290, "token_acc": 0.2527637593075482 }, { "epoch": 0.7569627675168572, "grad_norm": 2.0623161093793674, "learning_rate": 0.00010594372801875731, "loss": 3.515133857727051, "step": 1291, "token_acc": 0.25027538689158396 }, { "epoch": 0.7575491058340662, "grad_norm": 2.5105947765665224, "learning_rate": 0.00010602579132473621, "loss": 3.5093860626220703, "step": 1292, "token_acc": 0.2482950010882972 }, { "epoch": 0.7581354441512753, "grad_norm": 1.967381747787795, "learning_rate": 0.00010610785463071511, "loss": 3.512925624847412, "step": 1293, "token_acc": 0.25259228865269817 }, { "epoch": 0.7587217824684843, "grad_norm": 2.9659572022441885, "learning_rate": 0.000106189917936694, "loss": 3.5080997943878174, "step": 1294, "token_acc": 0.24911178235243958 }, { "epoch": 0.7593081207856933, "grad_norm": 1.6005001325087476, "learning_rate": 0.0001062719812426729, "loss": 3.5016462802886963, "step": 1295, "token_acc": 0.251606945532458 }, { "epoch": 0.7598944591029023, "grad_norm": 2.6865929254540344, "learning_rate": 0.0001063540445486518, "loss": 3.4618418216705322, "step": 1296, "token_acc": 0.2538365834173684 }, { "epoch": 0.7604807974201114, "grad_norm": 2.88349206228666, "learning_rate": 0.00010643610785463071, "loss": 3.511580467224121, "step": 1297, "token_acc": 0.2507736209649972 }, { "epoch": 0.7610671357373204, "grad_norm": 2.3547615757394693, "learning_rate": 0.00010651817116060961, "loss": 3.526371479034424, "step": 1298, "token_acc": 0.24630399750480844 }, { "epoch": 0.7616534740545294, "grad_norm": 2.0520291843334606, "learning_rate": 0.00010660023446658851, "loss": 3.4733638763427734, "step": 1299, "token_acc": 0.25465289876473635 }, { "epoch": 0.7622398123717385, "grad_norm": 2.245402735569946, "learning_rate": 0.0001066822977725674, "loss": 3.4583024978637695, "step": 1300, "token_acc": 0.25607082760856476 }, { "epoch": 0.7628261506889475, "grad_norm": 3.0051181804502227, "learning_rate": 0.0001067643610785463, "loss": 3.5066702365875244, "step": 1301, "token_acc": 0.25065920915185136 }, { "epoch": 0.7634124890061565, "grad_norm": 1.2258156928439565, "learning_rate": 0.0001068464243845252, "loss": 3.463474750518799, "step": 1302, "token_acc": 0.2557656844363171 }, { "epoch": 0.7639988273233655, "grad_norm": 5.055052158392281, "learning_rate": 0.0001069284876905041, "loss": 3.4673821926116943, "step": 1303, "token_acc": 0.2538427733245611 }, { "epoch": 0.7645851656405747, "grad_norm": 3.14728517943582, "learning_rate": 0.000107010550996483, "loss": 3.562363624572754, "step": 1304, "token_acc": 0.24510205244858516 }, { "epoch": 0.7651715039577837, "grad_norm": 3.8605077443426206, "learning_rate": 0.0001070926143024619, "loss": 3.5847511291503906, "step": 1305, "token_acc": 0.24290127884575974 }, { "epoch": 0.7657578422749927, "grad_norm": 2.6920499855816162, "learning_rate": 0.0001071746776084408, "loss": 3.5618789196014404, "step": 1306, "token_acc": 0.2441602246625102 }, { "epoch": 0.7663441805922017, "grad_norm": 2.662048176619308, "learning_rate": 0.00010725674091441968, "loss": 3.5402517318725586, "step": 1307, "token_acc": 0.24658553926790266 }, { "epoch": 0.7669305189094108, "grad_norm": 1.8288703937541881, "learning_rate": 0.00010733880422039858, "loss": 3.4651176929473877, "step": 1308, "token_acc": 0.25594564882115206 }, { "epoch": 0.7675168572266198, "grad_norm": 2.242627630469795, "learning_rate": 0.00010742086752637747, "loss": 3.4793124198913574, "step": 1309, "token_acc": 0.25246749472596547 }, { "epoch": 0.7681031955438288, "grad_norm": 2.4204574822065843, "learning_rate": 0.00010750293083235637, "loss": 3.460197925567627, "step": 1310, "token_acc": 0.25845337102998533 }, { "epoch": 0.7686895338610378, "grad_norm": 1.83129764013251, "learning_rate": 0.00010758499413833527, "loss": 3.5346317291259766, "step": 1311, "token_acc": 0.2473667714075756 }, { "epoch": 0.7692758721782469, "grad_norm": 2.851110336501319, "learning_rate": 0.00010766705744431417, "loss": 3.516423225402832, "step": 1312, "token_acc": 0.2480323152873366 }, { "epoch": 0.7698622104954559, "grad_norm": 2.2898225486202115, "learning_rate": 0.00010774912075029307, "loss": 3.529078245162964, "step": 1313, "token_acc": 0.2473062822840706 }, { "epoch": 0.7704485488126649, "grad_norm": 3.1516583731822956, "learning_rate": 0.00010783118405627196, "loss": 3.5145180225372314, "step": 1314, "token_acc": 0.25032044722584257 }, { "epoch": 0.7710348871298739, "grad_norm": 1.6654665695307302, "learning_rate": 0.00010791324736225088, "loss": 3.485283851623535, "step": 1315, "token_acc": 0.25288256284238286 }, { "epoch": 0.771621225447083, "grad_norm": 3.0481150432100215, "learning_rate": 0.00010799531066822977, "loss": 3.5010576248168945, "step": 1316, "token_acc": 0.25212017007830106 }, { "epoch": 0.772207563764292, "grad_norm": 1.5440237008993152, "learning_rate": 0.00010807737397420867, "loss": 3.4440574645996094, "step": 1317, "token_acc": 0.25964460090945285 }, { "epoch": 0.772793902081501, "grad_norm": 3.1774850389882334, "learning_rate": 0.00010815943728018757, "loss": 3.516622304916382, "step": 1318, "token_acc": 0.24847255943011126 }, { "epoch": 0.77338024039871, "grad_norm": 2.0792547371686556, "learning_rate": 0.00010824150058616647, "loss": 3.536007881164551, "step": 1319, "token_acc": 0.2448501407950348 }, { "epoch": 0.7739665787159191, "grad_norm": 2.0371222955932624, "learning_rate": 0.00010832356389214537, "loss": 3.487272262573242, "step": 1320, "token_acc": 0.2544468735887014 }, { "epoch": 0.7745529170331281, "grad_norm": 2.546753999628975, "learning_rate": 0.00010840562719812426, "loss": 3.4719176292419434, "step": 1321, "token_acc": 0.25392249930669625 }, { "epoch": 0.7751392553503371, "grad_norm": 1.5637097394076207, "learning_rate": 0.00010848769050410316, "loss": 3.514017105102539, "step": 1322, "token_acc": 0.25017965163670686 }, { "epoch": 0.7757255936675461, "grad_norm": 3.269707113769341, "learning_rate": 0.00010856975381008206, "loss": 3.516347885131836, "step": 1323, "token_acc": 0.2499133973001952 }, { "epoch": 0.7763119319847552, "grad_norm": 1.9912346894159882, "learning_rate": 0.00010865181711606094, "loss": 3.504991054534912, "step": 1324, "token_acc": 0.2508131817151413 }, { "epoch": 0.7768982703019642, "grad_norm": 2.879831577433987, "learning_rate": 0.00010873388042203984, "loss": 3.4906930923461914, "step": 1325, "token_acc": 0.2510815590951514 }, { "epoch": 0.7774846086191732, "grad_norm": 2.212997090047063, "learning_rate": 0.00010881594372801874, "loss": 3.5146164894104004, "step": 1326, "token_acc": 0.25096581792162925 }, { "epoch": 0.7780709469363823, "grad_norm": 3.2422513766347083, "learning_rate": 0.00010889800703399764, "loss": 3.5304179191589355, "step": 1327, "token_acc": 0.24632720651299037 }, { "epoch": 0.7786572852535913, "grad_norm": 1.5576904445351991, "learning_rate": 0.00010898007033997654, "loss": 3.4751577377319336, "step": 1328, "token_acc": 0.25465443905260954 }, { "epoch": 0.7792436235708003, "grad_norm": 3.757390382238241, "learning_rate": 0.00010906213364595543, "loss": 3.4894869327545166, "step": 1329, "token_acc": 0.2518094614367059 }, { "epoch": 0.7798299618880093, "grad_norm": 2.5143653018251144, "learning_rate": 0.00010914419695193433, "loss": 3.5633764266967773, "step": 1330, "token_acc": 0.24517024313638253 }, { "epoch": 0.7804163002052185, "grad_norm": 3.4313682505203853, "learning_rate": 0.00010922626025791323, "loss": 3.4894797801971436, "step": 1331, "token_acc": 0.2524260865448846 }, { "epoch": 0.7810026385224275, "grad_norm": 2.8838704178580414, "learning_rate": 0.00010930832356389214, "loss": 3.5439133644104004, "step": 1332, "token_acc": 0.24663373669879737 }, { "epoch": 0.7815889768396365, "grad_norm": 2.168133642666341, "learning_rate": 0.00010939038686987104, "loss": 3.531949996948242, "step": 1333, "token_acc": 0.24533899388762437 }, { "epoch": 0.7821753151568455, "grad_norm": 2.1749043186467176, "learning_rate": 0.00010947245017584994, "loss": 3.56597638130188, "step": 1334, "token_acc": 0.2438011519423874 }, { "epoch": 0.7827616534740546, "grad_norm": 1.7071748806849016, "learning_rate": 0.00010955451348182884, "loss": 3.503934860229492, "step": 1335, "token_acc": 0.25062254652816973 }, { "epoch": 0.7833479917912636, "grad_norm": 3.3967387622971317, "learning_rate": 0.00010963657678780773, "loss": 3.473453998565674, "step": 1336, "token_acc": 0.25408249577072345 }, { "epoch": 0.7839343301084726, "grad_norm": 1.7258696464588632, "learning_rate": 0.00010971864009378663, "loss": 3.460827112197876, "step": 1337, "token_acc": 0.2563349481631643 }, { "epoch": 0.7845206684256816, "grad_norm": 2.045523738336018, "learning_rate": 0.00010980070339976553, "loss": 3.514976978302002, "step": 1338, "token_acc": 0.2498473075677025 }, { "epoch": 0.7851070067428907, "grad_norm": 2.065586944123851, "learning_rate": 0.00010988276670574443, "loss": 3.51725435256958, "step": 1339, "token_acc": 0.2490453479740014 }, { "epoch": 0.7856933450600997, "grad_norm": 2.0088446653241667, "learning_rate": 0.00010996483001172333, "loss": 3.5098557472229004, "step": 1340, "token_acc": 0.25026360665981073 }, { "epoch": 0.7862796833773087, "grad_norm": 2.529519584850874, "learning_rate": 0.00011004689331770222, "loss": 3.5220985412597656, "step": 1341, "token_acc": 0.24702599696920294 }, { "epoch": 0.7868660216945177, "grad_norm": 1.5336792351690491, "learning_rate": 0.00011012895662368111, "loss": 3.5066158771514893, "step": 1342, "token_acc": 0.25002165925155817 }, { "epoch": 0.7874523600117268, "grad_norm": 2.291017187126893, "learning_rate": 0.00011021101992966, "loss": 3.4693357944488525, "step": 1343, "token_acc": 0.2536555165551124 }, { "epoch": 0.7880386983289358, "grad_norm": 1.6694639154637716, "learning_rate": 0.0001102930832356389, "loss": 3.5315287113189697, "step": 1344, "token_acc": 0.24749664619352696 }, { "epoch": 0.7886250366461448, "grad_norm": 2.749098491784195, "learning_rate": 0.0001103751465416178, "loss": 3.4832682609558105, "step": 1345, "token_acc": 0.25314299747292707 }, { "epoch": 0.7892113749633538, "grad_norm": 1.476909349305623, "learning_rate": 0.0001104572098475967, "loss": 3.4796810150146484, "step": 1346, "token_acc": 0.25257131566084634 }, { "epoch": 0.7897977132805629, "grad_norm": 2.1944216006067476, "learning_rate": 0.0001105392731535756, "loss": 3.4824490547180176, "step": 1347, "token_acc": 0.2553413363755422 }, { "epoch": 0.7903840515977719, "grad_norm": 2.3354412549144605, "learning_rate": 0.0001106213364595545, "loss": 3.5235204696655273, "step": 1348, "token_acc": 0.24800021316848303 }, { "epoch": 0.7909703899149809, "grad_norm": 2.4777474562265667, "learning_rate": 0.0001107033997655334, "loss": 3.512127161026001, "step": 1349, "token_acc": 0.24980902650607453 }, { "epoch": 0.7915567282321899, "grad_norm": 1.649031003375605, "learning_rate": 0.0001107854630715123, "loss": 3.447403907775879, "step": 1350, "token_acc": 0.2584091274638497 }, { "epoch": 0.792143066549399, "grad_norm": 1.9134891836115946, "learning_rate": 0.0001108675263774912, "loss": 3.5353763103485107, "step": 1351, "token_acc": 0.24636798126226497 }, { "epoch": 0.792729404866608, "grad_norm": 2.08475671079445, "learning_rate": 0.0001109495896834701, "loss": 3.4992029666900635, "step": 1352, "token_acc": 0.252505169595067 }, { "epoch": 0.793315743183817, "grad_norm": 1.610110042459501, "learning_rate": 0.000111031652989449, "loss": 3.5161097049713135, "step": 1353, "token_acc": 0.24626238304541184 }, { "epoch": 0.7939020815010261, "grad_norm": 1.6731768057426692, "learning_rate": 0.0001111137162954279, "loss": 3.547534227371216, "step": 1354, "token_acc": 0.2443312503001809 }, { "epoch": 0.7944884198182351, "grad_norm": 1.7656188632511818, "learning_rate": 0.0001111957796014068, "loss": 3.4768528938293457, "step": 1355, "token_acc": 0.2538136648540949 }, { "epoch": 0.7950747581354441, "grad_norm": 2.124341571882291, "learning_rate": 0.0001112778429073857, "loss": 3.4576823711395264, "step": 1356, "token_acc": 0.25689171758054175 }, { "epoch": 0.7956610964526531, "grad_norm": 2.149388011481822, "learning_rate": 0.00011135990621336459, "loss": 3.389029026031494, "step": 1357, "token_acc": 0.26545436315807525 }, { "epoch": 0.7962474347698623, "grad_norm": 1.485384161533022, "learning_rate": 0.00011144196951934349, "loss": 3.54351806640625, "step": 1358, "token_acc": 0.2442869471977404 }, { "epoch": 0.7968337730870713, "grad_norm": 2.688752375578216, "learning_rate": 0.00011152403282532239, "loss": 3.477694034576416, "step": 1359, "token_acc": 0.2528829115941105 }, { "epoch": 0.7974201114042803, "grad_norm": 1.8182793551601826, "learning_rate": 0.00011160609613130127, "loss": 3.479078769683838, "step": 1360, "token_acc": 0.25203874311201074 }, { "epoch": 0.7980064497214893, "grad_norm": 2.5228575256990453, "learning_rate": 0.00011168815943728017, "loss": 3.463512897491455, "step": 1361, "token_acc": 0.2555741655901898 }, { "epoch": 0.7985927880386984, "grad_norm": 1.4337638118674019, "learning_rate": 0.00011177022274325907, "loss": 3.407137632369995, "step": 1362, "token_acc": 0.2643104351813438 }, { "epoch": 0.7991791263559074, "grad_norm": 2.924363927645141, "learning_rate": 0.00011185228604923797, "loss": 3.5044167041778564, "step": 1363, "token_acc": 0.2503583506483293 }, { "epoch": 0.7997654646731164, "grad_norm": 2.154290283771355, "learning_rate": 0.00011193434935521686, "loss": 3.461982250213623, "step": 1364, "token_acc": 0.25475857299540217 }, { "epoch": 0.8003518029903254, "grad_norm": 1.910232496974231, "learning_rate": 0.00011201641266119576, "loss": 3.489426374435425, "step": 1365, "token_acc": 0.25238179678558187 }, { "epoch": 0.8009381413075345, "grad_norm": 1.9702064608666856, "learning_rate": 0.00011209847596717466, "loss": 3.4879424571990967, "step": 1366, "token_acc": 0.25162392813080314 }, { "epoch": 0.8015244796247435, "grad_norm": 1.8080870784131426, "learning_rate": 0.00011218053927315357, "loss": 3.480198860168457, "step": 1367, "token_acc": 0.25261769774978554 }, { "epoch": 0.8021108179419525, "grad_norm": 1.5444889050467996, "learning_rate": 0.00011226260257913247, "loss": 3.478196620941162, "step": 1368, "token_acc": 0.252042683815058 }, { "epoch": 0.8026971562591615, "grad_norm": 2.1629539813191063, "learning_rate": 0.00011234466588511137, "loss": 3.440249443054199, "step": 1369, "token_acc": 0.2579075062149088 }, { "epoch": 0.8032834945763706, "grad_norm": 1.9236920704974576, "learning_rate": 0.00011242672919109027, "loss": 3.450601100921631, "step": 1370, "token_acc": 0.2549119170984456 }, { "epoch": 0.8038698328935796, "grad_norm": 2.2304314135180916, "learning_rate": 0.00011250879249706916, "loss": 3.479851722717285, "step": 1371, "token_acc": 0.25291415633254827 }, { "epoch": 0.8044561712107886, "grad_norm": 1.8960556850224077, "learning_rate": 0.00011259085580304806, "loss": 3.482255697250366, "step": 1372, "token_acc": 0.25490154063725384 }, { "epoch": 0.8050425095279976, "grad_norm": 1.2038320410597012, "learning_rate": 0.00011267291910902696, "loss": 3.466371536254883, "step": 1373, "token_acc": 0.25450082472776164 }, { "epoch": 0.8056288478452067, "grad_norm": 2.020094947377214, "learning_rate": 0.00011275498241500586, "loss": 3.4769277572631836, "step": 1374, "token_acc": 0.25208877681229874 }, { "epoch": 0.8062151861624157, "grad_norm": 1.8740998109647056, "learning_rate": 0.00011283704572098476, "loss": 3.4642021656036377, "step": 1375, "token_acc": 0.2553745858197606 }, { "epoch": 0.8068015244796247, "grad_norm": 2.0622125512504192, "learning_rate": 0.00011291910902696365, "loss": 3.454598903656006, "step": 1376, "token_acc": 0.2547711134886444 }, { "epoch": 0.8073878627968337, "grad_norm": 2.2548132477222937, "learning_rate": 0.00011300117233294255, "loss": 3.486370325088501, "step": 1377, "token_acc": 0.2518087686810464 }, { "epoch": 0.8079742011140428, "grad_norm": 1.613118107044528, "learning_rate": 0.00011308323563892144, "loss": 3.4848580360412598, "step": 1378, "token_acc": 0.251870957049559 }, { "epoch": 0.8085605394312518, "grad_norm": 1.9422231512314676, "learning_rate": 0.00011316529894490033, "loss": 3.45316481590271, "step": 1379, "token_acc": 0.25464443122106684 }, { "epoch": 0.8091468777484608, "grad_norm": 1.6653469606209894, "learning_rate": 0.00011324736225087923, "loss": 3.4937076568603516, "step": 1380, "token_acc": 0.25082990679999695 }, { "epoch": 0.8097332160656698, "grad_norm": 2.124939943643208, "learning_rate": 0.00011332942555685813, "loss": 3.477816581726074, "step": 1381, "token_acc": 0.2506954641812961 }, { "epoch": 0.810319554382879, "grad_norm": 1.6321763000035545, "learning_rate": 0.00011341148886283703, "loss": 3.4749388694763184, "step": 1382, "token_acc": 0.25319382531514967 }, { "epoch": 0.810905892700088, "grad_norm": 2.119510104123133, "learning_rate": 0.00011349355216881593, "loss": 3.4837558269500732, "step": 1383, "token_acc": 0.24929537149974343 }, { "epoch": 0.811492231017297, "grad_norm": 2.068022797366922, "learning_rate": 0.00011357561547479482, "loss": 3.4494569301605225, "step": 1384, "token_acc": 0.2564743190620416 }, { "epoch": 0.8120785693345061, "grad_norm": 1.6422919960846851, "learning_rate": 0.00011365767878077373, "loss": 3.4373133182525635, "step": 1385, "token_acc": 0.25709290748449715 }, { "epoch": 0.8126649076517151, "grad_norm": 2.2372045104091556, "learning_rate": 0.00011373974208675263, "loss": 3.445688486099243, "step": 1386, "token_acc": 0.25671052800164 }, { "epoch": 0.8132512459689241, "grad_norm": 1.2201084013861518, "learning_rate": 0.00011382180539273153, "loss": 3.430572509765625, "step": 1387, "token_acc": 0.2585926991047822 }, { "epoch": 0.8138375842861331, "grad_norm": 2.324996247286695, "learning_rate": 0.00011390386869871043, "loss": 3.5182652473449707, "step": 1388, "token_acc": 0.24654964844480623 }, { "epoch": 0.8144239226033422, "grad_norm": 2.675608736844797, "learning_rate": 0.00011398593200468933, "loss": 3.4582936763763428, "step": 1389, "token_acc": 0.25326306483248584 }, { "epoch": 0.8150102609205512, "grad_norm": 1.869600242399582, "learning_rate": 0.00011406799531066822, "loss": 3.442856788635254, "step": 1390, "token_acc": 0.25580843940124287 }, { "epoch": 0.8155965992377602, "grad_norm": 1.7032887151046605, "learning_rate": 0.00011415005861664712, "loss": 3.4262657165527344, "step": 1391, "token_acc": 0.2573925909123715 }, { "epoch": 0.8161829375549692, "grad_norm": 2.9015546454073955, "learning_rate": 0.00011423212192262602, "loss": 3.483952045440674, "step": 1392, "token_acc": 0.2538033246745454 }, { "epoch": 0.8167692758721783, "grad_norm": 2.1028304392623722, "learning_rate": 0.00011431418522860492, "loss": 3.4491629600524902, "step": 1393, "token_acc": 0.25597029030297147 }, { "epoch": 0.8173556141893873, "grad_norm": 1.8078791600454742, "learning_rate": 0.00011439624853458382, "loss": 3.4457709789276123, "step": 1394, "token_acc": 0.25515805362930527 }, { "epoch": 0.8179419525065963, "grad_norm": 3.169047467189759, "learning_rate": 0.00011447831184056271, "loss": 3.4571824073791504, "step": 1395, "token_acc": 0.25467445547946094 }, { "epoch": 0.8185282908238053, "grad_norm": 1.5951260690361297, "learning_rate": 0.0001145603751465416, "loss": 3.4346394538879395, "step": 1396, "token_acc": 0.25597249799332517 }, { "epoch": 0.8191146291410144, "grad_norm": 4.385116591485995, "learning_rate": 0.0001146424384525205, "loss": 3.5010201930999756, "step": 1397, "token_acc": 0.25109525731530313 }, { "epoch": 0.8197009674582234, "grad_norm": 3.343843702761358, "learning_rate": 0.0001147245017584994, "loss": 3.554399251937866, "step": 1398, "token_acc": 0.24304291691943938 }, { "epoch": 0.8202873057754324, "grad_norm": 2.404543363456073, "learning_rate": 0.00011480656506447829, "loss": 3.514979362487793, "step": 1399, "token_acc": 0.24826293292219118 }, { "epoch": 0.8208736440926414, "grad_norm": 2.2479958394061224, "learning_rate": 0.00011488862837045719, "loss": 3.4540510177612305, "step": 1400, "token_acc": 0.25484034491498686 }, { "epoch": 0.8214599824098505, "grad_norm": 1.6229950669512192, "learning_rate": 0.00011497069167643609, "loss": 3.5001330375671387, "step": 1401, "token_acc": 0.2508741647961068 }, { "epoch": 0.8220463207270595, "grad_norm": 2.1256985540553814, "learning_rate": 0.000115052754982415, "loss": 3.4449234008789062, "step": 1402, "token_acc": 0.25856909825760804 }, { "epoch": 0.8226326590442685, "grad_norm": 2.1685634983470177, "learning_rate": 0.0001151348182883939, "loss": 3.4522035121917725, "step": 1403, "token_acc": 0.25680800440533214 }, { "epoch": 0.8232189973614775, "grad_norm": 2.2222628223915164, "learning_rate": 0.0001152168815943728, "loss": 3.4078269004821777, "step": 1404, "token_acc": 0.26125508009863146 }, { "epoch": 0.8238053356786866, "grad_norm": 2.2540913456394476, "learning_rate": 0.0001152989449003517, "loss": 3.421428680419922, "step": 1405, "token_acc": 0.25714753536351054 }, { "epoch": 0.8243916739958956, "grad_norm": 1.5758790277310466, "learning_rate": 0.00011538100820633059, "loss": 3.4481430053710938, "step": 1406, "token_acc": 0.25467517413292573 }, { "epoch": 0.8249780123131046, "grad_norm": 2.1963270728808055, "learning_rate": 0.00011546307151230949, "loss": 3.4928040504455566, "step": 1407, "token_acc": 0.2503780031677821 }, { "epoch": 0.8255643506303136, "grad_norm": 1.3170446656811312, "learning_rate": 0.00011554513481828839, "loss": 3.3883275985717773, "step": 1408, "token_acc": 0.2641605875769006 }, { "epoch": 0.8261506889475227, "grad_norm": 2.9419033942044566, "learning_rate": 0.00011562719812426729, "loss": 3.5186703205108643, "step": 1409, "token_acc": 0.24717833560563532 }, { "epoch": 0.8267370272647317, "grad_norm": 1.1692380459741931, "learning_rate": 0.00011570926143024618, "loss": 3.437136650085449, "step": 1410, "token_acc": 0.2561185338518797 }, { "epoch": 0.8273233655819408, "grad_norm": 2.6918183369605857, "learning_rate": 0.00011579132473622508, "loss": 3.488126277923584, "step": 1411, "token_acc": 0.25136150234741783 }, { "epoch": 0.8279097038991499, "grad_norm": 1.9497520367799692, "learning_rate": 0.00011587338804220398, "loss": 3.4991955757141113, "step": 1412, "token_acc": 0.24891923899654736 }, { "epoch": 0.8284960422163589, "grad_norm": 1.8313543979831282, "learning_rate": 0.00011595545134818286, "loss": 3.428920269012451, "step": 1413, "token_acc": 0.2576608239518813 }, { "epoch": 0.8290823805335679, "grad_norm": 1.957978553494661, "learning_rate": 0.00011603751465416176, "loss": 3.5101370811462402, "step": 1414, "token_acc": 0.2488521455789091 }, { "epoch": 0.8296687188507769, "grad_norm": 1.6542488213501336, "learning_rate": 0.00011611957796014066, "loss": 3.4838385581970215, "step": 1415, "token_acc": 0.2526956193234586 }, { "epoch": 0.830255057167986, "grad_norm": 2.321389631130462, "learning_rate": 0.00011620164126611956, "loss": 3.5034406185150146, "step": 1416, "token_acc": 0.25020156356658035 }, { "epoch": 0.830841395485195, "grad_norm": 1.6499007840372506, "learning_rate": 0.00011628370457209846, "loss": 3.4260053634643555, "step": 1417, "token_acc": 0.25813857349740915 }, { "epoch": 0.831427733802404, "grad_norm": 1.9767241748547444, "learning_rate": 0.00011636576787807735, "loss": 3.4941108226776123, "step": 1418, "token_acc": 0.2509137140864369 }, { "epoch": 0.832014072119613, "grad_norm": 2.245413634651665, "learning_rate": 0.00011644783118405625, "loss": 3.472487688064575, "step": 1419, "token_acc": 0.25300140973377916 }, { "epoch": 0.8326004104368221, "grad_norm": 1.8269350997122824, "learning_rate": 0.00011652989449003516, "loss": 3.445422410964966, "step": 1420, "token_acc": 0.25644839238173256 }, { "epoch": 0.8331867487540311, "grad_norm": 2.0756480585037935, "learning_rate": 0.00011661195779601406, "loss": 3.5002686977386475, "step": 1421, "token_acc": 0.24849583237358538 }, { "epoch": 0.8337730870712401, "grad_norm": 1.5245351710215633, "learning_rate": 0.00011669402110199296, "loss": 3.466574192047119, "step": 1422, "token_acc": 0.2529519734068948 }, { "epoch": 0.8343594253884491, "grad_norm": 1.3507138759851334, "learning_rate": 0.00011677608440797186, "loss": 3.396169662475586, "step": 1423, "token_acc": 0.2617191860760136 }, { "epoch": 0.8349457637056582, "grad_norm": 1.794485469643318, "learning_rate": 0.00011685814771395076, "loss": 3.3983442783355713, "step": 1424, "token_acc": 0.26067963124957944 }, { "epoch": 0.8355321020228672, "grad_norm": 1.903084282425927, "learning_rate": 0.00011694021101992965, "loss": 3.402369260787964, "step": 1425, "token_acc": 0.2619126957680773 }, { "epoch": 0.8361184403400762, "grad_norm": 1.704019101817661, "learning_rate": 0.00011702227432590855, "loss": 3.439713478088379, "step": 1426, "token_acc": 0.2553901137673177 }, { "epoch": 0.8367047786572852, "grad_norm": 1.8834001389273514, "learning_rate": 0.00011710433763188745, "loss": 3.462554693222046, "step": 1427, "token_acc": 0.2548908457448736 }, { "epoch": 0.8372911169744943, "grad_norm": 2.1597924429599407, "learning_rate": 0.00011718640093786635, "loss": 3.486043930053711, "step": 1428, "token_acc": 0.25108148487482884 }, { "epoch": 0.8378774552917033, "grad_norm": 1.49570010610963, "learning_rate": 0.00011726846424384525, "loss": 3.4443469047546387, "step": 1429, "token_acc": 0.25529090812374455 }, { "epoch": 0.8384637936089123, "grad_norm": 1.8904824609825497, "learning_rate": 0.00011735052754982414, "loss": 3.4574596881866455, "step": 1430, "token_acc": 0.25480277381931005 }, { "epoch": 0.8390501319261213, "grad_norm": 2.065599763445802, "learning_rate": 0.00011743259085580303, "loss": 3.4586968421936035, "step": 1431, "token_acc": 0.2553851937653681 }, { "epoch": 0.8396364702433304, "grad_norm": 1.6000344979178496, "learning_rate": 0.00011751465416178193, "loss": 3.4332659244537354, "step": 1432, "token_acc": 0.25718306524798445 }, { "epoch": 0.8402228085605394, "grad_norm": 1.9424360611777232, "learning_rate": 0.00011759671746776082, "loss": 3.477078914642334, "step": 1433, "token_acc": 0.2522593807939057 }, { "epoch": 0.8408091468777484, "grad_norm": 2.8275531600655217, "learning_rate": 0.00011767878077373972, "loss": 3.437272310256958, "step": 1434, "token_acc": 0.255872663143887 }, { "epoch": 0.8413954851949574, "grad_norm": 1.1009720617380683, "learning_rate": 0.00011776084407971862, "loss": 3.435957431793213, "step": 1435, "token_acc": 0.25730451720619607 }, { "epoch": 0.8419818235121665, "grad_norm": 3.0925689339273075, "learning_rate": 0.00011784290738569752, "loss": 3.5108790397644043, "step": 1436, "token_acc": 0.24727710349502982 }, { "epoch": 0.8425681618293756, "grad_norm": 1.954305740798933, "learning_rate": 0.00011792497069167643, "loss": 3.539501667022705, "step": 1437, "token_acc": 0.24504596823820554 }, { "epoch": 0.8431545001465846, "grad_norm": 1.6259211904974364, "learning_rate": 0.00011800703399765533, "loss": 3.5022828578948975, "step": 1438, "token_acc": 0.25097712463826016 }, { "epoch": 0.8437408384637937, "grad_norm": 1.7919078644937987, "learning_rate": 0.00011808909730363423, "loss": 3.45670485496521, "step": 1439, "token_acc": 0.2525527806454029 }, { "epoch": 0.8443271767810027, "grad_norm": 1.582170295473844, "learning_rate": 0.00011817116060961312, "loss": 3.463327407836914, "step": 1440, "token_acc": 0.25437266820800986 }, { "epoch": 0.8449135150982117, "grad_norm": 1.7006151646335155, "learning_rate": 0.00011825322391559202, "loss": 3.472337007522583, "step": 1441, "token_acc": 0.2535334405645203 }, { "epoch": 0.8454998534154207, "grad_norm": 1.7882102151699137, "learning_rate": 0.00011833528722157092, "loss": 3.44679594039917, "step": 1442, "token_acc": 0.2544991389016804 }, { "epoch": 0.8460861917326298, "grad_norm": 1.0512086685683577, "learning_rate": 0.00011841735052754982, "loss": 3.489717960357666, "step": 1443, "token_acc": 0.249919418956959 }, { "epoch": 0.8466725300498388, "grad_norm": 2.4652123281569622, "learning_rate": 0.00011849941383352872, "loss": 3.4744672775268555, "step": 1444, "token_acc": 0.251611797333115 }, { "epoch": 0.8472588683670478, "grad_norm": 1.2425158461126327, "learning_rate": 0.00011858147713950761, "loss": 3.456843852996826, "step": 1445, "token_acc": 0.25381877851345913 }, { "epoch": 0.8478452066842568, "grad_norm": 1.8970534216319244, "learning_rate": 0.00011866354044548651, "loss": 3.4840238094329834, "step": 1446, "token_acc": 0.2508379800353551 }, { "epoch": 0.8484315450014659, "grad_norm": 1.7764065929036983, "learning_rate": 0.00011874560375146541, "loss": 3.432133197784424, "step": 1447, "token_acc": 0.25989832985834965 }, { "epoch": 0.8490178833186749, "grad_norm": 1.2333360229939117, "learning_rate": 0.00011882766705744431, "loss": 3.4992733001708984, "step": 1448, "token_acc": 0.24982165373711102 }, { "epoch": 0.8496042216358839, "grad_norm": 1.7075093549146882, "learning_rate": 0.00011890973036342319, "loss": 3.4612910747528076, "step": 1449, "token_acc": 0.25407074323933276 }, { "epoch": 0.8501905599530929, "grad_norm": 1.4091663420826739, "learning_rate": 0.00011899179366940209, "loss": 3.4539175033569336, "step": 1450, "token_acc": 0.25379739359817316 }, { "epoch": 0.850776898270302, "grad_norm": 1.5223086875245324, "learning_rate": 0.00011907385697538099, "loss": 3.4142401218414307, "step": 1451, "token_acc": 0.26066212774803804 }, { "epoch": 0.851363236587511, "grad_norm": 1.4204352725613607, "learning_rate": 0.00011915592028135989, "loss": 3.456674575805664, "step": 1452, "token_acc": 0.2546225943641574 }, { "epoch": 0.85194957490472, "grad_norm": 2.1928904110604446, "learning_rate": 0.00011923798358733878, "loss": 3.472041606903076, "step": 1453, "token_acc": 0.2532037830375691 }, { "epoch": 0.852535913221929, "grad_norm": 1.4519761038592274, "learning_rate": 0.00011932004689331768, "loss": 3.4731338024139404, "step": 1454, "token_acc": 0.25183974466223086 }, { "epoch": 0.8531222515391381, "grad_norm": 2.2473210362642058, "learning_rate": 0.0001194021101992966, "loss": 3.4378409385681152, "step": 1455, "token_acc": 0.2577174198302897 }, { "epoch": 0.8537085898563471, "grad_norm": 1.4044904593766538, "learning_rate": 0.00011948417350527549, "loss": 3.369058132171631, "step": 1456, "token_acc": 0.26429449036372127 }, { "epoch": 0.8542949281735561, "grad_norm": 2.0249574315585197, "learning_rate": 0.00011956623681125439, "loss": 3.4121270179748535, "step": 1457, "token_acc": 0.25990162810534456 }, { "epoch": 0.8548812664907651, "grad_norm": 1.5081759271690975, "learning_rate": 0.00011964830011723329, "loss": 3.4754323959350586, "step": 1458, "token_acc": 0.25273340890354495 }, { "epoch": 0.8554676048079742, "grad_norm": 2.366400906599703, "learning_rate": 0.00011973036342321219, "loss": 3.3916494846343994, "step": 1459, "token_acc": 0.26229431526763375 }, { "epoch": 0.8560539431251832, "grad_norm": 1.499165918510265, "learning_rate": 0.00011981242672919108, "loss": 3.4688644409179688, "step": 1460, "token_acc": 0.2537529285858907 }, { "epoch": 0.8566402814423922, "grad_norm": 1.437435472100298, "learning_rate": 0.00011989449003516998, "loss": 3.4200077056884766, "step": 1461, "token_acc": 0.25759275043739627 }, { "epoch": 0.8572266197596012, "grad_norm": 1.6170195219990595, "learning_rate": 0.00011997655334114888, "loss": 3.465322494506836, "step": 1462, "token_acc": 0.25385671657556313 }, { "epoch": 0.8578129580768104, "grad_norm": 1.7116062493677813, "learning_rate": 0.00012005861664712778, "loss": 3.461017608642578, "step": 1463, "token_acc": 0.25443857965451055 }, { "epoch": 0.8583992963940194, "grad_norm": 1.1841054062320047, "learning_rate": 0.00012014067995310668, "loss": 3.4397692680358887, "step": 1464, "token_acc": 0.25731477052440777 }, { "epoch": 0.8589856347112284, "grad_norm": 1.8306737616180009, "learning_rate": 0.00012022274325908557, "loss": 3.419232130050659, "step": 1465, "token_acc": 0.2580833851247957 }, { "epoch": 0.8595719730284375, "grad_norm": 1.5784342683509562, "learning_rate": 0.00012030480656506447, "loss": 3.412738561630249, "step": 1466, "token_acc": 0.258783507000491 }, { "epoch": 0.8601583113456465, "grad_norm": 1.8577545998297527, "learning_rate": 0.00012038686987104336, "loss": 3.452714443206787, "step": 1467, "token_acc": 0.25439143406279147 }, { "epoch": 0.8607446496628555, "grad_norm": 1.537325896969621, "learning_rate": 0.00012046893317702225, "loss": 3.4643139839172363, "step": 1468, "token_acc": 0.25153157149798255 }, { "epoch": 0.8613309879800645, "grad_norm": 1.2333943346809917, "learning_rate": 0.00012055099648300115, "loss": 3.427891731262207, "step": 1469, "token_acc": 0.2589686509703271 }, { "epoch": 0.8619173262972736, "grad_norm": 2.1060774891726655, "learning_rate": 0.00012063305978898005, "loss": 3.399210214614868, "step": 1470, "token_acc": 0.2619854032298451 }, { "epoch": 0.8625036646144826, "grad_norm": 1.1850622123484198, "learning_rate": 0.00012071512309495895, "loss": 3.4835376739501953, "step": 1471, "token_acc": 0.25012865841954607 }, { "epoch": 0.8630900029316916, "grad_norm": 1.45820854118914, "learning_rate": 0.00012079718640093786, "loss": 3.4814066886901855, "step": 1472, "token_acc": 0.2520966040701522 }, { "epoch": 0.8636763412489006, "grad_norm": 2.152330476321324, "learning_rate": 0.00012087924970691676, "loss": 3.423205852508545, "step": 1473, "token_acc": 0.2606073596578971 }, { "epoch": 0.8642626795661097, "grad_norm": 1.008599675486724, "learning_rate": 0.00012096131301289566, "loss": 3.47617244720459, "step": 1474, "token_acc": 0.25345913410062515 }, { "epoch": 0.8648490178833187, "grad_norm": 2.793285852707622, "learning_rate": 0.00012104337631887455, "loss": 3.4529669284820557, "step": 1475, "token_acc": 0.2525538251617051 }, { "epoch": 0.8654353562005277, "grad_norm": 1.6071907101284855, "learning_rate": 0.00012112543962485345, "loss": 3.445446491241455, "step": 1476, "token_acc": 0.2558612804078507 }, { "epoch": 0.8660216945177367, "grad_norm": 2.59028463843677, "learning_rate": 0.00012120750293083235, "loss": 3.4565694332122803, "step": 1477, "token_acc": 0.25320589776755403 }, { "epoch": 0.8666080328349458, "grad_norm": 2.0575528482623975, "learning_rate": 0.00012128956623681125, "loss": 3.5098876953125, "step": 1478, "token_acc": 0.24792793753164727 }, { "epoch": 0.8671943711521548, "grad_norm": 1.5940516012889903, "learning_rate": 0.00012137162954279015, "loss": 3.4364118576049805, "step": 1479, "token_acc": 0.25552153319701204 }, { "epoch": 0.8677807094693638, "grad_norm": 1.6677439125124707, "learning_rate": 0.00012145369284876904, "loss": 3.463893413543701, "step": 1480, "token_acc": 0.25354651927885247 }, { "epoch": 0.8683670477865728, "grad_norm": 1.5513655219486746, "learning_rate": 0.00012153575615474794, "loss": 3.441990375518799, "step": 1481, "token_acc": 0.2558676514278864 }, { "epoch": 0.8689533861037819, "grad_norm": 1.6522320320110375, "learning_rate": 0.00012161781946072684, "loss": 3.3729634284973145, "step": 1482, "token_acc": 0.26538235807306126 }, { "epoch": 0.8695397244209909, "grad_norm": 1.2768603566985224, "learning_rate": 0.00012169988276670574, "loss": 3.4236669540405273, "step": 1483, "token_acc": 0.2578060330846578 }, { "epoch": 0.8701260627381999, "grad_norm": 2.2431656393440926, "learning_rate": 0.00012178194607268462, "loss": 3.442612648010254, "step": 1484, "token_acc": 0.25558748739071957 }, { "epoch": 0.8707124010554089, "grad_norm": 1.2946558345907428, "learning_rate": 0.00012186400937866352, "loss": 3.462536334991455, "step": 1485, "token_acc": 0.2524354752036435 }, { "epoch": 0.871298739372618, "grad_norm": 2.448818162980202, "learning_rate": 0.00012194607268464242, "loss": 3.468039035797119, "step": 1486, "token_acc": 0.2510686407311558 }, { "epoch": 0.871885077689827, "grad_norm": 1.55103730874604, "learning_rate": 0.00012202813599062132, "loss": 3.427125930786133, "step": 1487, "token_acc": 0.2583153468673006 }, { "epoch": 0.872471416007036, "grad_norm": 2.0842968900205596, "learning_rate": 0.00012211019929660023, "loss": 3.464477062225342, "step": 1488, "token_acc": 0.2532736097888346 }, { "epoch": 0.873057754324245, "grad_norm": 1.4843798066973837, "learning_rate": 0.0001221922626025791, "loss": 3.455716133117676, "step": 1489, "token_acc": 0.25366416257218083 }, { "epoch": 0.8736440926414542, "grad_norm": 1.2739777809175559, "learning_rate": 0.00012227432590855802, "loss": 3.447477102279663, "step": 1490, "token_acc": 0.25466691019656085 }, { "epoch": 0.8742304309586632, "grad_norm": 2.1859177736222946, "learning_rate": 0.0001223563892145369, "loss": 3.428298234939575, "step": 1491, "token_acc": 0.2575932657356036 }, { "epoch": 0.8748167692758722, "grad_norm": 1.9309164066343492, "learning_rate": 0.00012243845252051582, "loss": 3.4118127822875977, "step": 1492, "token_acc": 0.25880211788438445 }, { "epoch": 0.8754031075930812, "grad_norm": 1.2159960008627448, "learning_rate": 0.0001225205158264947, "loss": 3.426647663116455, "step": 1493, "token_acc": 0.25611590928287864 }, { "epoch": 0.8759894459102903, "grad_norm": 2.3467847192363904, "learning_rate": 0.00012260257913247362, "loss": 3.4199390411376953, "step": 1494, "token_acc": 0.25861233952347307 }, { "epoch": 0.8765757842274993, "grad_norm": 1.0311038411610973, "learning_rate": 0.0001226846424384525, "loss": 3.458796977996826, "step": 1495, "token_acc": 0.2529837523836246 }, { "epoch": 0.8771621225447083, "grad_norm": 1.7572641175282862, "learning_rate": 0.0001227667057444314, "loss": 3.4372024536132812, "step": 1496, "token_acc": 0.2582377987201153 }, { "epoch": 0.8777484608619174, "grad_norm": 1.581679540309632, "learning_rate": 0.00012284876905041032, "loss": 3.519561767578125, "step": 1497, "token_acc": 0.24561182711335208 }, { "epoch": 0.8783347991791264, "grad_norm": 1.3016882985742975, "learning_rate": 0.0001229308323563892, "loss": 3.4367005825042725, "step": 1498, "token_acc": 0.2563187825812227 }, { "epoch": 0.8789211374963354, "grad_norm": 1.691039645160426, "learning_rate": 0.00012301289566236812, "loss": 3.4573256969451904, "step": 1499, "token_acc": 0.25297267672796836 }, { "epoch": 0.8795074758135444, "grad_norm": 1.4327149142596116, "learning_rate": 0.000123094958968347, "loss": 3.4507312774658203, "step": 1500, "token_acc": 0.25454048554512887 }, { "epoch": 0.8800938141307535, "grad_norm": 1.7851981679790219, "learning_rate": 0.00012317702227432591, "loss": 3.429867744445801, "step": 1501, "token_acc": 0.25758230363777757 }, { "epoch": 0.8806801524479625, "grad_norm": 1.226875374192547, "learning_rate": 0.0001232590855803048, "loss": 3.4147696495056152, "step": 1502, "token_acc": 0.2602180228377484 }, { "epoch": 0.8812664907651715, "grad_norm": 1.5263218588430374, "learning_rate": 0.00012334114888628368, "loss": 3.4408297538757324, "step": 1503, "token_acc": 0.25580881408197353 }, { "epoch": 0.8818528290823805, "grad_norm": 1.3675672997456356, "learning_rate": 0.0001234232121922626, "loss": 3.4493942260742188, "step": 1504, "token_acc": 0.25357806068979283 }, { "epoch": 0.8824391673995896, "grad_norm": 1.382906136927589, "learning_rate": 0.00012350527549824148, "loss": 3.385406494140625, "step": 1505, "token_acc": 0.26275747131181376 }, { "epoch": 0.8830255057167986, "grad_norm": 1.815356813818446, "learning_rate": 0.0001235873388042204, "loss": 3.377808094024658, "step": 1506, "token_acc": 0.26319879584950184 }, { "epoch": 0.8836118440340076, "grad_norm": 1.9836318665184827, "learning_rate": 0.00012366940211019928, "loss": 3.4552505016326904, "step": 1507, "token_acc": 0.2530039210205941 }, { "epoch": 0.8841981823512166, "grad_norm": 1.3542132771549567, "learning_rate": 0.0001237514654161782, "loss": 3.4395880699157715, "step": 1508, "token_acc": 0.2572736526247707 }, { "epoch": 0.8847845206684257, "grad_norm": 1.3766723702014485, "learning_rate": 0.00012383352872215707, "loss": 3.393324375152588, "step": 1509, "token_acc": 0.26035008212780275 }, { "epoch": 0.8853708589856347, "grad_norm": 1.804991285081376, "learning_rate": 0.00012391559202813598, "loss": 3.3616652488708496, "step": 1510, "token_acc": 0.26688124386218387 }, { "epoch": 0.8859571973028437, "grad_norm": 1.6895761177182043, "learning_rate": 0.00012399765533411487, "loss": 3.453643560409546, "step": 1511, "token_acc": 0.2555476507628966 }, { "epoch": 0.8865435356200527, "grad_norm": 1.8597568453152975, "learning_rate": 0.00012407971864009378, "loss": 3.4125051498413086, "step": 1512, "token_acc": 0.25941851051624215 }, { "epoch": 0.8871298739372618, "grad_norm": 1.4951639880153045, "learning_rate": 0.00012416178194607266, "loss": 3.403019905090332, "step": 1513, "token_acc": 0.2605627876397107 }, { "epoch": 0.8877162122544708, "grad_norm": 1.8128308016909087, "learning_rate": 0.00012424384525205157, "loss": 3.475231170654297, "step": 1514, "token_acc": 0.2504313669362213 }, { "epoch": 0.8883025505716798, "grad_norm": 1.8037971469348852, "learning_rate": 0.00012432590855803049, "loss": 3.465027332305908, "step": 1515, "token_acc": 0.2539705320851488 }, { "epoch": 0.8888888888888888, "grad_norm": 2.0962125523400004, "learning_rate": 0.00012440797186400937, "loss": 3.4204673767089844, "step": 1516, "token_acc": 0.25520376159457114 }, { "epoch": 0.889475227206098, "grad_norm": 1.2899251765543887, "learning_rate": 0.00012449003516998828, "loss": 3.4191970825195312, "step": 1517, "token_acc": 0.2562839687232858 }, { "epoch": 0.890061565523307, "grad_norm": 2.727379528242444, "learning_rate": 0.00012457209847596717, "loss": 3.4421167373657227, "step": 1518, "token_acc": 0.25599884549283025 }, { "epoch": 0.890647903840516, "grad_norm": 1.145673622375147, "learning_rate": 0.00012465416178194608, "loss": 3.4065799713134766, "step": 1519, "token_acc": 0.2602408549072592 }, { "epoch": 0.891234242157725, "grad_norm": 2.666349245087391, "learning_rate": 0.00012473622508792496, "loss": 3.409372329711914, "step": 1520, "token_acc": 0.25798298177689954 }, { "epoch": 0.8918205804749341, "grad_norm": 1.9670408669208799, "learning_rate": 0.00012481828839390385, "loss": 3.4458847045898438, "step": 1521, "token_acc": 0.25505430719388605 }, { "epoch": 0.8924069187921431, "grad_norm": 2.1251656124526446, "learning_rate": 0.00012490035169988276, "loss": 3.456653594970703, "step": 1522, "token_acc": 0.2542231314741246 }, { "epoch": 0.8929932571093521, "grad_norm": 1.9416610422185518, "learning_rate": 0.00012498241500586164, "loss": 3.452975034713745, "step": 1523, "token_acc": 0.2530405918692655 }, { "epoch": 0.8935795954265612, "grad_norm": 1.7195799586848999, "learning_rate": 0.00012506447831184055, "loss": 3.4389374256134033, "step": 1524, "token_acc": 0.25461891478923604 }, { "epoch": 0.8941659337437702, "grad_norm": 1.7943957290731274, "learning_rate": 0.00012514654161781944, "loss": 3.422852039337158, "step": 1525, "token_acc": 0.25631443902591355 }, { "epoch": 0.8947522720609792, "grad_norm": 1.7734554839471535, "learning_rate": 0.00012522860492379835, "loss": 3.4251604080200195, "step": 1526, "token_acc": 0.2581400232535508 }, { "epoch": 0.8953386103781882, "grad_norm": 1.6463671389067425, "learning_rate": 0.00012531066822977724, "loss": 3.3679966926574707, "step": 1527, "token_acc": 0.2642591788999079 }, { "epoch": 0.8959249486953973, "grad_norm": 1.6201143698248306, "learning_rate": 0.00012539273153575615, "loss": 3.473146438598633, "step": 1528, "token_acc": 0.2533326823853475 }, { "epoch": 0.8965112870126063, "grad_norm": 1.0734999066798894, "learning_rate": 0.00012547479484173503, "loss": 3.3924050331115723, "step": 1529, "token_acc": 0.2626420143983436 }, { "epoch": 0.8970976253298153, "grad_norm": 2.1257549894392036, "learning_rate": 0.00012555685814771394, "loss": 3.367523193359375, "step": 1530, "token_acc": 0.2644689777951465 }, { "epoch": 0.8976839636470243, "grad_norm": 1.6466681516304258, "learning_rate": 0.00012563892145369283, "loss": 3.440213203430176, "step": 1531, "token_acc": 0.25518867177394233 }, { "epoch": 0.8982703019642334, "grad_norm": 1.8194296395151108, "learning_rate": 0.00012572098475967174, "loss": 3.4583487510681152, "step": 1532, "token_acc": 0.25308281920893794 }, { "epoch": 0.8988566402814424, "grad_norm": 1.0345156604330248, "learning_rate": 0.00012580304806565065, "loss": 3.3876595497131348, "step": 1533, "token_acc": 0.2609349987307475 }, { "epoch": 0.8994429785986514, "grad_norm": 1.753055775056012, "learning_rate": 0.00012588511137162953, "loss": 3.40232253074646, "step": 1534, "token_acc": 0.26097320121224504 }, { "epoch": 0.9000293169158604, "grad_norm": 1.234639541558314, "learning_rate": 0.00012596717467760845, "loss": 3.4030961990356445, "step": 1535, "token_acc": 0.260651337141889 }, { "epoch": 0.9006156552330695, "grad_norm": 1.2991405981535866, "learning_rate": 0.00012604923798358733, "loss": 3.4115095138549805, "step": 1536, "token_acc": 0.258784043724546 }, { "epoch": 0.9012019935502785, "grad_norm": 2.163376676254576, "learning_rate": 0.00012613130128956624, "loss": 3.3936753273010254, "step": 1537, "token_acc": 0.2604160744981795 }, { "epoch": 0.9017883318674875, "grad_norm": 1.4498963812030876, "learning_rate": 0.00012621336459554513, "loss": 3.4051949977874756, "step": 1538, "token_acc": 0.25826653638282265 }, { "epoch": 0.9023746701846965, "grad_norm": 1.6528266548629278, "learning_rate": 0.000126295427901524, "loss": 3.4513165950775146, "step": 1539, "token_acc": 0.2534301643579233 }, { "epoch": 0.9029610085019056, "grad_norm": 1.095087526042891, "learning_rate": 0.00012637749120750292, "loss": 3.401042938232422, "step": 1540, "token_acc": 0.2597976234823662 }, { "epoch": 0.9035473468191146, "grad_norm": 1.784380241259064, "learning_rate": 0.0001264595545134818, "loss": 3.3909621238708496, "step": 1541, "token_acc": 0.2607756376887038 }, { "epoch": 0.9041336851363236, "grad_norm": 1.041432635979975, "learning_rate": 0.00012654161781946072, "loss": 3.3980960845947266, "step": 1542, "token_acc": 0.2607974358354876 }, { "epoch": 0.9047200234535326, "grad_norm": 1.4305055170568384, "learning_rate": 0.0001266236811254396, "loss": 3.3993449211120605, "step": 1543, "token_acc": 0.26053646026548816 }, { "epoch": 0.9053063617707418, "grad_norm": 1.518294467064156, "learning_rate": 0.00012670574443141851, "loss": 3.4432902336120605, "step": 1544, "token_acc": 0.2555351647275333 }, { "epoch": 0.9058927000879508, "grad_norm": 1.8622711267314034, "learning_rate": 0.0001267878077373974, "loss": 3.3634085655212402, "step": 1545, "token_acc": 0.26421013424543993 }, { "epoch": 0.9064790384051598, "grad_norm": 1.1560263965212023, "learning_rate": 0.0001268698710433763, "loss": 3.3357577323913574, "step": 1546, "token_acc": 0.2676453819840365 }, { "epoch": 0.9070653767223688, "grad_norm": 1.8559841530718768, "learning_rate": 0.0001269519343493552, "loss": 3.372191905975342, "step": 1547, "token_acc": 0.2636687439618655 }, { "epoch": 0.9076517150395779, "grad_norm": 1.585022739453405, "learning_rate": 0.0001270339976553341, "loss": 3.4497013092041016, "step": 1548, "token_acc": 0.2519981107331991 }, { "epoch": 0.9082380533567869, "grad_norm": 1.5410059864668335, "learning_rate": 0.00012711606096131302, "loss": 3.3897926807403564, "step": 1549, "token_acc": 0.2595405541876409 }, { "epoch": 0.9088243916739959, "grad_norm": 1.555896756624451, "learning_rate": 0.0001271981242672919, "loss": 3.37626576423645, "step": 1550, "token_acc": 0.26381750633806184 }, { "epoch": 0.909410729991205, "grad_norm": 1.1968189040705774, "learning_rate": 0.00012728018757327081, "loss": 3.408867120742798, "step": 1551, "token_acc": 0.25903616085785086 }, { "epoch": 0.909997068308414, "grad_norm": 1.8814723521216314, "learning_rate": 0.0001273622508792497, "loss": 3.4293951988220215, "step": 1552, "token_acc": 0.25559339727278657 }, { "epoch": 0.910583406625623, "grad_norm": 1.4785780763266745, "learning_rate": 0.0001274443141852286, "loss": 3.3968286514282227, "step": 1553, "token_acc": 0.26147151372644983 }, { "epoch": 0.911169744942832, "grad_norm": 1.612523874928874, "learning_rate": 0.0001275263774912075, "loss": 3.4345650672912598, "step": 1554, "token_acc": 0.2570978914235786 }, { "epoch": 0.9117560832600411, "grad_norm": 1.3980844447729044, "learning_rate": 0.00012760844079718638, "loss": 3.449166774749756, "step": 1555, "token_acc": 0.25410012193574943 }, { "epoch": 0.9123424215772501, "grad_norm": 1.8388607441425078, "learning_rate": 0.0001276905041031653, "loss": 3.457688808441162, "step": 1556, "token_acc": 0.25203113041053954 }, { "epoch": 0.9129287598944591, "grad_norm": 1.6041631915136352, "learning_rate": 0.00012777256740914417, "loss": 3.476592540740967, "step": 1557, "token_acc": 0.2504371493497323 }, { "epoch": 0.9135150982116681, "grad_norm": 1.4907487624017328, "learning_rate": 0.00012785463071512309, "loss": 3.432101249694824, "step": 1558, "token_acc": 0.2568024282362133 }, { "epoch": 0.9141014365288772, "grad_norm": 1.425227318655967, "learning_rate": 0.00012793669402110197, "loss": 3.4399032592773438, "step": 1559, "token_acc": 0.25403551632718696 }, { "epoch": 0.9146877748460862, "grad_norm": 2.5193843486930687, "learning_rate": 0.00012801875732708088, "loss": 3.424142837524414, "step": 1560, "token_acc": 0.2573708425917021 }, { "epoch": 0.9152741131632952, "grad_norm": 1.0234863893872745, "learning_rate": 0.00012810082063305977, "loss": 3.3329296112060547, "step": 1561, "token_acc": 0.26969947600676325 }, { "epoch": 0.9158604514805042, "grad_norm": 2.590742252634702, "learning_rate": 0.00012818288393903868, "loss": 3.4161808490753174, "step": 1562, "token_acc": 0.25874583625873 }, { "epoch": 0.9164467897977133, "grad_norm": 1.4663309937114128, "learning_rate": 0.00012826494724501756, "loss": 3.389841318130493, "step": 1563, "token_acc": 0.2628703652633431 }, { "epoch": 0.9170331281149223, "grad_norm": 1.750696503375946, "learning_rate": 0.00012834701055099647, "loss": 3.3607654571533203, "step": 1564, "token_acc": 0.26589156477583176 }, { "epoch": 0.9176194664321313, "grad_norm": 1.8751730302893155, "learning_rate": 0.00012842907385697536, "loss": 3.435530185699463, "step": 1565, "token_acc": 0.2553990435409742 }, { "epoch": 0.9182058047493403, "grad_norm": 1.817345647313248, "learning_rate": 0.00012851113716295427, "loss": 3.391427755355835, "step": 1566, "token_acc": 0.26175877976152573 }, { "epoch": 0.9187921430665494, "grad_norm": 1.3867129127005364, "learning_rate": 0.00012859320046893318, "loss": 3.449869155883789, "step": 1567, "token_acc": 0.25318217760889145 }, { "epoch": 0.9193784813837584, "grad_norm": 1.2924012128996152, "learning_rate": 0.00012867526377491207, "loss": 3.3871898651123047, "step": 1568, "token_acc": 0.2636144438922677 }, { "epoch": 0.9199648197009674, "grad_norm": 1.1307441521273516, "learning_rate": 0.00012875732708089098, "loss": 3.3623342514038086, "step": 1569, "token_acc": 0.2632221423362006 }, { "epoch": 0.9205511580181764, "grad_norm": 1.372860410810995, "learning_rate": 0.00012883939038686986, "loss": 3.380150556564331, "step": 1570, "token_acc": 0.2638815008797414 }, { "epoch": 0.9211374963353856, "grad_norm": 1.642020094143525, "learning_rate": 0.00012892145369284877, "loss": 3.3960001468658447, "step": 1571, "token_acc": 0.26034435104947407 }, { "epoch": 0.9217238346525946, "grad_norm": 1.383253037315965, "learning_rate": 0.00012900351699882766, "loss": 3.4062883853912354, "step": 1572, "token_acc": 0.25979498985314337 }, { "epoch": 0.9223101729698036, "grad_norm": 1.7401216627480691, "learning_rate": 0.00012908558030480654, "loss": 3.3856728076934814, "step": 1573, "token_acc": 0.2611058193617885 }, { "epoch": 0.9228965112870126, "grad_norm": 1.2925508874163962, "learning_rate": 0.00012916764361078545, "loss": 3.4103055000305176, "step": 1574, "token_acc": 0.2571754531899058 }, { "epoch": 0.9234828496042217, "grad_norm": 1.6468940858529026, "learning_rate": 0.00012924970691676434, "loss": 3.3652539253234863, "step": 1575, "token_acc": 0.26435405458635236 }, { "epoch": 0.9240691879214307, "grad_norm": 1.2925208290653964, "learning_rate": 0.00012933177022274325, "loss": 3.396791934967041, "step": 1576, "token_acc": 0.26056945186423286 }, { "epoch": 0.9246555262386397, "grad_norm": 1.7004359844038002, "learning_rate": 0.00012941383352872213, "loss": 3.3742918968200684, "step": 1577, "token_acc": 0.26298866358142775 }, { "epoch": 0.9252418645558487, "grad_norm": 1.0928600623173799, "learning_rate": 0.00012949589683470105, "loss": 3.4830098152160645, "step": 1578, "token_acc": 0.2503633240850839 }, { "epoch": 0.9258282028730578, "grad_norm": 1.966378935257831, "learning_rate": 0.00012957796014067993, "loss": 3.372520923614502, "step": 1579, "token_acc": 0.2631443999055629 }, { "epoch": 0.9264145411902668, "grad_norm": 1.754489044553854, "learning_rate": 0.00012966002344665884, "loss": 3.4189040660858154, "step": 1580, "token_acc": 0.25668263298260513 }, { "epoch": 0.9270008795074758, "grad_norm": 0.990692161242356, "learning_rate": 0.00012974208675263773, "loss": 3.3704352378845215, "step": 1581, "token_acc": 0.26270092660201366 }, { "epoch": 0.9275872178246849, "grad_norm": 1.2790744314627125, "learning_rate": 0.00012982415005861664, "loss": 3.4020233154296875, "step": 1582, "token_acc": 0.25990416632827096 }, { "epoch": 0.9281735561418939, "grad_norm": 1.0448719491627247, "learning_rate": 0.00012990621336459552, "loss": 3.387636184692383, "step": 1583, "token_acc": 0.26169775327445033 }, { "epoch": 0.9287598944591029, "grad_norm": 1.7311121151879958, "learning_rate": 0.00012998827667057443, "loss": 3.4150383472442627, "step": 1584, "token_acc": 0.2595987543142981 }, { "epoch": 0.9293462327763119, "grad_norm": 1.1357092911466424, "learning_rate": 0.00013007033997655335, "loss": 3.406409740447998, "step": 1585, "token_acc": 0.25788372093023254 }, { "epoch": 0.929932571093521, "grad_norm": 1.3576007877852292, "learning_rate": 0.00013015240328253223, "loss": 3.4089412689208984, "step": 1586, "token_acc": 0.2586472832305491 }, { "epoch": 0.93051890941073, "grad_norm": 1.1590308635175053, "learning_rate": 0.00013023446658851114, "loss": 3.391888380050659, "step": 1587, "token_acc": 0.2597921600398853 }, { "epoch": 0.931105247727939, "grad_norm": 1.592440322825381, "learning_rate": 0.00013031652989449003, "loss": 3.443631172180176, "step": 1588, "token_acc": 0.2542345051903475 }, { "epoch": 0.931691586045148, "grad_norm": 1.5514572272748557, "learning_rate": 0.00013039859320046894, "loss": 3.418938636779785, "step": 1589, "token_acc": 0.25787297351673605 }, { "epoch": 0.9322779243623571, "grad_norm": 1.1885770848576238, "learning_rate": 0.00013048065650644782, "loss": 3.3787739276885986, "step": 1590, "token_acc": 0.26254568120925137 }, { "epoch": 0.9328642626795661, "grad_norm": 1.9901442438009058, "learning_rate": 0.0001305627198124267, "loss": 3.397382974624634, "step": 1591, "token_acc": 0.25973850812001104 }, { "epoch": 0.9334506009967751, "grad_norm": 1.129930689222301, "learning_rate": 0.00013064478311840562, "loss": 3.411898136138916, "step": 1592, "token_acc": 0.25577946728836987 }, { "epoch": 0.9340369393139841, "grad_norm": 2.031584292920118, "learning_rate": 0.0001307268464243845, "loss": 3.3807005882263184, "step": 1593, "token_acc": 0.2636437293824874 }, { "epoch": 0.9346232776311932, "grad_norm": 1.296994906816686, "learning_rate": 0.0001308089097303634, "loss": 3.3560585975646973, "step": 1594, "token_acc": 0.26376548294062657 }, { "epoch": 0.9352096159484022, "grad_norm": 1.9775410954257926, "learning_rate": 0.0001308909730363423, "loss": 3.4179506301879883, "step": 1595, "token_acc": 0.2577368503843605 }, { "epoch": 0.9357959542656112, "grad_norm": 1.4133324628897241, "learning_rate": 0.0001309730363423212, "loss": 3.35154128074646, "step": 1596, "token_acc": 0.26564876771821744 }, { "epoch": 0.9363822925828202, "grad_norm": 1.3139799506616736, "learning_rate": 0.0001310550996483001, "loss": 3.42692494392395, "step": 1597, "token_acc": 0.2573869194855602 }, { "epoch": 0.9369686309000294, "grad_norm": 1.4344248979779852, "learning_rate": 0.000131137162954279, "loss": 3.365551710128784, "step": 1598, "token_acc": 0.26263915593543485 }, { "epoch": 0.9375549692172384, "grad_norm": 1.6553291202539733, "learning_rate": 0.0001312192262602579, "loss": 3.3806092739105225, "step": 1599, "token_acc": 0.26291846868492713 }, { "epoch": 0.9381413075344474, "grad_norm": 1.7767331012761172, "learning_rate": 0.0001313012895662368, "loss": 3.4010696411132812, "step": 1600, "token_acc": 0.2596315790860905 }, { "epoch": 0.9387276458516564, "grad_norm": 1.3415274981476004, "learning_rate": 0.00013138335287221569, "loss": 3.404432773590088, "step": 1601, "token_acc": 0.2579591140199862 }, { "epoch": 0.9393139841688655, "grad_norm": 1.1834821769725241, "learning_rate": 0.0001314654161781946, "loss": 3.3415231704711914, "step": 1602, "token_acc": 0.2676237422766809 }, { "epoch": 0.9399003224860745, "grad_norm": 1.5921073153485024, "learning_rate": 0.0001315474794841735, "loss": 3.4134697914123535, "step": 1603, "token_acc": 0.25610784594487374 }, { "epoch": 0.9404866608032835, "grad_norm": 1.5553226010270753, "learning_rate": 0.0001316295427901524, "loss": 3.394374370574951, "step": 1604, "token_acc": 0.2591971115757757 }, { "epoch": 0.9410729991204925, "grad_norm": 1.4485197695516605, "learning_rate": 0.0001317116060961313, "loss": 3.4404358863830566, "step": 1605, "token_acc": 0.2536410677267907 }, { "epoch": 0.9416593374377016, "grad_norm": 1.915942734401626, "learning_rate": 0.0001317936694021102, "loss": 3.429225444793701, "step": 1606, "token_acc": 0.2555212106073852 }, { "epoch": 0.9422456757549106, "grad_norm": 0.8992380986124461, "learning_rate": 0.0001318757327080891, "loss": 3.359910011291504, "step": 1607, "token_acc": 0.2632898670632604 }, { "epoch": 0.9428320140721196, "grad_norm": 1.8996140048995376, "learning_rate": 0.00013195779601406799, "loss": 3.373720407485962, "step": 1608, "token_acc": 0.2626686152532947 }, { "epoch": 0.9434183523893287, "grad_norm": 1.3420597263068255, "learning_rate": 0.00013203985932004687, "loss": 3.406137466430664, "step": 1609, "token_acc": 0.2581210578500289 }, { "epoch": 0.9440046907065377, "grad_norm": 1.7061286596703917, "learning_rate": 0.00013212192262602578, "loss": 3.4146323204040527, "step": 1610, "token_acc": 0.2584042792823355 }, { "epoch": 0.9445910290237467, "grad_norm": 1.5283232374270648, "learning_rate": 0.00013220398593200467, "loss": 3.3759312629699707, "step": 1611, "token_acc": 0.26065202289192496 }, { "epoch": 0.9451773673409557, "grad_norm": 1.2291675172064562, "learning_rate": 0.00013228604923798358, "loss": 3.4050798416137695, "step": 1612, "token_acc": 0.2602949260294926 }, { "epoch": 0.9457637056581648, "grad_norm": 1.4991455514857643, "learning_rate": 0.00013236811254396246, "loss": 3.41379451751709, "step": 1613, "token_acc": 0.2582484413381142 }, { "epoch": 0.9463500439753738, "grad_norm": 1.3134137672134356, "learning_rate": 0.00013245017584994137, "loss": 3.345651149749756, "step": 1614, "token_acc": 0.26608187134502925 }, { "epoch": 0.9469363822925828, "grad_norm": 1.6124270851088944, "learning_rate": 0.00013253223915592026, "loss": 3.3834378719329834, "step": 1615, "token_acc": 0.26219965839908627 }, { "epoch": 0.9475227206097918, "grad_norm": 1.194257244938607, "learning_rate": 0.00013261430246189917, "loss": 3.372851610183716, "step": 1616, "token_acc": 0.2638059266018058 }, { "epoch": 0.9481090589270009, "grad_norm": 2.0996889776797163, "learning_rate": 0.00013269636576787805, "loss": 3.3949944972991943, "step": 1617, "token_acc": 0.259176367652237 }, { "epoch": 0.9486953972442099, "grad_norm": 1.0273019701280062, "learning_rate": 0.00013277842907385697, "loss": 3.407728672027588, "step": 1618, "token_acc": 0.25823909701602543 }, { "epoch": 0.9492817355614189, "grad_norm": 2.0780664285476567, "learning_rate": 0.00013286049237983588, "loss": 3.3907909393310547, "step": 1619, "token_acc": 0.25960532572515455 }, { "epoch": 0.9498680738786279, "grad_norm": 1.2504846098075775, "learning_rate": 0.00013294255568581476, "loss": 3.437098503112793, "step": 1620, "token_acc": 0.25519283218052286 }, { "epoch": 0.950454412195837, "grad_norm": 1.9071060161508944, "learning_rate": 0.00013302461899179367, "loss": 3.4237794876098633, "step": 1621, "token_acc": 0.25517933646542623 }, { "epoch": 0.951040750513046, "grad_norm": 1.8396084325295134, "learning_rate": 0.00013310668229777256, "loss": 3.3670296669006348, "step": 1622, "token_acc": 0.26320507894688694 }, { "epoch": 0.951627088830255, "grad_norm": 1.0676503991102975, "learning_rate": 0.00013318874560375147, "loss": 3.38714599609375, "step": 1623, "token_acc": 0.2601433215321077 }, { "epoch": 0.952213427147464, "grad_norm": 2.0407141237803064, "learning_rate": 0.00013327080890973035, "loss": 3.409733533859253, "step": 1624, "token_acc": 0.2583709690198875 }, { "epoch": 0.9527997654646732, "grad_norm": 1.264426496633966, "learning_rate": 0.00013335287221570926, "loss": 3.43257999420166, "step": 1625, "token_acc": 0.2538907178867286 }, { "epoch": 0.9533861037818822, "grad_norm": 1.4797886208242987, "learning_rate": 0.00013343493552168815, "loss": 3.385241746902466, "step": 1626, "token_acc": 0.2614268809979844 }, { "epoch": 0.9539724420990912, "grad_norm": 1.3960493925540565, "learning_rate": 0.00013351699882766703, "loss": 3.2887959480285645, "step": 1627, "token_acc": 0.27447114224883845 }, { "epoch": 0.9545587804163002, "grad_norm": 1.4895935491554844, "learning_rate": 0.00013359906213364594, "loss": 3.3930814266204834, "step": 1628, "token_acc": 0.2612280786407819 }, { "epoch": 0.9551451187335093, "grad_norm": 1.4175952400545995, "learning_rate": 0.00013368112543962483, "loss": 3.36912202835083, "step": 1629, "token_acc": 0.2625094397307488 }, { "epoch": 0.9557314570507183, "grad_norm": 1.2617666193871946, "learning_rate": 0.00013376318874560374, "loss": 3.4219913482666016, "step": 1630, "token_acc": 0.2569656660075499 }, { "epoch": 0.9563177953679273, "grad_norm": 1.8788393830980479, "learning_rate": 0.00013384525205158263, "loss": 3.369431495666504, "step": 1631, "token_acc": 0.26476876188272297 }, { "epoch": 0.9569041336851363, "grad_norm": 1.0057128508774238, "learning_rate": 0.00013392731535756154, "loss": 3.3941421508789062, "step": 1632, "token_acc": 0.2592805942916721 }, { "epoch": 0.9574904720023454, "grad_norm": 1.7114515017058227, "learning_rate": 0.00013400937866354042, "loss": 3.402280807495117, "step": 1633, "token_acc": 0.2589737730040732 }, { "epoch": 0.9580768103195544, "grad_norm": 1.1344373084871577, "learning_rate": 0.00013409144196951933, "loss": 3.433426856994629, "step": 1634, "token_acc": 0.25518585193216453 }, { "epoch": 0.9586631486367634, "grad_norm": 1.5262664679220905, "learning_rate": 0.00013417350527549822, "loss": 3.4048283100128174, "step": 1635, "token_acc": 0.26010747854088473 }, { "epoch": 0.9592494869539725, "grad_norm": 0.9996598798954925, "learning_rate": 0.00013425556858147713, "loss": 3.394796848297119, "step": 1636, "token_acc": 0.25863476439735955 }, { "epoch": 0.9598358252711815, "grad_norm": 1.9688280286815467, "learning_rate": 0.00013433763188745604, "loss": 3.3769078254699707, "step": 1637, "token_acc": 0.2644607856331523 }, { "epoch": 0.9604221635883905, "grad_norm": 1.1903855394501328, "learning_rate": 0.00013441969519343492, "loss": 3.3755381107330322, "step": 1638, "token_acc": 0.26153934257244305 }, { "epoch": 0.9610085019055995, "grad_norm": 1.7702026222573801, "learning_rate": 0.00013450175849941384, "loss": 3.4392783641815186, "step": 1639, "token_acc": 0.25585094474208986 }, { "epoch": 0.9615948402228086, "grad_norm": 1.535314503535627, "learning_rate": 0.00013458382180539272, "loss": 3.3950421810150146, "step": 1640, "token_acc": 0.25879764463493116 }, { "epoch": 0.9621811785400176, "grad_norm": 1.2740440641686512, "learning_rate": 0.00013466588511137163, "loss": 3.394374370574951, "step": 1641, "token_acc": 0.259294906070652 }, { "epoch": 0.9627675168572266, "grad_norm": 1.4624663316763096, "learning_rate": 0.00013474794841735052, "loss": 3.4390034675598145, "step": 1642, "token_acc": 0.2529298126047277 }, { "epoch": 0.9633538551744356, "grad_norm": 1.2802065447321582, "learning_rate": 0.00013483001172332943, "loss": 3.462399482727051, "step": 1643, "token_acc": 0.2524917553658991 }, { "epoch": 0.9639401934916447, "grad_norm": 1.3950324543904118, "learning_rate": 0.0001349120750293083, "loss": 3.4082980155944824, "step": 1644, "token_acc": 0.25700195497256734 }, { "epoch": 0.9645265318088537, "grad_norm": 1.4842984150979937, "learning_rate": 0.0001349941383352872, "loss": 3.358534812927246, "step": 1645, "token_acc": 0.26316448791792696 }, { "epoch": 0.9651128701260627, "grad_norm": 1.1198920698197914, "learning_rate": 0.0001350762016412661, "loss": 3.398050546646118, "step": 1646, "token_acc": 0.2582846053413669 }, { "epoch": 0.9656992084432717, "grad_norm": 2.095747294086805, "learning_rate": 0.000135158264947245, "loss": 3.326256275177002, "step": 1647, "token_acc": 0.2681122718159755 }, { "epoch": 0.9662855467604808, "grad_norm": 1.025051416285317, "learning_rate": 0.0001352403282532239, "loss": 3.3809971809387207, "step": 1648, "token_acc": 0.2628815846161387 }, { "epoch": 0.9668718850776898, "grad_norm": 1.9218514699987144, "learning_rate": 0.0001353223915592028, "loss": 3.411264419555664, "step": 1649, "token_acc": 0.2566667960310992 }, { "epoch": 0.9674582233948988, "grad_norm": 1.165383499297446, "learning_rate": 0.0001354044548651817, "loss": 3.3521289825439453, "step": 1650, "token_acc": 0.26499625262181675 }, { "epoch": 0.9680445617121078, "grad_norm": 1.5748825558118935, "learning_rate": 0.00013548651817116059, "loss": 3.4237351417541504, "step": 1651, "token_acc": 0.2564794899567264 }, { "epoch": 0.968630900029317, "grad_norm": 1.3060848671013605, "learning_rate": 0.0001355685814771395, "loss": 3.355648994445801, "step": 1652, "token_acc": 0.2655677844329881 }, { "epoch": 0.969217238346526, "grad_norm": 1.3552628428289912, "learning_rate": 0.00013565064478311838, "loss": 3.405456066131592, "step": 1653, "token_acc": 0.2572387563335105 }, { "epoch": 0.969803576663735, "grad_norm": 1.134270799255079, "learning_rate": 0.0001357327080890973, "loss": 3.3528923988342285, "step": 1654, "token_acc": 0.26592354630201565 }, { "epoch": 0.970389914980944, "grad_norm": 1.494721916776184, "learning_rate": 0.0001358147713950762, "loss": 3.3605828285217285, "step": 1655, "token_acc": 0.2634616459168713 }, { "epoch": 0.9709762532981531, "grad_norm": 1.3226596718785637, "learning_rate": 0.0001358968347010551, "loss": 3.3690621852874756, "step": 1656, "token_acc": 0.2617710714675368 }, { "epoch": 0.9715625916153621, "grad_norm": 0.9200399239416597, "learning_rate": 0.000135978898007034, "loss": 3.343637704849243, "step": 1657, "token_acc": 0.26588609575466776 }, { "epoch": 0.9721489299325711, "grad_norm": 1.1197051847172135, "learning_rate": 0.00013606096131301288, "loss": 3.391432523727417, "step": 1658, "token_acc": 0.25887319724506297 }, { "epoch": 0.9727352682497801, "grad_norm": 2.0036792000164367, "learning_rate": 0.0001361430246189918, "loss": 3.365065574645996, "step": 1659, "token_acc": 0.26424418452794507 }, { "epoch": 0.9733216065669892, "grad_norm": 1.0211775899498772, "learning_rate": 0.00013622508792497068, "loss": 3.3939638137817383, "step": 1660, "token_acc": 0.25828521596659754 }, { "epoch": 0.9739079448841982, "grad_norm": 2.5699032730336535, "learning_rate": 0.0001363071512309496, "loss": 3.3959455490112305, "step": 1661, "token_acc": 0.25860752409792936 }, { "epoch": 0.9744942832014072, "grad_norm": 1.567678565561149, "learning_rate": 0.00013638921453692848, "loss": 3.3992919921875, "step": 1662, "token_acc": 0.2567820255257223 }, { "epoch": 0.9750806215186162, "grad_norm": 1.7020167352253, "learning_rate": 0.00013647127784290736, "loss": 3.416536331176758, "step": 1663, "token_acc": 0.2571967740450754 }, { "epoch": 0.9756669598358253, "grad_norm": 1.3874585451852086, "learning_rate": 0.00013655334114888627, "loss": 3.4685044288635254, "step": 1664, "token_acc": 0.25255281221394177 }, { "epoch": 0.9762532981530343, "grad_norm": 1.2932234411064543, "learning_rate": 0.00013663540445486516, "loss": 3.390638828277588, "step": 1665, "token_acc": 0.25879916648331025 }, { "epoch": 0.9768396364702433, "grad_norm": 1.578632599646592, "learning_rate": 0.00013671746776084407, "loss": 3.426136016845703, "step": 1666, "token_acc": 0.25617224063394983 }, { "epoch": 0.9774259747874524, "grad_norm": 1.084237595221012, "learning_rate": 0.00013679953106682295, "loss": 3.3864731788635254, "step": 1667, "token_acc": 0.26110061679323754 }, { "epoch": 0.9780123131046614, "grad_norm": 1.5392218472996468, "learning_rate": 0.00013688159437280186, "loss": 3.3718767166137695, "step": 1668, "token_acc": 0.26235911023378927 }, { "epoch": 0.9785986514218704, "grad_norm": 1.051083613356463, "learning_rate": 0.00013696365767878075, "loss": 3.41086745262146, "step": 1669, "token_acc": 0.25717329787150495 }, { "epoch": 0.9791849897390794, "grad_norm": 1.258071476706584, "learning_rate": 0.00013704572098475966, "loss": 3.4070990085601807, "step": 1670, "token_acc": 0.25760990383732907 }, { "epoch": 0.9797713280562885, "grad_norm": 1.1168723465295087, "learning_rate": 0.00013712778429073854, "loss": 3.3141238689422607, "step": 1671, "token_acc": 0.2704851045252109 }, { "epoch": 0.9803576663734975, "grad_norm": 1.1693316513329723, "learning_rate": 0.00013720984759671746, "loss": 3.415781021118164, "step": 1672, "token_acc": 0.2555888666814716 }, { "epoch": 0.9809440046907065, "grad_norm": 1.4059780599864335, "learning_rate": 0.00013729191090269637, "loss": 3.3873393535614014, "step": 1673, "token_acc": 0.25857956596105264 }, { "epoch": 0.9815303430079155, "grad_norm": 1.4082246000859102, "learning_rate": 0.00013737397420867525, "loss": 3.3875980377197266, "step": 1674, "token_acc": 0.2622277439142757 }, { "epoch": 0.9821166813251246, "grad_norm": 1.542758133702623, "learning_rate": 0.00013745603751465416, "loss": 3.439617872238159, "step": 1675, "token_acc": 0.2543997878116516 }, { "epoch": 0.9827030196423336, "grad_norm": 0.786107459017105, "learning_rate": 0.00013753810082063305, "loss": 3.347785472869873, "step": 1676, "token_acc": 0.265063819436443 }, { "epoch": 0.9832893579595426, "grad_norm": 1.460477706564235, "learning_rate": 0.00013762016412661196, "loss": 3.4017410278320312, "step": 1677, "token_acc": 0.2571645640275574 }, { "epoch": 0.9838756962767516, "grad_norm": 1.3495346252343778, "learning_rate": 0.00013770222743259084, "loss": 3.396604537963867, "step": 1678, "token_acc": 0.2600650718296865 }, { "epoch": 0.9844620345939608, "grad_norm": 1.3278307049506606, "learning_rate": 0.00013778429073856976, "loss": 3.3793435096740723, "step": 1679, "token_acc": 0.2606423385094418 }, { "epoch": 0.9850483729111698, "grad_norm": 1.234983240994458, "learning_rate": 0.00013786635404454864, "loss": 3.3579092025756836, "step": 1680, "token_acc": 0.2639951965749491 }, { "epoch": 0.9856347112283788, "grad_norm": 1.473129436132707, "learning_rate": 0.00013794841735052752, "loss": 3.3994297981262207, "step": 1681, "token_acc": 0.2599592628216668 }, { "epoch": 0.9862210495455878, "grad_norm": 1.4775422083214274, "learning_rate": 0.00013803048065650644, "loss": 3.3531432151794434, "step": 1682, "token_acc": 0.2638493869295917 }, { "epoch": 0.9868073878627969, "grad_norm": 0.9724421922025391, "learning_rate": 0.00013811254396248532, "loss": 3.403754711151123, "step": 1683, "token_acc": 0.2595373937791143 }, { "epoch": 0.9873937261800059, "grad_norm": 1.3203948903903946, "learning_rate": 0.00013819460726846423, "loss": 3.3758339881896973, "step": 1684, "token_acc": 0.261963516966489 }, { "epoch": 0.9879800644972149, "grad_norm": 1.6077139309364612, "learning_rate": 0.00013827667057444312, "loss": 3.3747713565826416, "step": 1685, "token_acc": 0.26089251878471087 }, { "epoch": 0.9885664028144239, "grad_norm": 1.1762764058122244, "learning_rate": 0.00013835873388042203, "loss": 3.365751266479492, "step": 1686, "token_acc": 0.2632811895506101 }, { "epoch": 0.989152741131633, "grad_norm": 1.0317129729236414, "learning_rate": 0.0001384407971864009, "loss": 3.3322577476501465, "step": 1687, "token_acc": 0.26728618969697127 }, { "epoch": 0.989739079448842, "grad_norm": 1.4496924096116623, "learning_rate": 0.00013852286049237982, "loss": 3.387169361114502, "step": 1688, "token_acc": 0.2596358654350455 }, { "epoch": 0.990325417766051, "grad_norm": 0.9412790832358663, "learning_rate": 0.00013860492379835874, "loss": 3.386284828186035, "step": 1689, "token_acc": 0.2614359772586368 }, { "epoch": 0.99091175608326, "grad_norm": 1.3581972488065917, "learning_rate": 0.00013868698710433762, "loss": 3.3547544479370117, "step": 1690, "token_acc": 0.2654190764195818 }, { "epoch": 0.9914980944004691, "grad_norm": 1.2899137442832138, "learning_rate": 0.00013876905041031653, "loss": 3.3697428703308105, "step": 1691, "token_acc": 0.26258819335968214 }, { "epoch": 0.9920844327176781, "grad_norm": 1.4137471218165154, "learning_rate": 0.00013885111371629542, "loss": 3.412522792816162, "step": 1692, "token_acc": 0.2563867588814132 }, { "epoch": 0.9926707710348871, "grad_norm": 1.067623006279297, "learning_rate": 0.00013893317702227433, "loss": 3.3776886463165283, "step": 1693, "token_acc": 0.2613984545381115 }, { "epoch": 0.9932571093520962, "grad_norm": 1.2379028749060677, "learning_rate": 0.0001390152403282532, "loss": 3.3797287940979004, "step": 1694, "token_acc": 0.2597831009542593 }, { "epoch": 0.9938434476693052, "grad_norm": 1.1923467832475487, "learning_rate": 0.00013909730363423212, "loss": 3.3570010662078857, "step": 1695, "token_acc": 0.2651219117945139 }, { "epoch": 0.9944297859865142, "grad_norm": 1.1936438759807122, "learning_rate": 0.000139179366940211, "loss": 3.32651424407959, "step": 1696, "token_acc": 0.26843226723487323 }, { "epoch": 0.9950161243037232, "grad_norm": 1.3047963527016206, "learning_rate": 0.00013926143024618992, "loss": 3.3679392337799072, "step": 1697, "token_acc": 0.26188865350305496 }, { "epoch": 0.9956024626209323, "grad_norm": 1.3365674570676664, "learning_rate": 0.0001393434935521688, "loss": 3.3872809410095215, "step": 1698, "token_acc": 0.25937124676036155 }, { "epoch": 0.9961888009381413, "grad_norm": 1.0251022863370607, "learning_rate": 0.0001394255568581477, "loss": 3.382136821746826, "step": 1699, "token_acc": 0.2603615297453439 }, { "epoch": 0.9967751392553503, "grad_norm": 1.455271015040403, "learning_rate": 0.0001395076201641266, "loss": 3.325009822845459, "step": 1700, "token_acc": 0.26742330153529054 }, { "epoch": 0.9973614775725593, "grad_norm": 1.124126346477058, "learning_rate": 0.00013958968347010548, "loss": 3.3369157314300537, "step": 1701, "token_acc": 0.26697835093050737 }, { "epoch": 0.9979478158897684, "grad_norm": 1.2281047611347757, "learning_rate": 0.0001396717467760844, "loss": 3.333434820175171, "step": 1702, "token_acc": 0.26504732668201586 }, { "epoch": 0.9985341542069774, "grad_norm": 1.3783098049339384, "learning_rate": 0.00013975381008206328, "loss": 3.3815784454345703, "step": 1703, "token_acc": 0.2627994321347063 }, { "epoch": 0.9991204925241864, "grad_norm": 0.9796998919694639, "learning_rate": 0.0001398358733880422, "loss": 3.3336775302886963, "step": 1704, "token_acc": 0.2677462755050459 }, { "epoch": 0.9997068308413954, "grad_norm": 1.1024303780641105, "learning_rate": 0.00013991793669402108, "loss": 3.3751683235168457, "step": 1705, "token_acc": 0.26120042872454446 }, { "epoch": 1.0, "grad_norm": 1.52881410902098, "learning_rate": 0.00014, "loss": 3.2945046424865723, "step": 1706, "token_acc": 0.27328458828082036 }, { "epoch": 1.0, "eval_loss": 3.344470739364624, "eval_runtime": 21.8347, "eval_samples_per_second": 11.724, "eval_steps_per_second": 1.466, "eval_token_acc": 0.26507068718743504, "step": 1706 }, { "epoch": 1.0005863383172091, "grad_norm": 1.4307452499022542, "learning_rate": 0.00013999999967122216, "loss": 3.364198684692383, "step": 1707, "token_acc": 0.26313845212054365 }, { "epoch": 1.001172676634418, "grad_norm": 1.6717730599079252, "learning_rate": 0.00013999999868488864, "loss": 3.320115089416504, "step": 1708, "token_acc": 0.2681035332216579 }, { "epoch": 1.0017590149516271, "grad_norm": 1.005714895009179, "learning_rate": 0.00013999999704099948, "loss": 3.3462278842926025, "step": 1709, "token_acc": 0.2650901594541878 }, { "epoch": 1.0023453532688362, "grad_norm": 1.3132159576443299, "learning_rate": 0.00013999999473955467, "loss": 3.3265597820281982, "step": 1710, "token_acc": 0.26728518113281075 }, { "epoch": 1.0029316915860451, "grad_norm": 0.8945381419190027, "learning_rate": 0.00013999999178055422, "loss": 3.343167781829834, "step": 1711, "token_acc": 0.2654776076621743 }, { "epoch": 1.0035180299032542, "grad_norm": 1.5733901367013092, "learning_rate": 0.0001399999881639982, "loss": 3.439504384994507, "step": 1712, "token_acc": 0.2557064935064935 }, { "epoch": 1.0041043682204631, "grad_norm": 1.083829742141001, "learning_rate": 0.00013999998388988658, "loss": 3.3416497707366943, "step": 1713, "token_acc": 0.26527284615830293 }, { "epoch": 1.0046907065376722, "grad_norm": 1.0571573769099725, "learning_rate": 0.00013999997895821945, "loss": 3.3300538063049316, "step": 1714, "token_acc": 0.2649017793838182 }, { "epoch": 1.0052770448548813, "grad_norm": 0.9639593657514132, "learning_rate": 0.00013999997336899687, "loss": 3.297475576400757, "step": 1715, "token_acc": 0.2716643580541125 }, { "epoch": 1.0058633831720902, "grad_norm": 1.2355659873631477, "learning_rate": 0.00013999996712221884, "loss": 3.3221805095672607, "step": 1716, "token_acc": 0.26723839841203223 }, { "epoch": 1.0064497214892993, "grad_norm": 2.010721436296605, "learning_rate": 0.00013999996021788545, "loss": 3.3547515869140625, "step": 1717, "token_acc": 0.26365168331581257 }, { "epoch": 1.0070360598065085, "grad_norm": 0.7836491043342813, "learning_rate": 0.00013999995265599679, "loss": 3.3489737510681152, "step": 1718, "token_acc": 0.26232613725938925 }, { "epoch": 1.0076223981237173, "grad_norm": 1.7595445241121848, "learning_rate": 0.00013999994443655286, "loss": 3.3461060523986816, "step": 1719, "token_acc": 0.2642616950939924 }, { "epoch": 1.0082087364409265, "grad_norm": 1.2906697865151768, "learning_rate": 0.0001399999355595538, "loss": 3.37056303024292, "step": 1720, "token_acc": 0.26186812043751817 }, { "epoch": 1.0087950747581353, "grad_norm": 1.3021843035735605, "learning_rate": 0.00013999992602499965, "loss": 3.361006736755371, "step": 1721, "token_acc": 0.2641722385664616 }, { "epoch": 1.0093814130753445, "grad_norm": 1.1559058217501055, "learning_rate": 0.00013999991583289053, "loss": 3.35701060295105, "step": 1722, "token_acc": 0.26399307633906244 }, { "epoch": 1.0099677513925536, "grad_norm": 1.1128761077085572, "learning_rate": 0.00013999990498322654, "loss": 3.292968988418579, "step": 1723, "token_acc": 0.2698882235208923 }, { "epoch": 1.0105540897097625, "grad_norm": 1.1775002709096303, "learning_rate": 0.00013999989347600778, "loss": 3.372377872467041, "step": 1724, "token_acc": 0.2621188690193319 }, { "epoch": 1.0111404280269716, "grad_norm": 1.6740134002541207, "learning_rate": 0.0001399998813112343, "loss": 3.340231418609619, "step": 1725, "token_acc": 0.26563895022159967 }, { "epoch": 1.0117267663441807, "grad_norm": 1.4532001287426262, "learning_rate": 0.0001399998684889063, "loss": 3.361891269683838, "step": 1726, "token_acc": 0.2601422979695018 }, { "epoch": 1.0123131046613896, "grad_norm": 1.373754460250308, "learning_rate": 0.00013999985500902384, "loss": 3.3556835651397705, "step": 1727, "token_acc": 0.2603735736987443 }, { "epoch": 1.0128994429785987, "grad_norm": 1.476105696621109, "learning_rate": 0.0001399998408715871, "loss": 3.384720802307129, "step": 1728, "token_acc": 0.25817561935291927 }, { "epoch": 1.0134857812958076, "grad_norm": 1.1608929621074666, "learning_rate": 0.00013999982607659615, "loss": 3.330758571624756, "step": 1729, "token_acc": 0.2660935727022208 }, { "epoch": 1.0140721196130167, "grad_norm": 1.14026194396528, "learning_rate": 0.00013999981062405117, "loss": 3.3556740283966064, "step": 1730, "token_acc": 0.26242149071033655 }, { "epoch": 1.0146584579302258, "grad_norm": 1.2240118783392269, "learning_rate": 0.0001399997945139523, "loss": 3.2899863719940186, "step": 1731, "token_acc": 0.26919836179426426 }, { "epoch": 1.0152447962474347, "grad_norm": 1.4454148337700305, "learning_rate": 0.00013999977774629968, "loss": 3.2921361923217773, "step": 1732, "token_acc": 0.2704427324419113 }, { "epoch": 1.0158311345646438, "grad_norm": 0.7945831802358312, "learning_rate": 0.0001399997603210935, "loss": 3.2521042823791504, "step": 1733, "token_acc": 0.27634130406376584 }, { "epoch": 1.016417472881853, "grad_norm": 1.4596530159767214, "learning_rate": 0.00013999974223833384, "loss": 3.3401317596435547, "step": 1734, "token_acc": 0.2631723149013533 }, { "epoch": 1.0170038111990618, "grad_norm": 0.9223644796862505, "learning_rate": 0.00013999972349802096, "loss": 3.345731496810913, "step": 1735, "token_acc": 0.26548395201794533 }, { "epoch": 1.017590149516271, "grad_norm": 1.1637294576974744, "learning_rate": 0.000139999704100155, "loss": 3.3097307682037354, "step": 1736, "token_acc": 0.26968218452898274 }, { "epoch": 1.01817648783348, "grad_norm": 1.1466044474144608, "learning_rate": 0.00013999968404473616, "loss": 3.362715721130371, "step": 1737, "token_acc": 0.26228215823647727 }, { "epoch": 1.018762826150689, "grad_norm": 1.331175187208717, "learning_rate": 0.0001399996633317646, "loss": 3.3232498168945312, "step": 1738, "token_acc": 0.26689275856053685 }, { "epoch": 1.019349164467898, "grad_norm": 0.9820061971371875, "learning_rate": 0.0001399996419612405, "loss": 3.2821009159088135, "step": 1739, "token_acc": 0.27197503538999385 }, { "epoch": 1.019935502785107, "grad_norm": 1.2783143507305557, "learning_rate": 0.00013999961993316416, "loss": 3.337312698364258, "step": 1740, "token_acc": 0.2649543415317064 }, { "epoch": 1.020521841102316, "grad_norm": 1.418788593329582, "learning_rate": 0.0001399995972475357, "loss": 3.315840721130371, "step": 1741, "token_acc": 0.26926669290305655 }, { "epoch": 1.0211081794195251, "grad_norm": 1.1592718796004187, "learning_rate": 0.0001399995739043553, "loss": 3.33168625831604, "step": 1742, "token_acc": 0.2683552446427854 }, { "epoch": 1.021694517736734, "grad_norm": 1.4019478133008025, "learning_rate": 0.00013999954990362326, "loss": 3.3924148082733154, "step": 1743, "token_acc": 0.2575531872216948 }, { "epoch": 1.0222808560539431, "grad_norm": 1.4229998389239662, "learning_rate": 0.00013999952524533976, "loss": 3.324014902114868, "step": 1744, "token_acc": 0.26748387262917994 }, { "epoch": 1.0228671943711523, "grad_norm": 1.641438329660123, "learning_rate": 0.00013999949992950507, "loss": 3.3799757957458496, "step": 1745, "token_acc": 0.26034774660614685 }, { "epoch": 1.0234535326883611, "grad_norm": 0.9667915164729465, "learning_rate": 0.00013999947395611939, "loss": 3.32035493850708, "step": 1746, "token_acc": 0.2693528338987886 }, { "epoch": 1.0240398710055703, "grad_norm": 1.1901312751578843, "learning_rate": 0.00013999944732518297, "loss": 3.3056960105895996, "step": 1747, "token_acc": 0.26852144029571723 }, { "epoch": 1.0246262093227791, "grad_norm": 1.0098560142913988, "learning_rate": 0.00013999942003669607, "loss": 3.3067548274993896, "step": 1748, "token_acc": 0.2688751539900233 }, { "epoch": 1.0252125476399883, "grad_norm": 1.6576624524111359, "learning_rate": 0.00013999939209065896, "loss": 3.3458027839660645, "step": 1749, "token_acc": 0.26475199816436573 }, { "epoch": 1.0257988859571974, "grad_norm": 0.8018676153235134, "learning_rate": 0.00013999936348707188, "loss": 3.3323605060577393, "step": 1750, "token_acc": 0.26473924806900667 }, { "epoch": 1.0263852242744063, "grad_norm": 1.2913185013192388, "learning_rate": 0.0001399993342259351, "loss": 3.3125858306884766, "step": 1751, "token_acc": 0.2691325709626896 }, { "epoch": 1.0269715625916154, "grad_norm": 1.2427545375944296, "learning_rate": 0.00013999930430724891, "loss": 3.314629554748535, "step": 1752, "token_acc": 0.26635188253779285 }, { "epoch": 1.0275579009088245, "grad_norm": 1.4234333177981138, "learning_rate": 0.00013999927373101358, "loss": 3.326265811920166, "step": 1753, "token_acc": 0.26543636737947307 }, { "epoch": 1.0281442392260334, "grad_norm": 1.2328991244751653, "learning_rate": 0.00013999924249722938, "loss": 3.318502426147461, "step": 1754, "token_acc": 0.2697015066294393 }, { "epoch": 1.0287305775432425, "grad_norm": 1.1151485292462047, "learning_rate": 0.00013999921060589663, "loss": 3.350287914276123, "step": 1755, "token_acc": 0.26171984487353933 }, { "epoch": 1.0293169158604514, "grad_norm": 1.1830828332974732, "learning_rate": 0.00013999917805701564, "loss": 3.314169406890869, "step": 1756, "token_acc": 0.2693471911996869 }, { "epoch": 1.0299032541776605, "grad_norm": 1.448941355344543, "learning_rate": 0.00013999914485058666, "loss": 3.3251256942749023, "step": 1757, "token_acc": 0.2660629722101878 }, { "epoch": 1.0304895924948696, "grad_norm": 0.9251562530372269, "learning_rate": 0.0001399991109866101, "loss": 3.293213367462158, "step": 1758, "token_acc": 0.270832581471724 }, { "epoch": 1.0310759308120785, "grad_norm": 1.3304710436206886, "learning_rate": 0.00013999907646508616, "loss": 3.3478100299835205, "step": 1759, "token_acc": 0.2649337326823979 }, { "epoch": 1.0316622691292876, "grad_norm": 1.3638555434275688, "learning_rate": 0.00013999904128601524, "loss": 3.334480047225952, "step": 1760, "token_acc": 0.26499975478998405 }, { "epoch": 1.0322486074464967, "grad_norm": 1.275334425678453, "learning_rate": 0.00013999900544939763, "loss": 3.333841323852539, "step": 1761, "token_acc": 0.26601950870635405 }, { "epoch": 1.0328349457637056, "grad_norm": 1.0297007561032134, "learning_rate": 0.00013999896895523374, "loss": 3.3275198936462402, "step": 1762, "token_acc": 0.26690641358471295 }, { "epoch": 1.0334212840809147, "grad_norm": 1.1464697616979012, "learning_rate": 0.00013999893180352384, "loss": 3.327052593231201, "step": 1763, "token_acc": 0.2662565361330844 }, { "epoch": 1.0340076223981236, "grad_norm": 1.3890806466930228, "learning_rate": 0.00013999889399426827, "loss": 3.32881498336792, "step": 1764, "token_acc": 0.2653418132220133 }, { "epoch": 1.0345939607153327, "grad_norm": 1.1148908338911234, "learning_rate": 0.00013999885552746746, "loss": 3.288656711578369, "step": 1765, "token_acc": 0.2705376467160576 }, { "epoch": 1.0351802990325418, "grad_norm": 0.9804558206970546, "learning_rate": 0.00013999881640312168, "loss": 3.2847766876220703, "step": 1766, "token_acc": 0.27178461885608285 }, { "epoch": 1.0357666373497507, "grad_norm": 1.0520160748981298, "learning_rate": 0.0001399987766212314, "loss": 3.3211231231689453, "step": 1767, "token_acc": 0.26816973261552973 }, { "epoch": 1.0363529756669598, "grad_norm": 1.3013189369712828, "learning_rate": 0.00013999873618179688, "loss": 3.3616676330566406, "step": 1768, "token_acc": 0.26238565470230196 }, { "epoch": 1.036939313984169, "grad_norm": 1.1876083376368192, "learning_rate": 0.00013999869508481857, "loss": 3.290506362915039, "step": 1769, "token_acc": 0.26963750873239134 }, { "epoch": 1.0375256523013778, "grad_norm": 0.9999175436543033, "learning_rate": 0.00013999865333029688, "loss": 3.3091888427734375, "step": 1770, "token_acc": 0.2684587526399189 }, { "epoch": 1.038111990618587, "grad_norm": 1.2294061108614425, "learning_rate": 0.00013999861091823214, "loss": 3.336608409881592, "step": 1771, "token_acc": 0.26666232599325457 }, { "epoch": 1.038698328935796, "grad_norm": 1.0774548200044143, "learning_rate": 0.00013999856784862477, "loss": 3.360105037689209, "step": 1772, "token_acc": 0.2623185804194118 }, { "epoch": 1.039284667253005, "grad_norm": 1.0587649497427865, "learning_rate": 0.0001399985241214752, "loss": 3.3470427989959717, "step": 1773, "token_acc": 0.2633832636845903 }, { "epoch": 1.039871005570214, "grad_norm": 0.9852565223605219, "learning_rate": 0.00013999847973678384, "loss": 3.2943501472473145, "step": 1774, "token_acc": 0.2704324064335058 }, { "epoch": 1.040457343887423, "grad_norm": 1.1513684988755293, "learning_rate": 0.00013999843469455104, "loss": 3.3257832527160645, "step": 1775, "token_acc": 0.266993504044675 }, { "epoch": 1.041043682204632, "grad_norm": 0.8900799659357775, "learning_rate": 0.0001399983889947773, "loss": 3.354994773864746, "step": 1776, "token_acc": 0.2626962462807357 }, { "epoch": 1.0416300205218412, "grad_norm": 0.7730218706339016, "learning_rate": 0.00013999834263746298, "loss": 3.321240186691284, "step": 1777, "token_acc": 0.2660936202822658 }, { "epoch": 1.04221635883905, "grad_norm": 0.833763646708899, "learning_rate": 0.0001399982956226086, "loss": 3.3348751068115234, "step": 1778, "token_acc": 0.2657338734967251 }, { "epoch": 1.0428026971562592, "grad_norm": 1.1107716362616493, "learning_rate": 0.00013999824795021454, "loss": 3.301572799682617, "step": 1779, "token_acc": 0.2695578032959372 }, { "epoch": 1.0433890354734683, "grad_norm": 1.4856542278496963, "learning_rate": 0.00013999819962028125, "loss": 3.3440115451812744, "step": 1780, "token_acc": 0.2624542820679986 }, { "epoch": 1.0439753737906772, "grad_norm": 1.040143884670671, "learning_rate": 0.00013999815063280921, "loss": 3.319406509399414, "step": 1781, "token_acc": 0.26633087930714633 }, { "epoch": 1.0445617121078863, "grad_norm": 1.2654119906126347, "learning_rate": 0.0001399981009877989, "loss": 3.2569477558135986, "step": 1782, "token_acc": 0.27400824709301574 }, { "epoch": 1.0451480504250952, "grad_norm": 0.9213956527880066, "learning_rate": 0.00013999805068525068, "loss": 3.323514461517334, "step": 1783, "token_acc": 0.2672733729671408 }, { "epoch": 1.0457343887423043, "grad_norm": 1.0725791999185954, "learning_rate": 0.00013999799972516517, "loss": 3.2772116661071777, "step": 1784, "token_acc": 0.2731096022657571 }, { "epoch": 1.0463207270595134, "grad_norm": 1.205998028856346, "learning_rate": 0.00013999794810754275, "loss": 3.3316054344177246, "step": 1785, "token_acc": 0.2652874581482302 }, { "epoch": 1.0469070653767223, "grad_norm": 1.1401862560703175, "learning_rate": 0.00013999789583238394, "loss": 3.3418891429901123, "step": 1786, "token_acc": 0.26512752976590426 }, { "epoch": 1.0474934036939314, "grad_norm": 0.9815506604671348, "learning_rate": 0.0001399978428996892, "loss": 3.3635711669921875, "step": 1787, "token_acc": 0.26306641578174056 }, { "epoch": 1.0480797420111405, "grad_norm": 1.1673272600976379, "learning_rate": 0.00013999778930945907, "loss": 3.317405939102173, "step": 1788, "token_acc": 0.26699733818087384 }, { "epoch": 1.0486660803283494, "grad_norm": 1.3283198698217937, "learning_rate": 0.000139997735061694, "loss": 3.3545851707458496, "step": 1789, "token_acc": 0.2629396898175641 }, { "epoch": 1.0492524186455585, "grad_norm": 0.8064309407733516, "learning_rate": 0.00013999768015639458, "loss": 3.2974672317504883, "step": 1790, "token_acc": 0.27078108907608955 }, { "epoch": 1.0498387569627674, "grad_norm": 1.1086236393177713, "learning_rate": 0.00013999762459356125, "loss": 3.3527579307556152, "step": 1791, "token_acc": 0.26184795633071495 }, { "epoch": 1.0504250952799765, "grad_norm": 1.1115559787350677, "learning_rate": 0.00013999756837319456, "loss": 3.3284459114074707, "step": 1792, "token_acc": 0.26580342656636935 }, { "epoch": 1.0510114335971856, "grad_norm": 0.9671510985756274, "learning_rate": 0.00013999751149529503, "loss": 3.2788405418395996, "step": 1793, "token_acc": 0.2719465648854962 }, { "epoch": 1.0515977719143945, "grad_norm": 1.5588599662360043, "learning_rate": 0.0001399974539598632, "loss": 3.346158981323242, "step": 1794, "token_acc": 0.26401828262632787 }, { "epoch": 1.0521841102316036, "grad_norm": 0.8747284511001189, "learning_rate": 0.00013999739576689963, "loss": 3.3254544734954834, "step": 1795, "token_acc": 0.26666192155808277 }, { "epoch": 1.0527704485488127, "grad_norm": 1.1488101779700044, "learning_rate": 0.00013999733691640487, "loss": 3.31069278717041, "step": 1796, "token_acc": 0.2670141648965976 }, { "epoch": 1.0533567868660216, "grad_norm": 0.7678092789412517, "learning_rate": 0.0001399972774083794, "loss": 3.296076774597168, "step": 1797, "token_acc": 0.2692060439129685 }, { "epoch": 1.0539431251832307, "grad_norm": 1.0599135325538214, "learning_rate": 0.00013999721724282388, "loss": 3.3422317504882812, "step": 1798, "token_acc": 0.26471112275645226 }, { "epoch": 1.0545294635004399, "grad_norm": 1.2805634509628596, "learning_rate": 0.0001399971564197388, "loss": 3.3032217025756836, "step": 1799, "token_acc": 0.26969189086366185 }, { "epoch": 1.0551158018176487, "grad_norm": 0.9879500681445028, "learning_rate": 0.00013999709493912475, "loss": 3.295577049255371, "step": 1800, "token_acc": 0.26896933982904986 }, { "epoch": 1.0557021401348579, "grad_norm": 1.01711912746615, "learning_rate": 0.00013999703280098236, "loss": 3.357044219970703, "step": 1801, "token_acc": 0.2624168178676903 }, { "epoch": 1.0562884784520667, "grad_norm": 0.9297610177216736, "learning_rate": 0.00013999697000531214, "loss": 3.3503241539001465, "step": 1802, "token_acc": 0.2630404081818596 }, { "epoch": 1.0568748167692759, "grad_norm": 1.1275169653231805, "learning_rate": 0.0001399969065521147, "loss": 3.3427200317382812, "step": 1803, "token_acc": 0.2631673201880645 }, { "epoch": 1.057461155086485, "grad_norm": 1.5168705046673456, "learning_rate": 0.00013999684244139066, "loss": 3.3364458084106445, "step": 1804, "token_acc": 0.2648679766453736 }, { "epoch": 1.0580474934036939, "grad_norm": 0.7758619041488104, "learning_rate": 0.0001399967776731406, "loss": 3.321136474609375, "step": 1805, "token_acc": 0.2664464859205005 }, { "epoch": 1.058633831720903, "grad_norm": 1.3018297683569777, "learning_rate": 0.00013999671224736512, "loss": 3.308809280395508, "step": 1806, "token_acc": 0.2677616782301705 }, { "epoch": 1.059220170038112, "grad_norm": 1.3814469554642985, "learning_rate": 0.00013999664616406486, "loss": 3.313599109649658, "step": 1807, "token_acc": 0.26609996474556297 }, { "epoch": 1.059806508355321, "grad_norm": 0.7960372984128132, "learning_rate": 0.00013999657942324043, "loss": 3.304856300354004, "step": 1808, "token_acc": 0.2698606652569042 }, { "epoch": 1.06039284667253, "grad_norm": 1.4294149614854543, "learning_rate": 0.00013999651202489246, "loss": 3.336911916732788, "step": 1809, "token_acc": 0.26349588953227615 }, { "epoch": 1.060979184989739, "grad_norm": 0.751744999311584, "learning_rate": 0.0001399964439690216, "loss": 3.327693462371826, "step": 1810, "token_acc": 0.2646924043333196 }, { "epoch": 1.061565523306948, "grad_norm": 0.9909147085150155, "learning_rate": 0.00013999637525562847, "loss": 3.3280014991760254, "step": 1811, "token_acc": 0.2655039607297313 }, { "epoch": 1.0621518616241572, "grad_norm": 1.3325706766957, "learning_rate": 0.00013999630588471367, "loss": 3.3167190551757812, "step": 1812, "token_acc": 0.2676403597341912 }, { "epoch": 1.062738199941366, "grad_norm": 1.0086630479858323, "learning_rate": 0.00013999623585627795, "loss": 3.305112838745117, "step": 1813, "token_acc": 0.26868874043009705 }, { "epoch": 1.0633245382585752, "grad_norm": 1.089611211061808, "learning_rate": 0.0001399961651703219, "loss": 3.343794345855713, "step": 1814, "token_acc": 0.2644560030085224 }, { "epoch": 1.0639108765757843, "grad_norm": 1.071721484523487, "learning_rate": 0.00013999609382684617, "loss": 3.3233747482299805, "step": 1815, "token_acc": 0.26702088422538073 }, { "epoch": 1.0644972148929932, "grad_norm": 1.0832754803123985, "learning_rate": 0.0001399960218258515, "loss": 3.331878185272217, "step": 1816, "token_acc": 0.26516927479754004 }, { "epoch": 1.0650835532102023, "grad_norm": 1.1567334820171873, "learning_rate": 0.0001399959491673385, "loss": 3.2944395542144775, "step": 1817, "token_acc": 0.26997135208754053 }, { "epoch": 1.0656698915274112, "grad_norm": 1.0439031025417007, "learning_rate": 0.0001399958758513079, "loss": 3.322361946105957, "step": 1818, "token_acc": 0.2668553888481103 }, { "epoch": 1.0662562298446203, "grad_norm": 1.613010241333247, "learning_rate": 0.00013999580187776034, "loss": 3.3395676612854004, "step": 1819, "token_acc": 0.2616043627724483 }, { "epoch": 1.0668425681618294, "grad_norm": 0.7473231670673615, "learning_rate": 0.00013999572724669656, "loss": 3.3388588428497314, "step": 1820, "token_acc": 0.26432208865170653 }, { "epoch": 1.0674289064790383, "grad_norm": 1.1500927404570311, "learning_rate": 0.00013999565195811723, "loss": 3.3333349227905273, "step": 1821, "token_acc": 0.26493488741331905 }, { "epoch": 1.0680152447962474, "grad_norm": 1.33547826635988, "learning_rate": 0.00013999557601202306, "loss": 3.330199956893921, "step": 1822, "token_acc": 0.26604604589340675 }, { "epoch": 1.0686015831134565, "grad_norm": 1.0368455000288705, "learning_rate": 0.00013999549940841479, "loss": 3.294459819793701, "step": 1823, "token_acc": 0.26879343102816744 }, { "epoch": 1.0691879214306654, "grad_norm": 1.2093250485907292, "learning_rate": 0.00013999542214729313, "loss": 3.3290820121765137, "step": 1824, "token_acc": 0.2657802680473478 }, { "epoch": 1.0697742597478745, "grad_norm": 0.8713838553403832, "learning_rate": 0.0001399953442286588, "loss": 3.2968857288360596, "step": 1825, "token_acc": 0.26941374382721633 }, { "epoch": 1.0703605980650837, "grad_norm": 1.4551536889062209, "learning_rate": 0.00013999526565251254, "loss": 3.2816882133483887, "step": 1826, "token_acc": 0.2716676066929874 }, { "epoch": 1.0709469363822925, "grad_norm": 0.8507611938412795, "learning_rate": 0.00013999518641885506, "loss": 3.2701101303100586, "step": 1827, "token_acc": 0.27357226394435313 }, { "epoch": 1.0715332746995017, "grad_norm": 1.5293370848418248, "learning_rate": 0.00013999510652768713, "loss": 3.2855193614959717, "step": 1828, "token_acc": 0.26786718169123913 }, { "epoch": 1.0721196130167105, "grad_norm": 0.8636019805976295, "learning_rate": 0.0001399950259790095, "loss": 3.3260278701782227, "step": 1829, "token_acc": 0.26390238891127915 }, { "epoch": 1.0727059513339197, "grad_norm": 1.0369510616555084, "learning_rate": 0.00013999494477282288, "loss": 3.3669614791870117, "step": 1830, "token_acc": 0.25850625699096197 }, { "epoch": 1.0732922896511288, "grad_norm": 1.2398367043044685, "learning_rate": 0.0001399948629091281, "loss": 3.2878429889678955, "step": 1831, "token_acc": 0.2720432207594776 }, { "epoch": 1.0738786279683377, "grad_norm": 1.273925486807277, "learning_rate": 0.00013999478038792594, "loss": 3.2479612827301025, "step": 1832, "token_acc": 0.27589032261474944 }, { "epoch": 1.0744649662855468, "grad_norm": 0.9146816916096273, "learning_rate": 0.0001399946972092171, "loss": 3.301370859146118, "step": 1833, "token_acc": 0.26903573100327666 }, { "epoch": 1.0750513046027559, "grad_norm": 1.6017705278626457, "learning_rate": 0.0001399946133730024, "loss": 3.281545639038086, "step": 1834, "token_acc": 0.2708565469747056 }, { "epoch": 1.0756376429199648, "grad_norm": 0.9833885208929952, "learning_rate": 0.00013999452887928263, "loss": 3.338879346847534, "step": 1835, "token_acc": 0.2650875780920624 }, { "epoch": 1.0762239812371739, "grad_norm": 1.3698570040137303, "learning_rate": 0.00013999444372805858, "loss": 3.2998180389404297, "step": 1836, "token_acc": 0.26981970184550835 }, { "epoch": 1.0768103195543828, "grad_norm": 1.2877304166096115, "learning_rate": 0.00013999435791933103, "loss": 3.348214864730835, "step": 1837, "token_acc": 0.26316915060853335 }, { "epoch": 1.077396657871592, "grad_norm": 1.1564317398648107, "learning_rate": 0.00013999427145310083, "loss": 3.303995132446289, "step": 1838, "token_acc": 0.2669063967215784 }, { "epoch": 1.077982996188801, "grad_norm": 1.222992538862914, "learning_rate": 0.00013999418432936877, "loss": 3.3253049850463867, "step": 1839, "token_acc": 0.2655641017688594 }, { "epoch": 1.07856933450601, "grad_norm": 0.8980403231458256, "learning_rate": 0.00013999409654813564, "loss": 3.3390817642211914, "step": 1840, "token_acc": 0.2630144347902473 }, { "epoch": 1.079155672823219, "grad_norm": 1.1559855607120255, "learning_rate": 0.0001399940081094023, "loss": 3.277944803237915, "step": 1841, "token_acc": 0.2717599401398457 }, { "epoch": 1.0797420111404281, "grad_norm": 0.8730834210500212, "learning_rate": 0.00013999391901316961, "loss": 3.2907848358154297, "step": 1842, "token_acc": 0.27076429388477885 }, { "epoch": 1.080328349457637, "grad_norm": 0.8635917251124942, "learning_rate": 0.00013999382925943834, "loss": 3.3359527587890625, "step": 1843, "token_acc": 0.2648700550163284 }, { "epoch": 1.0809146877748461, "grad_norm": 0.6098258875311179, "learning_rate": 0.00013999373884820935, "loss": 3.3045454025268555, "step": 1844, "token_acc": 0.26686216213283565 }, { "epoch": 1.081501026092055, "grad_norm": 1.1251604261023664, "learning_rate": 0.00013999364777948352, "loss": 3.290310859680176, "step": 1845, "token_acc": 0.26986378890570506 }, { "epoch": 1.0820873644092641, "grad_norm": 1.2461749520837204, "learning_rate": 0.0001399935560532617, "loss": 3.252135992050171, "step": 1846, "token_acc": 0.27515763637857377 }, { "epoch": 1.0826737027264732, "grad_norm": 0.8100914081760897, "learning_rate": 0.00013999346366954472, "loss": 3.2760884761810303, "step": 1847, "token_acc": 0.2718901671404027 }, { "epoch": 1.0832600410436821, "grad_norm": 1.2656913646658259, "learning_rate": 0.00013999337062833346, "loss": 3.2524685859680176, "step": 1848, "token_acc": 0.2741837960437387 }, { "epoch": 1.0838463793608912, "grad_norm": 0.7247251981099396, "learning_rate": 0.0001399932769296288, "loss": 3.2182953357696533, "step": 1849, "token_acc": 0.28022362706776244 }, { "epoch": 1.0844327176781003, "grad_norm": 0.908585700878057, "learning_rate": 0.00013999318257343162, "loss": 3.3057150840759277, "step": 1850, "token_acc": 0.26736236697724414 }, { "epoch": 1.0850190559953092, "grad_norm": 1.1017466213489573, "learning_rate": 0.0001399930875597428, "loss": 3.2950050830841064, "step": 1851, "token_acc": 0.2686912610464301 }, { "epoch": 1.0856053943125183, "grad_norm": 1.1491698301440563, "learning_rate": 0.00013999299188856328, "loss": 3.301602363586426, "step": 1852, "token_acc": 0.27003996254691753 }, { "epoch": 1.0861917326297275, "grad_norm": 0.9181808351619032, "learning_rate": 0.00013999289555989387, "loss": 3.316830635070801, "step": 1853, "token_acc": 0.2664258454991918 }, { "epoch": 1.0867780709469363, "grad_norm": 1.0216041658426336, "learning_rate": 0.00013999279857373556, "loss": 3.343573808670044, "step": 1854, "token_acc": 0.2636155454515283 }, { "epoch": 1.0873644092641455, "grad_norm": 1.2934262879052087, "learning_rate": 0.00013999270093008922, "loss": 3.25911283493042, "step": 1855, "token_acc": 0.2745379775058217 }, { "epoch": 1.0879507475813543, "grad_norm": 0.6503468020493017, "learning_rate": 0.00013999260262895574, "loss": 3.306527614593506, "step": 1856, "token_acc": 0.26668879982925847 }, { "epoch": 1.0885370858985635, "grad_norm": 0.9405648187380918, "learning_rate": 0.0001399925036703361, "loss": 3.364485740661621, "step": 1857, "token_acc": 0.2604449170868718 }, { "epoch": 1.0891234242157726, "grad_norm": 1.497570455005679, "learning_rate": 0.0001399924040542312, "loss": 3.309842824935913, "step": 1858, "token_acc": 0.2660288891941558 }, { "epoch": 1.0897097625329815, "grad_norm": 0.7199694119735058, "learning_rate": 0.00013999230378064197, "loss": 3.308772087097168, "step": 1859, "token_acc": 0.26789840644914703 }, { "epoch": 1.0902961008501906, "grad_norm": 1.5180060422852206, "learning_rate": 0.00013999220284956936, "loss": 3.3184075355529785, "step": 1860, "token_acc": 0.26377301378245727 }, { "epoch": 1.0908824391673997, "grad_norm": 0.7701802885184044, "learning_rate": 0.00013999210126101433, "loss": 3.3614258766174316, "step": 1861, "token_acc": 0.26163085084334 }, { "epoch": 1.0914687774846086, "grad_norm": 1.2566696807955915, "learning_rate": 0.00013999199901497782, "loss": 3.2930243015289307, "step": 1862, "token_acc": 0.27156288394509687 }, { "epoch": 1.0920551158018177, "grad_norm": 0.9182559800217669, "learning_rate": 0.00013999189611146081, "loss": 3.31693172454834, "step": 1863, "token_acc": 0.2675856386356599 }, { "epoch": 1.0926414541190266, "grad_norm": 1.1426842117053462, "learning_rate": 0.00013999179255046423, "loss": 3.252509593963623, "step": 1864, "token_acc": 0.274149438645499 }, { "epoch": 1.0932277924362357, "grad_norm": 1.1106417396325976, "learning_rate": 0.0001399916883319891, "loss": 3.261803150177002, "step": 1865, "token_acc": 0.2728094106750961 }, { "epoch": 1.0938141307534448, "grad_norm": 0.9954299856150943, "learning_rate": 0.00013999158345603637, "loss": 3.2489876747131348, "step": 1866, "token_acc": 0.2752487289506927 }, { "epoch": 1.0944004690706537, "grad_norm": 0.9836350810312638, "learning_rate": 0.000139991477922607, "loss": 3.2866289615631104, "step": 1867, "token_acc": 0.2723171984892768 }, { "epoch": 1.0949868073878628, "grad_norm": 0.9507489955610542, "learning_rate": 0.00013999137173170202, "loss": 3.323457956314087, "step": 1868, "token_acc": 0.2655584383140287 }, { "epoch": 1.095573145705072, "grad_norm": 0.8450988642634238, "learning_rate": 0.0001399912648833224, "loss": 3.3118367195129395, "step": 1869, "token_acc": 0.26796890995653716 }, { "epoch": 1.0961594840222808, "grad_norm": 0.9351306648347011, "learning_rate": 0.0001399911573774692, "loss": 3.322701930999756, "step": 1870, "token_acc": 0.26708102472918555 }, { "epoch": 1.09674582233949, "grad_norm": 0.9842668059515658, "learning_rate": 0.00013999104921414335, "loss": 3.2851157188415527, "step": 1871, "token_acc": 0.27061894851619056 }, { "epoch": 1.0973321606566988, "grad_norm": 1.166577425011579, "learning_rate": 0.00013999094039334595, "loss": 3.2874763011932373, "step": 1872, "token_acc": 0.2716030379007073 }, { "epoch": 1.097918498973908, "grad_norm": 1.0197718254792618, "learning_rate": 0.00013999083091507797, "loss": 3.2692391872406006, "step": 1873, "token_acc": 0.2719374756427013 }, { "epoch": 1.098504837291117, "grad_norm": 0.8781846958233227, "learning_rate": 0.00013999072077934042, "loss": 3.2783310413360596, "step": 1874, "token_acc": 0.2736820895788859 }, { "epoch": 1.099091175608326, "grad_norm": 1.1599809443604552, "learning_rate": 0.00013999060998613438, "loss": 3.3003063201904297, "step": 1875, "token_acc": 0.26700517664958484 }, { "epoch": 1.099677513925535, "grad_norm": 0.8997034460378547, "learning_rate": 0.00013999049853546087, "loss": 3.331218957901001, "step": 1876, "token_acc": 0.2643905678492462 }, { "epoch": 1.1002638522427441, "grad_norm": 0.9296153918164964, "learning_rate": 0.00013999038642732093, "loss": 3.241473436355591, "step": 1877, "token_acc": 0.2776678517654293 }, { "epoch": 1.100850190559953, "grad_norm": 1.3537777951583687, "learning_rate": 0.00013999027366171565, "loss": 3.2870283126831055, "step": 1878, "token_acc": 0.27148064527024396 }, { "epoch": 1.1014365288771621, "grad_norm": 0.6647397589705146, "learning_rate": 0.00013999016023864602, "loss": 3.379164218902588, "step": 1879, "token_acc": 0.2570523384304858 }, { "epoch": 1.1020228671943713, "grad_norm": 0.9822821036040421, "learning_rate": 0.00013999004615811319, "loss": 3.259047508239746, "step": 1880, "token_acc": 0.27398084581172627 }, { "epoch": 1.1026092055115801, "grad_norm": 1.0733352997023877, "learning_rate": 0.00013998993142011818, "loss": 3.3657634258270264, "step": 1881, "token_acc": 0.26218121938861677 }, { "epoch": 1.1031955438287893, "grad_norm": 1.0465187261581401, "learning_rate": 0.00013998981602466204, "loss": 3.271768093109131, "step": 1882, "token_acc": 0.2705745599468615 }, { "epoch": 1.1037818821459981, "grad_norm": 1.4709603791923103, "learning_rate": 0.00013998969997174593, "loss": 3.3049778938293457, "step": 1883, "token_acc": 0.26826877922290054 }, { "epoch": 1.1043682204632073, "grad_norm": 0.7146593027189652, "learning_rate": 0.0001399895832613709, "loss": 3.348053455352783, "step": 1884, "token_acc": 0.2631329418936027 }, { "epoch": 1.1049545587804164, "grad_norm": 1.1079262161660524, "learning_rate": 0.00013998946589353803, "loss": 3.3031368255615234, "step": 1885, "token_acc": 0.26803077990179847 }, { "epoch": 1.1055408970976253, "grad_norm": 0.9520136137114626, "learning_rate": 0.00013998934786824845, "loss": 3.2916035652160645, "step": 1886, "token_acc": 0.2701066001969407 }, { "epoch": 1.1061272354148344, "grad_norm": 1.340398865809543, "learning_rate": 0.00013998922918550326, "loss": 3.2986459732055664, "step": 1887, "token_acc": 0.2704262373105026 }, { "epoch": 1.1067135737320435, "grad_norm": 0.8223712073681388, "learning_rate": 0.00013998910984530357, "loss": 3.3266139030456543, "step": 1888, "token_acc": 0.26573040198102355 }, { "epoch": 1.1072999120492524, "grad_norm": 0.8724142106525006, "learning_rate": 0.0001399889898476505, "loss": 3.324303150177002, "step": 1889, "token_acc": 0.26524718436327904 }, { "epoch": 1.1078862503664615, "grad_norm": 0.8455126963491224, "learning_rate": 0.00013998886919254518, "loss": 3.272357940673828, "step": 1890, "token_acc": 0.2717668141953352 }, { "epoch": 1.1084725886836704, "grad_norm": 1.2108692741823819, "learning_rate": 0.00013998874787998875, "loss": 3.323394298553467, "step": 1891, "token_acc": 0.26520179699009366 }, { "epoch": 1.1090589270008795, "grad_norm": 0.7913665158157078, "learning_rate": 0.00013998862590998236, "loss": 3.2900800704956055, "step": 1892, "token_acc": 0.26916836175261033 }, { "epoch": 1.1096452653180886, "grad_norm": 0.8356301268618571, "learning_rate": 0.0001399885032825271, "loss": 3.289419412612915, "step": 1893, "token_acc": 0.26937441154932523 }, { "epoch": 1.1102316036352975, "grad_norm": 0.684766429557567, "learning_rate": 0.0001399883799976242, "loss": 3.287341833114624, "step": 1894, "token_acc": 0.2711999360954857 }, { "epoch": 1.1108179419525066, "grad_norm": 0.9783662583349133, "learning_rate": 0.00013998825605527476, "loss": 3.303997039794922, "step": 1895, "token_acc": 0.2671409038462516 }, { "epoch": 1.1114042802697157, "grad_norm": 1.0050496371266775, "learning_rate": 0.00013998813145547998, "loss": 3.3054754734039307, "step": 1896, "token_acc": 0.26853206534991464 }, { "epoch": 1.1119906185869246, "grad_norm": 1.3996467425054169, "learning_rate": 0.00013998800619824102, "loss": 3.289036273956299, "step": 1897, "token_acc": 0.2697812952526949 }, { "epoch": 1.1125769569041337, "grad_norm": 0.9218744468740792, "learning_rate": 0.00013998788028355905, "loss": 3.2724738121032715, "step": 1898, "token_acc": 0.27360321231602824 }, { "epoch": 1.1131632952213426, "grad_norm": 1.1667947411399546, "learning_rate": 0.00013998775371143522, "loss": 3.3260960578918457, "step": 1899, "token_acc": 0.26720866923828845 }, { "epoch": 1.1137496335385517, "grad_norm": 1.0971500792293096, "learning_rate": 0.0001399876264818708, "loss": 3.244976043701172, "step": 1900, "token_acc": 0.27503414508439633 }, { "epoch": 1.1143359718557608, "grad_norm": 0.9848058593145909, "learning_rate": 0.0001399874985948669, "loss": 3.330976963043213, "step": 1901, "token_acc": 0.26608724283276586 }, { "epoch": 1.1149223101729697, "grad_norm": 0.7727261311315157, "learning_rate": 0.0001399873700504248, "loss": 3.3092007637023926, "step": 1902, "token_acc": 0.26821043457057847 }, { "epoch": 1.1155086484901788, "grad_norm": 0.8764433813856298, "learning_rate": 0.00013998724084854564, "loss": 3.258882999420166, "step": 1903, "token_acc": 0.27473534278397616 }, { "epoch": 1.116094986807388, "grad_norm": 1.0004384674682756, "learning_rate": 0.00013998711098923066, "loss": 3.3339600563049316, "step": 1904, "token_acc": 0.2648401704349408 }, { "epoch": 1.1166813251245968, "grad_norm": 0.9693524331929907, "learning_rate": 0.00013998698047248108, "loss": 3.2253172397613525, "step": 1905, "token_acc": 0.27831982631635677 }, { "epoch": 1.117267663441806, "grad_norm": 0.88397228043664, "learning_rate": 0.00013998684929829815, "loss": 3.320296287536621, "step": 1906, "token_acc": 0.2665719137228772 }, { "epoch": 1.117854001759015, "grad_norm": 1.3086384267931546, "learning_rate": 0.00013998671746668305, "loss": 3.3414483070373535, "step": 1907, "token_acc": 0.26346636029839854 }, { "epoch": 1.118440340076224, "grad_norm": 1.2290662260882046, "learning_rate": 0.00013998658497763706, "loss": 3.290722608566284, "step": 1908, "token_acc": 0.26859515871191675 }, { "epoch": 1.119026678393433, "grad_norm": 0.9739474848104016, "learning_rate": 0.00013998645183116142, "loss": 3.252053737640381, "step": 1909, "token_acc": 0.27211002740295326 }, { "epoch": 1.119613016710642, "grad_norm": 0.9609520537502815, "learning_rate": 0.00013998631802725737, "loss": 3.2660207748413086, "step": 1910, "token_acc": 0.2732543827728664 }, { "epoch": 1.120199355027851, "grad_norm": 0.8863912761997428, "learning_rate": 0.00013998618356592614, "loss": 3.3199892044067383, "step": 1911, "token_acc": 0.2651269738760885 }, { "epoch": 1.1207856933450602, "grad_norm": 0.962278852525161, "learning_rate": 0.00013998604844716906, "loss": 3.3091344833374023, "step": 1912, "token_acc": 0.2663750680714458 }, { "epoch": 1.121372031662269, "grad_norm": 1.0154201273351295, "learning_rate": 0.00013998591267098736, "loss": 3.284541130065918, "step": 1913, "token_acc": 0.2716598255515649 }, { "epoch": 1.1219583699794782, "grad_norm": 0.820760376225301, "learning_rate": 0.0001399857762373823, "loss": 3.3344931602478027, "step": 1914, "token_acc": 0.2649089155763059 }, { "epoch": 1.1225447082966873, "grad_norm": 0.8550779949304272, "learning_rate": 0.0001399856391463552, "loss": 3.297438144683838, "step": 1915, "token_acc": 0.26884452383821134 }, { "epoch": 1.1231310466138962, "grad_norm": 0.793883068887793, "learning_rate": 0.00013998550139790732, "loss": 3.295935869216919, "step": 1916, "token_acc": 0.26726366107897076 }, { "epoch": 1.1237173849311053, "grad_norm": 0.6925294249109684, "learning_rate": 0.00013998536299203996, "loss": 3.2794947624206543, "step": 1917, "token_acc": 0.2713245132340919 }, { "epoch": 1.1243037232483142, "grad_norm": 0.8790466568877678, "learning_rate": 0.00013998522392875441, "loss": 3.3019251823425293, "step": 1918, "token_acc": 0.26880011596972975 }, { "epoch": 1.1248900615655233, "grad_norm": 1.142344284370931, "learning_rate": 0.000139985084208052, "loss": 3.3230037689208984, "step": 1919, "token_acc": 0.2657927790204032 }, { "epoch": 1.1254763998827324, "grad_norm": 1.0506056379450228, "learning_rate": 0.000139984943829934, "loss": 3.3077712059020996, "step": 1920, "token_acc": 0.26848174746787623 }, { "epoch": 1.1260627381999413, "grad_norm": 1.0067958912755068, "learning_rate": 0.00013998480279440182, "loss": 3.271796226501465, "step": 1921, "token_acc": 0.2695801832670575 }, { "epoch": 1.1266490765171504, "grad_norm": 0.909357420572101, "learning_rate": 0.00013998466110145665, "loss": 3.307565212249756, "step": 1922, "token_acc": 0.2700992003785921 }, { "epoch": 1.1272354148343595, "grad_norm": 0.8916113891904401, "learning_rate": 0.00013998451875109994, "loss": 3.2779183387756348, "step": 1923, "token_acc": 0.27172233892995934 }, { "epoch": 1.1278217531515684, "grad_norm": 1.0548486588114818, "learning_rate": 0.00013998437574333297, "loss": 3.287111759185791, "step": 1924, "token_acc": 0.27082784265118404 }, { "epoch": 1.1284080914687775, "grad_norm": 0.767115472119786, "learning_rate": 0.00013998423207815713, "loss": 3.2825584411621094, "step": 1925, "token_acc": 0.27224032385466035 }, { "epoch": 1.1289944297859864, "grad_norm": 0.9290370087135374, "learning_rate": 0.00013998408775557368, "loss": 3.297276496887207, "step": 1926, "token_acc": 0.2677517185604529 }, { "epoch": 1.1295807681031955, "grad_norm": 1.397897744791598, "learning_rate": 0.00013998394277558405, "loss": 3.290966510772705, "step": 1927, "token_acc": 0.2690842094188711 }, { "epoch": 1.1301671064204046, "grad_norm": 0.8497468891904235, "learning_rate": 0.0001399837971381896, "loss": 3.2938361167907715, "step": 1928, "token_acc": 0.2695718717470395 }, { "epoch": 1.1307534447376135, "grad_norm": 0.816515040224627, "learning_rate": 0.00013998365084339168, "loss": 3.245223045349121, "step": 1929, "token_acc": 0.27621646654962284 }, { "epoch": 1.1313397830548226, "grad_norm": 0.6734272779440157, "learning_rate": 0.00013998350389119162, "loss": 3.2750918865203857, "step": 1930, "token_acc": 0.27203950961473444 }, { "epoch": 1.1319261213720317, "grad_norm": 0.618889470596775, "learning_rate": 0.00013998335628159086, "loss": 3.2781543731689453, "step": 1931, "token_acc": 0.27140484462381553 }, { "epoch": 1.1325124596892406, "grad_norm": 0.7517126522012283, "learning_rate": 0.00013998320801459081, "loss": 3.278299331665039, "step": 1932, "token_acc": 0.2724875716136167 }, { "epoch": 1.1330987980064497, "grad_norm": 0.8107779686763992, "learning_rate": 0.0001399830590901928, "loss": 3.301764488220215, "step": 1933, "token_acc": 0.2683763002444233 }, { "epoch": 1.1336851363236589, "grad_norm": 0.7971392793476056, "learning_rate": 0.00013998290950839826, "loss": 3.3297338485717773, "step": 1934, "token_acc": 0.26359942396974473 }, { "epoch": 1.1342714746408677, "grad_norm": 0.7216806327999759, "learning_rate": 0.00013998275926920857, "loss": 3.2673377990722656, "step": 1935, "token_acc": 0.2733427894474017 }, { "epoch": 1.1348578129580769, "grad_norm": 0.8779734375967914, "learning_rate": 0.0001399826083726252, "loss": 3.258728504180908, "step": 1936, "token_acc": 0.2753815304630046 }, { "epoch": 1.1354441512752858, "grad_norm": 1.1227995532520973, "learning_rate": 0.00013998245681864948, "loss": 3.294938087463379, "step": 1937, "token_acc": 0.2678384981900476 }, { "epoch": 1.1360304895924949, "grad_norm": 1.035902863609576, "learning_rate": 0.0001399823046072829, "loss": 3.2935845851898193, "step": 1938, "token_acc": 0.2689313949169629 }, { "epoch": 1.136616827909704, "grad_norm": 0.9096071089849996, "learning_rate": 0.00013998215173852688, "loss": 3.327840566635132, "step": 1939, "token_acc": 0.26409631209996953 }, { "epoch": 1.1372031662269129, "grad_norm": 0.9578814886908256, "learning_rate": 0.00013998199821238283, "loss": 3.3148975372314453, "step": 1940, "token_acc": 0.2677343362102661 }, { "epoch": 1.137789504544122, "grad_norm": 1.2517582423569324, "learning_rate": 0.0001399818440288522, "loss": 3.2710928916931152, "step": 1941, "token_acc": 0.2720199915819345 }, { "epoch": 1.1383758428613309, "grad_norm": 0.7478999600166207, "learning_rate": 0.00013998168918793647, "loss": 3.2860074043273926, "step": 1942, "token_acc": 0.2711199778039081 }, { "epoch": 1.13896218117854, "grad_norm": 0.8609328848135778, "learning_rate": 0.00013998153368963707, "loss": 3.3212461471557617, "step": 1943, "token_acc": 0.26525140466718616 }, { "epoch": 1.139548519495749, "grad_norm": 1.0721584093297092, "learning_rate": 0.00013998137753395545, "loss": 3.335183620452881, "step": 1944, "token_acc": 0.26417570451562866 }, { "epoch": 1.140134857812958, "grad_norm": 1.0505126174816861, "learning_rate": 0.0001399812207208931, "loss": 3.250131607055664, "step": 1945, "token_acc": 0.2735841006585939 }, { "epoch": 1.140721196130167, "grad_norm": 0.9274660129297787, "learning_rate": 0.00013998106325045147, "loss": 3.2657017707824707, "step": 1946, "token_acc": 0.2739049672527762 }, { "epoch": 1.1413075344473762, "grad_norm": 1.2913452709511533, "learning_rate": 0.00013998090512263206, "loss": 3.3363468647003174, "step": 1947, "token_acc": 0.2634593356242841 }, { "epoch": 1.141893872764585, "grad_norm": 0.6525713895374821, "learning_rate": 0.00013998074633743635, "loss": 3.2542076110839844, "step": 1948, "token_acc": 0.27485320232478566 }, { "epoch": 1.1424802110817942, "grad_norm": 0.7356740458820169, "learning_rate": 0.00013998058689486582, "loss": 3.282883405685425, "step": 1949, "token_acc": 0.27123746362151224 }, { "epoch": 1.1430665493990033, "grad_norm": 0.85794360317439, "learning_rate": 0.000139980426794922, "loss": 3.340507984161377, "step": 1950, "token_acc": 0.2633244878079031 }, { "epoch": 1.1436528877162122, "grad_norm": 0.7414672908723083, "learning_rate": 0.00013998026603760633, "loss": 3.3087711334228516, "step": 1951, "token_acc": 0.2665588164943893 }, { "epoch": 1.1442392260334213, "grad_norm": 0.6330179418724468, "learning_rate": 0.0001399801046229204, "loss": 3.303285598754883, "step": 1952, "token_acc": 0.2676859759673309 }, { "epoch": 1.1448255643506302, "grad_norm": 0.8925808006937246, "learning_rate": 0.00013997994255086565, "loss": 3.2777936458587646, "step": 1953, "token_acc": 0.2713731467368713 }, { "epoch": 1.1454119026678393, "grad_norm": 1.1276444745587175, "learning_rate": 0.00013997977982144365, "loss": 3.272172212600708, "step": 1954, "token_acc": 0.2713280761712257 }, { "epoch": 1.1459982409850484, "grad_norm": 0.7039309572929954, "learning_rate": 0.00013997961643465593, "loss": 3.313575029373169, "step": 1955, "token_acc": 0.2674113668475534 }, { "epoch": 1.1465845793022573, "grad_norm": 1.0493796151181258, "learning_rate": 0.000139979452390504, "loss": 3.3273534774780273, "step": 1956, "token_acc": 0.26380629933144745 }, { "epoch": 1.1471709176194664, "grad_norm": 1.1357127094696116, "learning_rate": 0.00013997928768898945, "loss": 3.296158790588379, "step": 1957, "token_acc": 0.2697438254945157 }, { "epoch": 1.1477572559366755, "grad_norm": 0.8153337896490619, "learning_rate": 0.00013997912233011374, "loss": 3.260286808013916, "step": 1958, "token_acc": 0.27425285998808224 }, { "epoch": 1.1483435942538844, "grad_norm": 0.7717407694352988, "learning_rate": 0.0001399789563138785, "loss": 3.2907774448394775, "step": 1959, "token_acc": 0.2704232570057537 }, { "epoch": 1.1489299325710935, "grad_norm": 0.8626275426143295, "learning_rate": 0.00013997878964028525, "loss": 3.289522171020508, "step": 1960, "token_acc": 0.27131638632877536 }, { "epoch": 1.1495162708883027, "grad_norm": 0.8380905507944597, "learning_rate": 0.0001399786223093356, "loss": 3.233691692352295, "step": 1961, "token_acc": 0.27606119951687375 }, { "epoch": 1.1501026092055116, "grad_norm": 0.9383138095442958, "learning_rate": 0.00013997845432103104, "loss": 3.3120272159576416, "step": 1962, "token_acc": 0.26775831396330296 }, { "epoch": 1.1506889475227207, "grad_norm": 1.4280232083851079, "learning_rate": 0.00013997828567537322, "loss": 3.29605770111084, "step": 1963, "token_acc": 0.2682191520676067 }, { "epoch": 1.1512752858399296, "grad_norm": 0.5793568213274755, "learning_rate": 0.00013997811637236372, "loss": 3.2406818866729736, "step": 1964, "token_acc": 0.2752665836814385 }, { "epoch": 1.1518616241571387, "grad_norm": 1.0410063136477115, "learning_rate": 0.00013997794641200408, "loss": 3.319178342819214, "step": 1965, "token_acc": 0.26747206677261165 }, { "epoch": 1.1524479624743478, "grad_norm": 1.4843280748335523, "learning_rate": 0.00013997777579429597, "loss": 3.3066418170928955, "step": 1966, "token_acc": 0.26656780878885533 }, { "epoch": 1.1530343007915567, "grad_norm": 0.7725640772898864, "learning_rate": 0.00013997760451924093, "loss": 3.298063278198242, "step": 1967, "token_acc": 0.269319377835042 }, { "epoch": 1.1536206391087658, "grad_norm": 1.460568808752417, "learning_rate": 0.0001399774325868406, "loss": 3.2945804595947266, "step": 1968, "token_acc": 0.2691454697662438 }, { "epoch": 1.1542069774259747, "grad_norm": 0.9105052646406522, "learning_rate": 0.00013997725999709658, "loss": 3.244741916656494, "step": 1969, "token_acc": 0.27579952267303104 }, { "epoch": 1.1547933157431838, "grad_norm": 1.011474365360013, "learning_rate": 0.0001399770867500105, "loss": 3.247795581817627, "step": 1970, "token_acc": 0.27575713626475046 }, { "epoch": 1.155379654060393, "grad_norm": 1.243656398316088, "learning_rate": 0.000139976912845584, "loss": 3.3330931663513184, "step": 1971, "token_acc": 0.2658736779784171 }, { "epoch": 1.1559659923776018, "grad_norm": 0.7296939637018416, "learning_rate": 0.00013997673828381867, "loss": 3.2535276412963867, "step": 1972, "token_acc": 0.27408158390885207 }, { "epoch": 1.156552330694811, "grad_norm": 0.8402281762265186, "learning_rate": 0.00013997656306471618, "loss": 3.2534587383270264, "step": 1973, "token_acc": 0.2740848286365936 }, { "epoch": 1.15713866901202, "grad_norm": 0.9647313407763141, "learning_rate": 0.0001399763871882782, "loss": 3.320188522338867, "step": 1974, "token_acc": 0.264661799055051 }, { "epoch": 1.157725007329229, "grad_norm": 1.0192220820580344, "learning_rate": 0.00013997621065450633, "loss": 3.2865147590637207, "step": 1975, "token_acc": 0.27192734268899954 }, { "epoch": 1.158311345646438, "grad_norm": 0.9801568969967671, "learning_rate": 0.00013997603346340229, "loss": 3.2362523078918457, "step": 1976, "token_acc": 0.2774556139423525 }, { "epoch": 1.1588976839636471, "grad_norm": 0.8784503993417647, "learning_rate": 0.00013997585561496768, "loss": 3.318542718887329, "step": 1977, "token_acc": 0.26686570939936705 }, { "epoch": 1.159484022280856, "grad_norm": 0.7517384391080224, "learning_rate": 0.0001399756771092042, "loss": 3.3092856407165527, "step": 1978, "token_acc": 0.26675811283976464 }, { "epoch": 1.1600703605980651, "grad_norm": 0.768156455276333, "learning_rate": 0.00013997549794611354, "loss": 3.283203125, "step": 1979, "token_acc": 0.2714389177724322 }, { "epoch": 1.160656698915274, "grad_norm": 0.612608204651872, "learning_rate": 0.00013997531812569736, "loss": 3.2756786346435547, "step": 1980, "token_acc": 0.270528508268185 }, { "epoch": 1.1612430372324831, "grad_norm": 0.7088899619518892, "learning_rate": 0.00013997513764795738, "loss": 3.2976560592651367, "step": 1981, "token_acc": 0.2698743569140221 }, { "epoch": 1.1618293755496922, "grad_norm": 0.7059857392786851, "learning_rate": 0.00013997495651289527, "loss": 3.2709131240844727, "step": 1982, "token_acc": 0.27256450255437087 }, { "epoch": 1.1624157138669011, "grad_norm": 0.6484008510232048, "learning_rate": 0.00013997477472051272, "loss": 3.266232490539551, "step": 1983, "token_acc": 0.2733303259687669 }, { "epoch": 1.1630020521841102, "grad_norm": 0.5875863516838354, "learning_rate": 0.00013997459227081145, "loss": 3.2704479694366455, "step": 1984, "token_acc": 0.2729809076997743 }, { "epoch": 1.1635883905013193, "grad_norm": 0.6857931319123926, "learning_rate": 0.0001399744091637932, "loss": 3.2780685424804688, "step": 1985, "token_acc": 0.2701982450438739 }, { "epoch": 1.1641747288185282, "grad_norm": 0.6688390468442005, "learning_rate": 0.00013997422539945966, "loss": 3.2904343605041504, "step": 1986, "token_acc": 0.2698845113178909 }, { "epoch": 1.1647610671357373, "grad_norm": 0.9386681815814419, "learning_rate": 0.00013997404097781255, "loss": 3.2261009216308594, "step": 1987, "token_acc": 0.27882530764298147 }, { "epoch": 1.1653474054529465, "grad_norm": 1.0715909370535577, "learning_rate": 0.0001399738558988536, "loss": 3.286068916320801, "step": 1988, "token_acc": 0.27039242544648456 }, { "epoch": 1.1659337437701554, "grad_norm": 1.1455103355124707, "learning_rate": 0.0001399736701625846, "loss": 3.263002872467041, "step": 1989, "token_acc": 0.2723936933650387 }, { "epoch": 1.1665200820873645, "grad_norm": 0.9575400958748154, "learning_rate": 0.00013997348376900724, "loss": 3.3112387657165527, "step": 1990, "token_acc": 0.2672752131253016 }, { "epoch": 1.1671064204045734, "grad_norm": 0.855923533225229, "learning_rate": 0.00013997329671812332, "loss": 3.2915282249450684, "step": 1991, "token_acc": 0.2683620745753888 }, { "epoch": 1.1676927587217825, "grad_norm": 0.9132767649565504, "learning_rate": 0.00013997310900993454, "loss": 3.261610746383667, "step": 1992, "token_acc": 0.27440615590498496 }, { "epoch": 1.1682790970389916, "grad_norm": 0.9402467601171423, "learning_rate": 0.0001399729206444427, "loss": 3.319091558456421, "step": 1993, "token_acc": 0.2648558004468371 }, { "epoch": 1.1688654353562005, "grad_norm": 1.096000376730819, "learning_rate": 0.00013997273162164956, "loss": 3.32125186920166, "step": 1994, "token_acc": 0.26607096382406525 }, { "epoch": 1.1694517736734096, "grad_norm": 0.9743500299861092, "learning_rate": 0.0001399725419415569, "loss": 3.270928382873535, "step": 1995, "token_acc": 0.26980207772422216 }, { "epoch": 1.1700381119906185, "grad_norm": 1.0489637739727886, "learning_rate": 0.00013997235160416647, "loss": 3.3269383907318115, "step": 1996, "token_acc": 0.2659689991281331 }, { "epoch": 1.1706244503078276, "grad_norm": 0.6031897461874, "learning_rate": 0.00013997216060948012, "loss": 3.3214361667633057, "step": 1997, "token_acc": 0.2638300845036688 }, { "epoch": 1.1712107886250367, "grad_norm": 0.7184589396407897, "learning_rate": 0.0001399719689574996, "loss": 3.2780709266662598, "step": 1998, "token_acc": 0.27127680024605605 }, { "epoch": 1.1717971269422456, "grad_norm": 0.8084578419598454, "learning_rate": 0.0001399717766482267, "loss": 3.283168077468872, "step": 1999, "token_acc": 0.27009540489969974 }, { "epoch": 1.1723834652594547, "grad_norm": 0.9251953725991897, "learning_rate": 0.00013997158368166327, "loss": 3.2896370887756348, "step": 2000, "token_acc": 0.2705444324027856 }, { "epoch": 1.1729698035766638, "grad_norm": 0.9205641509985651, "learning_rate": 0.0001399713900578111, "loss": 3.273399829864502, "step": 2001, "token_acc": 0.27039277229787817 }, { "epoch": 1.1735561418938727, "grad_norm": 0.8926323668634545, "learning_rate": 0.000139971195776672, "loss": 3.2432026863098145, "step": 2002, "token_acc": 0.27554514144892994 }, { "epoch": 1.1741424802110818, "grad_norm": 0.9918193854914225, "learning_rate": 0.00013997100083824778, "loss": 3.311223030090332, "step": 2003, "token_acc": 0.26720193475331383 }, { "epoch": 1.174728818528291, "grad_norm": 0.9761518260911817, "learning_rate": 0.00013997080524254032, "loss": 3.2806928157806396, "step": 2004, "token_acc": 0.2702721222903077 }, { "epoch": 1.1753151568454998, "grad_norm": 0.8462092574010112, "learning_rate": 0.00013997060898955144, "loss": 3.290086507797241, "step": 2005, "token_acc": 0.26968710194354273 }, { "epoch": 1.175901495162709, "grad_norm": 0.7199809154954767, "learning_rate": 0.00013997041207928297, "loss": 3.2581470012664795, "step": 2006, "token_acc": 0.27425520644422235 }, { "epoch": 1.1764878334799178, "grad_norm": 0.7111798552281327, "learning_rate": 0.00013997021451173677, "loss": 3.3144493103027344, "step": 2007, "token_acc": 0.2649414934101375 }, { "epoch": 1.177074171797127, "grad_norm": 0.7269533251462796, "learning_rate": 0.00013997001628691466, "loss": 3.3021883964538574, "step": 2008, "token_acc": 0.2656302313811495 }, { "epoch": 1.177660510114336, "grad_norm": 0.6922427232460047, "learning_rate": 0.00013996981740481857, "loss": 3.256155014038086, "step": 2009, "token_acc": 0.27238125885266784 }, { "epoch": 1.178246848431545, "grad_norm": 0.9073039935713157, "learning_rate": 0.0001399696178654503, "loss": 3.269740104675293, "step": 2010, "token_acc": 0.2719516539638705 }, { "epoch": 1.178833186748754, "grad_norm": 0.9175877466329571, "learning_rate": 0.00013996941766881177, "loss": 3.21970534324646, "step": 2011, "token_acc": 0.27754819508249423 }, { "epoch": 1.1794195250659631, "grad_norm": 0.8993502067710845, "learning_rate": 0.00013996921681490486, "loss": 3.309999942779541, "step": 2012, "token_acc": 0.26560222109263926 }, { "epoch": 1.180005863383172, "grad_norm": 0.8548468793758872, "learning_rate": 0.00013996901530373141, "loss": 3.269012212753296, "step": 2013, "token_acc": 0.2731310040599203 }, { "epoch": 1.1805922017003811, "grad_norm": 0.8315697921166076, "learning_rate": 0.00013996881313529336, "loss": 3.260809898376465, "step": 2014, "token_acc": 0.2727009657298745 }, { "epoch": 1.1811785400175903, "grad_norm": 0.7797126073819475, "learning_rate": 0.0001399686103095926, "loss": 3.2540645599365234, "step": 2015, "token_acc": 0.27387683384238326 }, { "epoch": 1.1817648783347992, "grad_norm": 0.8879051190914242, "learning_rate": 0.00013996840682663103, "loss": 3.283329486846924, "step": 2016, "token_acc": 0.2705465334504941 }, { "epoch": 1.1823512166520083, "grad_norm": 0.8763884759057723, "learning_rate": 0.00013996820268641057, "loss": 3.2482025623321533, "step": 2017, "token_acc": 0.2745193728462942 }, { "epoch": 1.1829375549692172, "grad_norm": 0.6522520794303478, "learning_rate": 0.00013996799788893312, "loss": 3.273881435394287, "step": 2018, "token_acc": 0.271554576382023 }, { "epoch": 1.1835238932864263, "grad_norm": 0.7529803975708894, "learning_rate": 0.0001399677924342006, "loss": 3.247715711593628, "step": 2019, "token_acc": 0.27410420136477787 }, { "epoch": 1.1841102316036354, "grad_norm": 0.8418183421081263, "learning_rate": 0.00013996758632221496, "loss": 3.3056061267852783, "step": 2020, "token_acc": 0.26771928726422967 }, { "epoch": 1.1846965699208443, "grad_norm": 0.7325558729225324, "learning_rate": 0.00013996737955297814, "loss": 3.279817581176758, "step": 2021, "token_acc": 0.27071971596083205 }, { "epoch": 1.1852829082380534, "grad_norm": 0.8030224441444531, "learning_rate": 0.00013996717212649208, "loss": 3.2769505977630615, "step": 2022, "token_acc": 0.2719305892287995 }, { "epoch": 1.1858692465552623, "grad_norm": 0.8242029847771069, "learning_rate": 0.0001399669640427587, "loss": 3.2430787086486816, "step": 2023, "token_acc": 0.2758381087706697 }, { "epoch": 1.1864555848724714, "grad_norm": 0.8264266931101405, "learning_rate": 0.00013996675530177996, "loss": 3.300528049468994, "step": 2024, "token_acc": 0.27027717127924006 }, { "epoch": 1.1870419231896805, "grad_norm": 0.973956471615252, "learning_rate": 0.00013996654590355787, "loss": 3.2806429862976074, "step": 2025, "token_acc": 0.2704477102905156 }, { "epoch": 1.1876282615068894, "grad_norm": 0.7534006546208338, "learning_rate": 0.00013996633584809434, "loss": 3.28279447555542, "step": 2026, "token_acc": 0.26853968526421734 }, { "epoch": 1.1882145998240985, "grad_norm": 0.7560127380298783, "learning_rate": 0.00013996612513539138, "loss": 3.293433666229248, "step": 2027, "token_acc": 0.2693936920143699 }, { "epoch": 1.1888009381413076, "grad_norm": 1.055578746112734, "learning_rate": 0.00013996591376545092, "loss": 3.261115074157715, "step": 2028, "token_acc": 0.27160234182556187 }, { "epoch": 1.1893872764585165, "grad_norm": 1.0443696121586854, "learning_rate": 0.00013996570173827502, "loss": 3.2751612663269043, "step": 2029, "token_acc": 0.270351408859372 }, { "epoch": 1.1899736147757256, "grad_norm": 0.7233969196383906, "learning_rate": 0.00013996548905386563, "loss": 3.2878074645996094, "step": 2030, "token_acc": 0.26900639987348196 }, { "epoch": 1.1905599530929347, "grad_norm": 0.7190116803157567, "learning_rate": 0.00013996527571222473, "loss": 3.248185157775879, "step": 2031, "token_acc": 0.2727448598203162 }, { "epoch": 1.1911462914101436, "grad_norm": 0.7551477262953156, "learning_rate": 0.00013996506171335438, "loss": 3.256265163421631, "step": 2032, "token_acc": 0.2735621197171518 }, { "epoch": 1.1917326297273527, "grad_norm": 0.7080222965655164, "learning_rate": 0.00013996484705725652, "loss": 3.3316240310668945, "step": 2033, "token_acc": 0.2641333378764806 }, { "epoch": 1.1923189680445616, "grad_norm": 0.669109683810363, "learning_rate": 0.0001399646317439332, "loss": 3.2331409454345703, "step": 2034, "token_acc": 0.2776335364913024 }, { "epoch": 1.1929053063617707, "grad_norm": 0.8943186847361569, "learning_rate": 0.00013996441577338647, "loss": 3.31321382522583, "step": 2035, "token_acc": 0.2660338284608996 }, { "epoch": 1.1934916446789798, "grad_norm": 0.8394702091312193, "learning_rate": 0.0001399641991456183, "loss": 3.254009485244751, "step": 2036, "token_acc": 0.2747039750086346 }, { "epoch": 1.1940779829961887, "grad_norm": 0.6482322937941302, "learning_rate": 0.00013996398186063075, "loss": 3.2829437255859375, "step": 2037, "token_acc": 0.2698733208064981 }, { "epoch": 1.1946643213133978, "grad_norm": 0.6666697229093557, "learning_rate": 0.00013996376391842591, "loss": 3.2279770374298096, "step": 2038, "token_acc": 0.2764330159642248 }, { "epoch": 1.195250659630607, "grad_norm": 0.7812163405303421, "learning_rate": 0.00013996354531900577, "loss": 3.2676010131835938, "step": 2039, "token_acc": 0.27284011458599616 }, { "epoch": 1.1958369979478158, "grad_norm": 0.8326625457349554, "learning_rate": 0.0001399633260623724, "loss": 3.2590248584747314, "step": 2040, "token_acc": 0.2719198051456974 }, { "epoch": 1.196423336265025, "grad_norm": 1.0455332220398084, "learning_rate": 0.00013996310614852782, "loss": 3.2858152389526367, "step": 2041, "token_acc": 0.269239996594805 }, { "epoch": 1.197009674582234, "grad_norm": 1.0502970910975296, "learning_rate": 0.00013996288557747419, "loss": 3.2631282806396484, "step": 2042, "token_acc": 0.2730490531476443 }, { "epoch": 1.197596012899443, "grad_norm": 0.8054852590068742, "learning_rate": 0.00013996266434921349, "loss": 3.3076512813568115, "step": 2043, "token_acc": 0.2664837165747581 }, { "epoch": 1.198182351216652, "grad_norm": 1.0093675786483056, "learning_rate": 0.00013996244246374786, "loss": 3.2395107746124268, "step": 2044, "token_acc": 0.27610904883322773 }, { "epoch": 1.198768689533861, "grad_norm": 0.6916907559025842, "learning_rate": 0.00013996221992107935, "loss": 3.2929985523223877, "step": 2045, "token_acc": 0.26778898201617773 }, { "epoch": 1.19935502785107, "grad_norm": 0.817989571617489, "learning_rate": 0.00013996199672121004, "loss": 3.26029109954834, "step": 2046, "token_acc": 0.2742026233897139 }, { "epoch": 1.1999413661682792, "grad_norm": 0.9180749824034695, "learning_rate": 0.00013996177286414207, "loss": 3.211268186569214, "step": 2047, "token_acc": 0.2794246955529329 }, { "epoch": 1.200527704485488, "grad_norm": 0.8684883920231655, "learning_rate": 0.00013996154834987752, "loss": 3.3434700965881348, "step": 2048, "token_acc": 0.2641811308699819 }, { "epoch": 1.2011140428026972, "grad_norm": 0.8644291288375126, "learning_rate": 0.00013996132317841846, "loss": 3.2719063758850098, "step": 2049, "token_acc": 0.27049551914639675 }, { "epoch": 1.201700381119906, "grad_norm": 1.0050278038899023, "learning_rate": 0.00013996109734976708, "loss": 3.2906382083892822, "step": 2050, "token_acc": 0.2696166518835104 }, { "epoch": 1.2022867194371152, "grad_norm": 1.1136608484222708, "learning_rate": 0.00013996087086392544, "loss": 3.251469373703003, "step": 2051, "token_acc": 0.27550584688517676 }, { "epoch": 1.2028730577543243, "grad_norm": 0.7153490602730995, "learning_rate": 0.00013996064372089572, "loss": 3.2633090019226074, "step": 2052, "token_acc": 0.272893039225887 }, { "epoch": 1.2034593960715332, "grad_norm": 0.6357191713719329, "learning_rate": 0.00013996041592068, "loss": 3.2354817390441895, "step": 2053, "token_acc": 0.27708910409574355 }, { "epoch": 1.2040457343887423, "grad_norm": 0.8922975198207472, "learning_rate": 0.00013996018746328048, "loss": 3.2724735736846924, "step": 2054, "token_acc": 0.2710692319823372 }, { "epoch": 1.2046320727059514, "grad_norm": 1.057652080223769, "learning_rate": 0.00013995995834869922, "loss": 3.21077823638916, "step": 2055, "token_acc": 0.2786402022190652 }, { "epoch": 1.2052184110231603, "grad_norm": 0.8423578122477403, "learning_rate": 0.00013995972857693846, "loss": 3.290818214416504, "step": 2056, "token_acc": 0.26963003082001885 }, { "epoch": 1.2058047493403694, "grad_norm": 1.1099485819974582, "learning_rate": 0.0001399594981480003, "loss": 3.2558577060699463, "step": 2057, "token_acc": 0.2736841277826689 }, { "epoch": 1.2063910876575785, "grad_norm": 0.9684048199069077, "learning_rate": 0.00013995926706188695, "loss": 3.318784236907959, "step": 2058, "token_acc": 0.26525050751968443 }, { "epoch": 1.2069774259747874, "grad_norm": 0.8075818261541164, "learning_rate": 0.00013995903531860055, "loss": 3.2668185234069824, "step": 2059, "token_acc": 0.2729023675435253 }, { "epoch": 1.2075637642919965, "grad_norm": 0.7951807721010673, "learning_rate": 0.00013995880291814327, "loss": 3.2587151527404785, "step": 2060, "token_acc": 0.27283289043679865 }, { "epoch": 1.2081501026092054, "grad_norm": 0.7733483946839925, "learning_rate": 0.0001399585698605173, "loss": 3.295067548751831, "step": 2061, "token_acc": 0.26807482214965944 }, { "epoch": 1.2087364409264145, "grad_norm": 0.8600870866196413, "learning_rate": 0.00013995833614572487, "loss": 3.272195339202881, "step": 2062, "token_acc": 0.2732310022026432 }, { "epoch": 1.2093227792436236, "grad_norm": 1.017558241951975, "learning_rate": 0.00013995810177376813, "loss": 3.253392219543457, "step": 2063, "token_acc": 0.27330428071855556 }, { "epoch": 1.2099091175608325, "grad_norm": 1.0994980717808918, "learning_rate": 0.0001399578667446493, "loss": 3.197162628173828, "step": 2064, "token_acc": 0.28170951240044034 }, { "epoch": 1.2104954558780416, "grad_norm": 0.7554169682643791, "learning_rate": 0.00013995763105837056, "loss": 3.2833306789398193, "step": 2065, "token_acc": 0.27028180589061795 }, { "epoch": 1.2110817941952507, "grad_norm": 0.8643574044858382, "learning_rate": 0.00013995739471493415, "loss": 3.2892372608184814, "step": 2066, "token_acc": 0.2668725422336485 }, { "epoch": 1.2116681325124596, "grad_norm": 1.4286750909845534, "learning_rate": 0.0001399571577143423, "loss": 3.2418484687805176, "step": 2067, "token_acc": 0.2748956746192907 }, { "epoch": 1.2122544708296688, "grad_norm": 0.6373148224048629, "learning_rate": 0.0001399569200565972, "loss": 3.2708046436309814, "step": 2068, "token_acc": 0.27162786624285856 }, { "epoch": 1.2128408091468779, "grad_norm": 0.9826710697592247, "learning_rate": 0.00013995668174170112, "loss": 3.2232444286346436, "step": 2069, "token_acc": 0.27724404820731013 }, { "epoch": 1.2134271474640868, "grad_norm": 1.0880332011677372, "learning_rate": 0.0001399564427696563, "loss": 3.244549512863159, "step": 2070, "token_acc": 0.27509658285312905 }, { "epoch": 1.2140134857812959, "grad_norm": 0.7967802119367023, "learning_rate": 0.00013995620314046493, "loss": 3.3176679611206055, "step": 2071, "token_acc": 0.2660486143134741 }, { "epoch": 1.2145998240985048, "grad_norm": 1.0233166266062386, "learning_rate": 0.0001399559628541293, "loss": 3.299207925796509, "step": 2072, "token_acc": 0.26728944423047596 }, { "epoch": 1.2151861624157139, "grad_norm": 0.8045512762453749, "learning_rate": 0.0001399557219106517, "loss": 3.2874369621276855, "step": 2073, "token_acc": 0.26988809898236354 }, { "epoch": 1.215772500732923, "grad_norm": 0.8925296046058288, "learning_rate": 0.00013995548031003435, "loss": 3.2711923122406006, "step": 2074, "token_acc": 0.2720515005595655 }, { "epoch": 1.2163588390501319, "grad_norm": 0.8024475997723093, "learning_rate": 0.0001399552380522795, "loss": 3.2613604068756104, "step": 2075, "token_acc": 0.2742474561524505 }, { "epoch": 1.216945177367341, "grad_norm": 0.678643848820099, "learning_rate": 0.0001399549951373895, "loss": 3.2243504524230957, "step": 2076, "token_acc": 0.27744292710735663 }, { "epoch": 1.2175315156845499, "grad_norm": 0.7192385242812829, "learning_rate": 0.00013995475156536655, "loss": 3.2358908653259277, "step": 2077, "token_acc": 0.27488016212213723 }, { "epoch": 1.218117854001759, "grad_norm": 0.632856485412475, "learning_rate": 0.000139954507336213, "loss": 3.251486301422119, "step": 2078, "token_acc": 0.2743380697259104 }, { "epoch": 1.218704192318968, "grad_norm": 0.8076257263986828, "learning_rate": 0.0001399542624499311, "loss": 3.2412564754486084, "step": 2079, "token_acc": 0.27506333138467537 }, { "epoch": 1.219290530636177, "grad_norm": 0.7162834114217783, "learning_rate": 0.00013995401690652316, "loss": 3.2543599605560303, "step": 2080, "token_acc": 0.2735167377839316 }, { "epoch": 1.219876868953386, "grad_norm": 0.5783302552105949, "learning_rate": 0.00013995377070599152, "loss": 3.243490695953369, "step": 2081, "token_acc": 0.2763864161949698 }, { "epoch": 1.2204632072705952, "grad_norm": 0.8053531322373088, "learning_rate": 0.00013995352384833844, "loss": 3.2940032482147217, "step": 2082, "token_acc": 0.2686937299778419 }, { "epoch": 1.221049545587804, "grad_norm": 0.7627267985125418, "learning_rate": 0.0001399532763335663, "loss": 3.2762742042541504, "step": 2083, "token_acc": 0.27110944305004964 }, { "epoch": 1.2216358839050132, "grad_norm": 0.8501251019049632, "learning_rate": 0.00013995302816167737, "loss": 3.251089572906494, "step": 2084, "token_acc": 0.2741546773824148 }, { "epoch": 1.2222222222222223, "grad_norm": 0.7930636738934369, "learning_rate": 0.00013995277933267401, "loss": 3.293471097946167, "step": 2085, "token_acc": 0.26694135499605665 }, { "epoch": 1.2228085605394312, "grad_norm": 1.0517571730044282, "learning_rate": 0.00013995252984655855, "loss": 3.258188247680664, "step": 2086, "token_acc": 0.27355519954542945 }, { "epoch": 1.2233948988566403, "grad_norm": 0.9492592108768643, "learning_rate": 0.00013995227970333332, "loss": 3.2818026542663574, "step": 2087, "token_acc": 0.2710666816264986 }, { "epoch": 1.2239812371738492, "grad_norm": 0.9075878388127805, "learning_rate": 0.0001399520289030007, "loss": 3.2445836067199707, "step": 2088, "token_acc": 0.27482413668291095 }, { "epoch": 1.2245675754910583, "grad_norm": 0.7624384267735543, "learning_rate": 0.00013995177744556303, "loss": 3.2573299407958984, "step": 2089, "token_acc": 0.27402552245466766 }, { "epoch": 1.2251539138082674, "grad_norm": 0.7254844398683863, "learning_rate": 0.00013995152533102266, "loss": 3.257793426513672, "step": 2090, "token_acc": 0.2725927120619329 }, { "epoch": 1.2257402521254763, "grad_norm": 0.762945481114782, "learning_rate": 0.000139951272559382, "loss": 3.2640485763549805, "step": 2091, "token_acc": 0.27323780221652066 }, { "epoch": 1.2263265904426854, "grad_norm": 0.6901424543644455, "learning_rate": 0.00013995101913064336, "loss": 3.243479013442993, "step": 2092, "token_acc": 0.27411229244992535 }, { "epoch": 1.2269129287598945, "grad_norm": 0.8103476249573017, "learning_rate": 0.00013995076504480917, "loss": 3.2786288261413574, "step": 2093, "token_acc": 0.26982325615061875 }, { "epoch": 1.2274992670771034, "grad_norm": 0.8273412066621197, "learning_rate": 0.00013995051030188182, "loss": 3.2869534492492676, "step": 2094, "token_acc": 0.2680933832379424 }, { "epoch": 1.2280856053943126, "grad_norm": 0.6300906084649253, "learning_rate": 0.00013995025490186365, "loss": 3.275829792022705, "step": 2095, "token_acc": 0.27177046180771935 }, { "epoch": 1.2286719437115217, "grad_norm": 0.6638352130970439, "learning_rate": 0.00013994999884475712, "loss": 3.230128288269043, "step": 2096, "token_acc": 0.2759711085169794 }, { "epoch": 1.2292582820287306, "grad_norm": 0.7999811478776575, "learning_rate": 0.0001399497421305646, "loss": 3.28938627243042, "step": 2097, "token_acc": 0.2693783845831814 }, { "epoch": 1.2298446203459397, "grad_norm": 0.7432158789397402, "learning_rate": 0.0001399494847592885, "loss": 3.264371395111084, "step": 2098, "token_acc": 0.271819842396462 }, { "epoch": 1.2304309586631486, "grad_norm": 0.6103956760103257, "learning_rate": 0.00013994922673093128, "loss": 3.2540974617004395, "step": 2099, "token_acc": 0.2737752381924161 }, { "epoch": 1.2310172969803577, "grad_norm": 0.7966789308916669, "learning_rate": 0.0001399489680454953, "loss": 3.298779010772705, "step": 2100, "token_acc": 0.26808113401441835 }, { "epoch": 1.2316036352975668, "grad_norm": 1.050472356644393, "learning_rate": 0.00013994870870298303, "loss": 3.289454221725464, "step": 2101, "token_acc": 0.26979128431657956 }, { "epoch": 1.2321899736147757, "grad_norm": 0.8180846020768008, "learning_rate": 0.00013994844870339693, "loss": 3.3563408851623535, "step": 2102, "token_acc": 0.2609291936801672 }, { "epoch": 1.2327763119319848, "grad_norm": 0.7348362046380063, "learning_rate": 0.00013994818804673938, "loss": 3.2577309608459473, "step": 2103, "token_acc": 0.2746498922952132 }, { "epoch": 1.2333626502491937, "grad_norm": 1.0033770055055589, "learning_rate": 0.00013994792673301286, "loss": 3.2838189601898193, "step": 2104, "token_acc": 0.26996027255565486 }, { "epoch": 1.2339489885664028, "grad_norm": 1.0723149175367184, "learning_rate": 0.00013994766476221985, "loss": 3.295071601867676, "step": 2105, "token_acc": 0.2668412629128474 }, { "epoch": 1.234535326883612, "grad_norm": 0.7730063255573597, "learning_rate": 0.00013994740213436275, "loss": 3.280414581298828, "step": 2106, "token_acc": 0.2689909606839065 }, { "epoch": 1.2351216652008208, "grad_norm": 0.778235477399716, "learning_rate": 0.0001399471388494441, "loss": 3.298107147216797, "step": 2107, "token_acc": 0.2681080941365005 }, { "epoch": 1.23570800351803, "grad_norm": 0.7719802633313659, "learning_rate": 0.00013994687490746628, "loss": 3.315682888031006, "step": 2108, "token_acc": 0.26536364257807366 }, { "epoch": 1.236294341835239, "grad_norm": 0.5813641941027856, "learning_rate": 0.0001399466103084319, "loss": 3.2468855381011963, "step": 2109, "token_acc": 0.2745504990256989 }, { "epoch": 1.236880680152448, "grad_norm": 0.5966761416628407, "learning_rate": 0.0001399463450523433, "loss": 3.254228115081787, "step": 2110, "token_acc": 0.2745239639601787 }, { "epoch": 1.237467018469657, "grad_norm": 0.7528144098108003, "learning_rate": 0.00013994607913920306, "loss": 3.285797595977783, "step": 2111, "token_acc": 0.26844335820701487 }, { "epoch": 1.2380533567868661, "grad_norm": 0.7999341965629413, "learning_rate": 0.0001399458125690137, "loss": 3.222423553466797, "step": 2112, "token_acc": 0.27795262676985794 }, { "epoch": 1.238639695104075, "grad_norm": 0.8524942198868808, "learning_rate": 0.00013994554534177764, "loss": 3.279275417327881, "step": 2113, "token_acc": 0.2701440134889255 }, { "epoch": 1.2392260334212841, "grad_norm": 0.6335293228359163, "learning_rate": 0.00013994527745749745, "loss": 3.257563591003418, "step": 2114, "token_acc": 0.27245539663020735 }, { "epoch": 1.239812371738493, "grad_norm": 0.7609621354257848, "learning_rate": 0.00013994500891617562, "loss": 3.276428699493408, "step": 2115, "token_acc": 0.27117695265712666 }, { "epoch": 1.2403987100557021, "grad_norm": 0.9137486055077021, "learning_rate": 0.0001399447397178147, "loss": 3.2814674377441406, "step": 2116, "token_acc": 0.2706345970919962 }, { "epoch": 1.2409850483729112, "grad_norm": 0.8954839424820901, "learning_rate": 0.0001399444698624172, "loss": 3.2411656379699707, "step": 2117, "token_acc": 0.2748881460529699 }, { "epoch": 1.2415713866901201, "grad_norm": 0.8510782177440706, "learning_rate": 0.00013994419934998563, "loss": 3.3044254779815674, "step": 2118, "token_acc": 0.2667808811042733 }, { "epoch": 1.2421577250073292, "grad_norm": 0.711325446251975, "learning_rate": 0.0001399439281805226, "loss": 3.2630367279052734, "step": 2119, "token_acc": 0.2726042079451494 }, { "epoch": 1.2427440633245384, "grad_norm": 0.732955914795538, "learning_rate": 0.0001399436563540306, "loss": 3.2871408462524414, "step": 2120, "token_acc": 0.26821046698785517 }, { "epoch": 1.2433304016417472, "grad_norm": 0.9785958116921543, "learning_rate": 0.00013994338387051218, "loss": 3.275796890258789, "step": 2121, "token_acc": 0.2699258349743635 }, { "epoch": 1.2439167399589564, "grad_norm": 0.9806795031341744, "learning_rate": 0.00013994311072996994, "loss": 3.2688193321228027, "step": 2122, "token_acc": 0.2715173345339937 }, { "epoch": 1.2445030782761655, "grad_norm": 0.6757998200561142, "learning_rate": 0.00013994283693240643, "loss": 3.2210726737976074, "step": 2123, "token_acc": 0.27666671782829133 }, { "epoch": 1.2450894165933744, "grad_norm": 0.6555366961423104, "learning_rate": 0.0001399425624778242, "loss": 3.2891485691070557, "step": 2124, "token_acc": 0.26967460128479875 }, { "epoch": 1.2456757549105835, "grad_norm": 0.7217183048077976, "learning_rate": 0.00013994228736622584, "loss": 3.289639472961426, "step": 2125, "token_acc": 0.27032646953891704 }, { "epoch": 1.2462620932277924, "grad_norm": 0.6204837826883519, "learning_rate": 0.00013994201159761395, "loss": 3.3212976455688477, "step": 2126, "token_acc": 0.2638176126449759 }, { "epoch": 1.2468484315450015, "grad_norm": 0.512733277185017, "learning_rate": 0.0001399417351719911, "loss": 3.3063395023345947, "step": 2127, "token_acc": 0.26451418882833927 }, { "epoch": 1.2474347698622106, "grad_norm": 0.5962678951896425, "learning_rate": 0.0001399414580893599, "loss": 3.2598273754119873, "step": 2128, "token_acc": 0.2727725732509468 }, { "epoch": 1.2480211081794195, "grad_norm": 0.6114274931745662, "learning_rate": 0.00013994118034972296, "loss": 3.234663486480713, "step": 2129, "token_acc": 0.2764413729026331 }, { "epoch": 1.2486074464966286, "grad_norm": 0.631441212734014, "learning_rate": 0.00013994090195308285, "loss": 3.2678322792053223, "step": 2130, "token_acc": 0.27025203306673834 }, { "epoch": 1.2491937848138375, "grad_norm": 0.5954373081725135, "learning_rate": 0.00013994062289944225, "loss": 3.2652812004089355, "step": 2131, "token_acc": 0.2728173502227436 }, { "epoch": 1.2497801231310466, "grad_norm": 0.8286947267672502, "learning_rate": 0.00013994034318880373, "loss": 3.2767622470855713, "step": 2132, "token_acc": 0.27050031731230006 }, { "epoch": 1.2503664614482557, "grad_norm": 0.9711431422832624, "learning_rate": 0.00013994006282116992, "loss": 3.2957820892333984, "step": 2133, "token_acc": 0.26634402272252533 }, { "epoch": 1.2509527997654648, "grad_norm": 1.3140557543022733, "learning_rate": 0.00013993978179654347, "loss": 3.303408145904541, "step": 2134, "token_acc": 0.26676655919301107 }, { "epoch": 1.2515391380826737, "grad_norm": 0.8103839155007129, "learning_rate": 0.000139939500114927, "loss": 3.262760639190674, "step": 2135, "token_acc": 0.2722601781526456 }, { "epoch": 1.2521254763998828, "grad_norm": 0.5720632435812079, "learning_rate": 0.00013993921777632318, "loss": 3.2550487518310547, "step": 2136, "token_acc": 0.2746788766540017 }, { "epoch": 1.2527118147170917, "grad_norm": 0.5618387843506389, "learning_rate": 0.00013993893478073468, "loss": 3.243192672729492, "step": 2137, "token_acc": 0.27482293341651115 }, { "epoch": 1.2532981530343008, "grad_norm": 0.8570650550360386, "learning_rate": 0.00013993865112816412, "loss": 3.254253387451172, "step": 2138, "token_acc": 0.2717191035988224 }, { "epoch": 1.25388449135151, "grad_norm": 0.7503022875993651, "learning_rate": 0.00013993836681861415, "loss": 3.2357943058013916, "step": 2139, "token_acc": 0.27489379768102284 }, { "epoch": 1.2544708296687188, "grad_norm": 0.8244196220344667, "learning_rate": 0.0001399380818520875, "loss": 3.3101367950439453, "step": 2140, "token_acc": 0.26650898181874705 }, { "epoch": 1.255057167985928, "grad_norm": 0.8434348438490735, "learning_rate": 0.00013993779622858678, "loss": 3.251595973968506, "step": 2141, "token_acc": 0.27360839760293876 }, { "epoch": 1.2556435063031368, "grad_norm": 0.7006240332628441, "learning_rate": 0.00013993750994811474, "loss": 3.2532222270965576, "step": 2142, "token_acc": 0.2744377035022503 }, { "epoch": 1.256229844620346, "grad_norm": 0.7828525019675897, "learning_rate": 0.00013993722301067403, "loss": 3.2525672912597656, "step": 2143, "token_acc": 0.2729189545354711 }, { "epoch": 1.256816182937555, "grad_norm": 0.7609387188798483, "learning_rate": 0.00013993693541626732, "loss": 3.326597213745117, "step": 2144, "token_acc": 0.26526368113780807 }, { "epoch": 1.257402521254764, "grad_norm": 0.6039315136945768, "learning_rate": 0.00013993664716489737, "loss": 3.2765822410583496, "step": 2145, "token_acc": 0.26981815674167436 }, { "epoch": 1.257988859571973, "grad_norm": 0.9156171550963622, "learning_rate": 0.00013993635825656687, "loss": 3.279661178588867, "step": 2146, "token_acc": 0.2692610735297929 }, { "epoch": 1.258575197889182, "grad_norm": 0.8377394965404859, "learning_rate": 0.0001399360686912785, "loss": 3.210858106613159, "step": 2147, "token_acc": 0.27934817466146666 }, { "epoch": 1.259161536206391, "grad_norm": 0.8144310150812833, "learning_rate": 0.00013993577846903502, "loss": 3.296915054321289, "step": 2148, "token_acc": 0.2678595490335161 }, { "epoch": 1.2597478745236002, "grad_norm": 0.6878299396746819, "learning_rate": 0.00013993548758983913, "loss": 3.2164597511291504, "step": 2149, "token_acc": 0.2772925017172544 }, { "epoch": 1.2603342128408093, "grad_norm": 0.695183962294549, "learning_rate": 0.0001399351960536936, "loss": 3.2438132762908936, "step": 2150, "token_acc": 0.27531160705165647 }, { "epoch": 1.2609205511580182, "grad_norm": 0.7604190801224, "learning_rate": 0.0001399349038606011, "loss": 3.244269847869873, "step": 2151, "token_acc": 0.2743884765157339 }, { "epoch": 1.2615068894752273, "grad_norm": 0.6687786812743288, "learning_rate": 0.00013993461101056444, "loss": 3.2383170127868652, "step": 2152, "token_acc": 0.27533110940575195 }, { "epoch": 1.2620932277924362, "grad_norm": 0.7039604666216203, "learning_rate": 0.00013993431750358633, "loss": 3.2438838481903076, "step": 2153, "token_acc": 0.2748859424962873 }, { "epoch": 1.2626795661096453, "grad_norm": 0.5303302765718964, "learning_rate": 0.00013993402333966959, "loss": 3.2546653747558594, "step": 2154, "token_acc": 0.2726827423116382 }, { "epoch": 1.2632659044268544, "grad_norm": 0.7134035170074915, "learning_rate": 0.0001399337285188169, "loss": 3.2302613258361816, "step": 2155, "token_acc": 0.27678849452434356 }, { "epoch": 1.2638522427440633, "grad_norm": 0.7681233256057425, "learning_rate": 0.00013993343304103108, "loss": 3.2539103031158447, "step": 2156, "token_acc": 0.27249049587635293 }, { "epoch": 1.2644385810612724, "grad_norm": 0.6052256364281839, "learning_rate": 0.0001399331369063149, "loss": 3.318514823913574, "step": 2157, "token_acc": 0.2646158045155009 }, { "epoch": 1.2650249193784813, "grad_norm": 0.7278428143331697, "learning_rate": 0.0001399328401146711, "loss": 3.2906060218811035, "step": 2158, "token_acc": 0.2665939524554187 }, { "epoch": 1.2656112576956904, "grad_norm": 0.6841650320078934, "learning_rate": 0.00013993254266610254, "loss": 3.234330654144287, "step": 2159, "token_acc": 0.2754574752541239 }, { "epoch": 1.2661975960128995, "grad_norm": 0.6959107507921208, "learning_rate": 0.00013993224456061197, "loss": 3.236572265625, "step": 2160, "token_acc": 0.2746586085993726 }, { "epoch": 1.2667839343301086, "grad_norm": 0.639403218909042, "learning_rate": 0.0001399319457982022, "loss": 3.2537500858306885, "step": 2161, "token_acc": 0.27302771278772575 }, { "epoch": 1.2673702726473175, "grad_norm": 0.5497072981960509, "learning_rate": 0.000139931646378876, "loss": 3.2242488861083984, "step": 2162, "token_acc": 0.2782415443690495 }, { "epoch": 1.2679566109645266, "grad_norm": 0.7506794427588356, "learning_rate": 0.00013993134630263624, "loss": 3.287839412689209, "step": 2163, "token_acc": 0.267107909070328 }, { "epoch": 1.2685429492817355, "grad_norm": 0.7600482615707181, "learning_rate": 0.00013993104556948572, "loss": 3.2164316177368164, "step": 2164, "token_acc": 0.27654171423246277 }, { "epoch": 1.2691292875989446, "grad_norm": 0.6928488895360626, "learning_rate": 0.00013993074417942725, "loss": 3.219045639038086, "step": 2165, "token_acc": 0.279921189231621 }, { "epoch": 1.2697156259161537, "grad_norm": 0.5433994634145555, "learning_rate": 0.0001399304421324637, "loss": 3.2466506958007812, "step": 2166, "token_acc": 0.2734385088072433 }, { "epoch": 1.2703019642333626, "grad_norm": 0.7280912637355488, "learning_rate": 0.00013993013942859784, "loss": 3.266098976135254, "step": 2167, "token_acc": 0.27145218770599866 }, { "epoch": 1.2708883025505717, "grad_norm": 0.8838009378643746, "learning_rate": 0.0001399298360678326, "loss": 3.2824175357818604, "step": 2168, "token_acc": 0.26815708601226174 }, { "epoch": 1.2714746408677806, "grad_norm": 0.8937601834621671, "learning_rate": 0.00013992953205017075, "loss": 3.280222177505493, "step": 2169, "token_acc": 0.26840512223515717 }, { "epoch": 1.2720609791849897, "grad_norm": 0.951924473822936, "learning_rate": 0.00013992922737561517, "loss": 3.260284423828125, "step": 2170, "token_acc": 0.27293685486911423 }, { "epoch": 1.2726473175021988, "grad_norm": 1.1055187112944074, "learning_rate": 0.0001399289220441687, "loss": 3.2998034954071045, "step": 2171, "token_acc": 0.26654556432572746 }, { "epoch": 1.2732336558194077, "grad_norm": 0.8585618721513086, "learning_rate": 0.0001399286160558343, "loss": 3.2888336181640625, "step": 2172, "token_acc": 0.2689896313690352 }, { "epoch": 1.2738199941366168, "grad_norm": 0.8033663032148733, "learning_rate": 0.00013992830941061477, "loss": 3.2678914070129395, "step": 2173, "token_acc": 0.27175066748369586 }, { "epoch": 1.2744063324538257, "grad_norm": 0.8966734838661492, "learning_rate": 0.00013992800210851298, "loss": 3.25895619392395, "step": 2174, "token_acc": 0.27261749807894364 }, { "epoch": 1.2749926707710348, "grad_norm": 0.7665915285163452, "learning_rate": 0.00013992769414953183, "loss": 3.279198408126831, "step": 2175, "token_acc": 0.26911084106833516 }, { "epoch": 1.275579009088244, "grad_norm": 0.6305990459156104, "learning_rate": 0.00013992738553367427, "loss": 3.263517379760742, "step": 2176, "token_acc": 0.2719003525849745 }, { "epoch": 1.276165347405453, "grad_norm": 0.6599784062293278, "learning_rate": 0.00013992707626094312, "loss": 3.2567808628082275, "step": 2177, "token_acc": 0.2723966975750199 }, { "epoch": 1.276751685722662, "grad_norm": 0.5030147367007866, "learning_rate": 0.00013992676633134134, "loss": 3.2581636905670166, "step": 2178, "token_acc": 0.27204981519166077 }, { "epoch": 1.277338024039871, "grad_norm": 0.5296856079452157, "learning_rate": 0.00013992645574487176, "loss": 3.2410213947296143, "step": 2179, "token_acc": 0.2739711283617431 }, { "epoch": 1.27792436235708, "grad_norm": 0.5666534333246164, "learning_rate": 0.0001399261445015374, "loss": 3.2737512588500977, "step": 2180, "token_acc": 0.2682639806658488 }, { "epoch": 1.278510700674289, "grad_norm": 0.48618000394802513, "learning_rate": 0.00013992583260134112, "loss": 3.261039972305298, "step": 2181, "token_acc": 0.2719647774525198 }, { "epoch": 1.2790970389914982, "grad_norm": 0.527101327377743, "learning_rate": 0.0001399255200442859, "loss": 3.288301944732666, "step": 2182, "token_acc": 0.268849431525081 }, { "epoch": 1.279683377308707, "grad_norm": 0.8100077547456774, "learning_rate": 0.00013992520683037463, "loss": 3.251373052597046, "step": 2183, "token_acc": 0.2743277104698923 }, { "epoch": 1.2802697156259162, "grad_norm": 0.9152355257963801, "learning_rate": 0.00013992489295961026, "loss": 3.2229623794555664, "step": 2184, "token_acc": 0.27634457434965853 }, { "epoch": 1.280856053943125, "grad_norm": 0.8150004593288109, "learning_rate": 0.00013992457843199575, "loss": 3.2767679691314697, "step": 2185, "token_acc": 0.26813752104550215 }, { "epoch": 1.2814423922603342, "grad_norm": 0.9523348961348439, "learning_rate": 0.00013992426324753403, "loss": 3.2204365730285645, "step": 2186, "token_acc": 0.2771839018671254 }, { "epoch": 1.2820287305775433, "grad_norm": 1.0640896897353698, "learning_rate": 0.00013992394740622812, "loss": 3.294424533843994, "step": 2187, "token_acc": 0.2661403860349608 }, { "epoch": 1.2826150688947524, "grad_norm": 0.749922571141057, "learning_rate": 0.00013992363090808093, "loss": 3.2759032249450684, "step": 2188, "token_acc": 0.26952181085321375 }, { "epoch": 1.2832014072119613, "grad_norm": 0.7921172047424397, "learning_rate": 0.00013992331375309544, "loss": 3.232530117034912, "step": 2189, "token_acc": 0.27504480618926236 }, { "epoch": 1.2837877455291704, "grad_norm": 0.9163259784355665, "learning_rate": 0.00013992299594127463, "loss": 3.257448673248291, "step": 2190, "token_acc": 0.273471002620797 }, { "epoch": 1.2843740838463793, "grad_norm": 1.0882052565088902, "learning_rate": 0.00013992267747262152, "loss": 3.254312515258789, "step": 2191, "token_acc": 0.2736796592599754 }, { "epoch": 1.2849604221635884, "grad_norm": 0.9771471248614845, "learning_rate": 0.0001399223583471391, "loss": 3.2629873752593994, "step": 2192, "token_acc": 0.2735318729755063 }, { "epoch": 1.2855467604807975, "grad_norm": 0.8248492949865283, "learning_rate": 0.0001399220385648303, "loss": 3.293623208999634, "step": 2193, "token_acc": 0.26619862365048297 }, { "epoch": 1.2861330987980064, "grad_norm": 0.8763376985350948, "learning_rate": 0.0001399217181256982, "loss": 3.2297704219818115, "step": 2194, "token_acc": 0.27665187517151746 }, { "epoch": 1.2867194371152155, "grad_norm": 0.8743851267136841, "learning_rate": 0.00013992139702974574, "loss": 3.2616500854492188, "step": 2195, "token_acc": 0.272131557971755 }, { "epoch": 1.2873057754324244, "grad_norm": 0.6816364200656553, "learning_rate": 0.000139921075276976, "loss": 3.3285412788391113, "step": 2196, "token_acc": 0.26408010330017684 }, { "epoch": 1.2878921137496335, "grad_norm": 0.7374953897483747, "learning_rate": 0.00013992075286739197, "loss": 3.2469429969787598, "step": 2197, "token_acc": 0.2743279679712395 }, { "epoch": 1.2884784520668426, "grad_norm": 0.6696799842656691, "learning_rate": 0.00013992042980099672, "loss": 3.282231330871582, "step": 2198, "token_acc": 0.27053639200472296 }, { "epoch": 1.2890647903840515, "grad_norm": 0.7366887469516081, "learning_rate": 0.00013992010607779323, "loss": 3.2968177795410156, "step": 2199, "token_acc": 0.2692325320839275 }, { "epoch": 1.2896511287012606, "grad_norm": 0.6935747589713157, "learning_rate": 0.00013991978169778453, "loss": 3.1987712383270264, "step": 2200, "token_acc": 0.280801938683617 }, { "epoch": 1.2902374670184695, "grad_norm": 0.6438118179328719, "learning_rate": 0.00013991945666097375, "loss": 3.2876834869384766, "step": 2201, "token_acc": 0.2694637174139596 }, { "epoch": 1.2908238053356786, "grad_norm": 0.6707368068251847, "learning_rate": 0.00013991913096736384, "loss": 3.2258429527282715, "step": 2202, "token_acc": 0.2751306023899791 }, { "epoch": 1.2914101436528878, "grad_norm": 0.9587532855426293, "learning_rate": 0.00013991880461695797, "loss": 3.187530994415283, "step": 2203, "token_acc": 0.28280909928009373 }, { "epoch": 1.2919964819700969, "grad_norm": 0.8824033870920983, "learning_rate": 0.00013991847760975909, "loss": 3.28365421295166, "step": 2204, "token_acc": 0.271241541038526 }, { "epoch": 1.2925828202873058, "grad_norm": 0.6811511793129583, "learning_rate": 0.00013991814994577037, "loss": 3.2248082160949707, "step": 2205, "token_acc": 0.27781760514424747 }, { "epoch": 1.2931691586045149, "grad_norm": 0.6436003691980726, "learning_rate": 0.00013991782162499483, "loss": 3.1893463134765625, "step": 2206, "token_acc": 0.28056817454731586 }, { "epoch": 1.2937554969217238, "grad_norm": 0.7087569458317957, "learning_rate": 0.00013991749264743558, "loss": 3.2576708793640137, "step": 2207, "token_acc": 0.27270859350817084 }, { "epoch": 1.2943418352389329, "grad_norm": 0.8650500063591986, "learning_rate": 0.00013991716301309568, "loss": 3.255648612976074, "step": 2208, "token_acc": 0.273727862788994 }, { "epoch": 1.294928173556142, "grad_norm": 0.8037243580309142, "learning_rate": 0.00013991683272197828, "loss": 3.2718734741210938, "step": 2209, "token_acc": 0.27259017118312173 }, { "epoch": 1.2955145118733509, "grad_norm": 0.7066036507332567, "learning_rate": 0.00013991650177408644, "loss": 3.2621817588806152, "step": 2210, "token_acc": 0.27049663673034874 }, { "epoch": 1.29610085019056, "grad_norm": 0.6380005805725367, "learning_rate": 0.00013991617016942325, "loss": 3.2806944847106934, "step": 2211, "token_acc": 0.27115543491089344 }, { "epoch": 1.2966871885077689, "grad_norm": 0.6849777060859824, "learning_rate": 0.00013991583790799188, "loss": 3.2411704063415527, "step": 2212, "token_acc": 0.2750044802980471 }, { "epoch": 1.297273526824978, "grad_norm": 0.6873223156311801, "learning_rate": 0.00013991550498979543, "loss": 3.2397704124450684, "step": 2213, "token_acc": 0.2751558923967907 }, { "epoch": 1.297859865142187, "grad_norm": 0.5073235932678809, "learning_rate": 0.00013991517141483703, "loss": 3.2567384243011475, "step": 2214, "token_acc": 0.2716094135558726 }, { "epoch": 1.2984462034593962, "grad_norm": 0.5924135063748301, "learning_rate": 0.00013991483718311977, "loss": 3.2667055130004883, "step": 2215, "token_acc": 0.27239217887547945 }, { "epoch": 1.299032541776605, "grad_norm": 0.7007936114896273, "learning_rate": 0.00013991450229464686, "loss": 3.245711326599121, "step": 2216, "token_acc": 0.2756184664684661 }, { "epoch": 1.2996188800938142, "grad_norm": 0.7035594150635341, "learning_rate": 0.0001399141667494214, "loss": 3.2703652381896973, "step": 2217, "token_acc": 0.2696799164236555 }, { "epoch": 1.300205218411023, "grad_norm": 0.7891022923693524, "learning_rate": 0.00013991383054744655, "loss": 3.2577526569366455, "step": 2218, "token_acc": 0.2723859837041582 }, { "epoch": 1.3007915567282322, "grad_norm": 0.6071154305791292, "learning_rate": 0.00013991349368872548, "loss": 3.2174909114837646, "step": 2219, "token_acc": 0.276816976127321 }, { "epoch": 1.3013778950454413, "grad_norm": 0.6881930191170118, "learning_rate": 0.00013991315617326134, "loss": 3.222494602203369, "step": 2220, "token_acc": 0.2765450554791065 }, { "epoch": 1.3019642333626502, "grad_norm": 0.7950987665635977, "learning_rate": 0.0001399128180010573, "loss": 3.281454086303711, "step": 2221, "token_acc": 0.27102085591111796 }, { "epoch": 1.3025505716798593, "grad_norm": 0.7698706310306992, "learning_rate": 0.00013991247917211657, "loss": 3.229562759399414, "step": 2222, "token_acc": 0.27657702420139535 }, { "epoch": 1.3031369099970682, "grad_norm": 0.5784162569507464, "learning_rate": 0.0001399121396864423, "loss": 3.2224950790405273, "step": 2223, "token_acc": 0.27815520952285727 }, { "epoch": 1.3037232483142773, "grad_norm": 0.6093473439474515, "learning_rate": 0.00013991179954403766, "loss": 3.2346553802490234, "step": 2224, "token_acc": 0.2759812307024187 }, { "epoch": 1.3043095866314864, "grad_norm": 0.8181487706003039, "learning_rate": 0.00013991145874490588, "loss": 3.2413926124572754, "step": 2225, "token_acc": 0.2733392987554866 }, { "epoch": 1.3048959249486953, "grad_norm": 0.8629654250072551, "learning_rate": 0.00013991111728905016, "loss": 3.305009126663208, "step": 2226, "token_acc": 0.266866320101724 }, { "epoch": 1.3054822632659044, "grad_norm": 0.8908262723733779, "learning_rate": 0.00013991077517647372, "loss": 3.266554594039917, "step": 2227, "token_acc": 0.2719248604677983 }, { "epoch": 1.3060686015831133, "grad_norm": 0.8457512147509902, "learning_rate": 0.00013991043240717973, "loss": 3.2323203086853027, "step": 2228, "token_acc": 0.27640771526268265 }, { "epoch": 1.3066549399003224, "grad_norm": 0.9163951833327513, "learning_rate": 0.00013991008898117142, "loss": 3.2162413597106934, "step": 2229, "token_acc": 0.2781150258110202 }, { "epoch": 1.3072412782175316, "grad_norm": 0.7933615059819582, "learning_rate": 0.00013990974489845205, "loss": 3.232268810272217, "step": 2230, "token_acc": 0.27553774326792196 }, { "epoch": 1.3078276165347407, "grad_norm": 0.6735725682753338, "learning_rate": 0.0001399094001590248, "loss": 3.23925518989563, "step": 2231, "token_acc": 0.2760740204520568 }, { "epoch": 1.3084139548519496, "grad_norm": 0.7372256267676556, "learning_rate": 0.000139909054762893, "loss": 3.284658908843994, "step": 2232, "token_acc": 0.26910521000173426 }, { "epoch": 1.3090002931691587, "grad_norm": 0.8957993682206438, "learning_rate": 0.0001399087087100598, "loss": 3.254683017730713, "step": 2233, "token_acc": 0.27298310690384364 }, { "epoch": 1.3095866314863676, "grad_norm": 0.9659274396400697, "learning_rate": 0.00013990836200052846, "loss": 3.2381184101104736, "step": 2234, "token_acc": 0.27497735647090304 }, { "epoch": 1.3101729698035767, "grad_norm": 0.9664935716020944, "learning_rate": 0.0001399080146343023, "loss": 3.264007091522217, "step": 2235, "token_acc": 0.2719191768286989 }, { "epoch": 1.3107593081207858, "grad_norm": 0.7427389549311973, "learning_rate": 0.00013990766661138451, "loss": 3.287780284881592, "step": 2236, "token_acc": 0.2692399822057968 }, { "epoch": 1.3113456464379947, "grad_norm": 0.9116604275183391, "learning_rate": 0.0001399073179317784, "loss": 3.2880234718322754, "step": 2237, "token_acc": 0.26625443731216686 }, { "epoch": 1.3119319847552038, "grad_norm": 1.0219219832465658, "learning_rate": 0.0001399069685954873, "loss": 3.2666263580322266, "step": 2238, "token_acc": 0.2697657705616183 }, { "epoch": 1.3125183230724127, "grad_norm": 0.9070759917080985, "learning_rate": 0.00013990661860251437, "loss": 3.283400774002075, "step": 2239, "token_acc": 0.26856703586332403 }, { "epoch": 1.3131046613896218, "grad_norm": 0.690824365002751, "learning_rate": 0.00013990626795286297, "loss": 3.2271370887756348, "step": 2240, "token_acc": 0.27551172114190103 }, { "epoch": 1.313690999706831, "grad_norm": 0.6005903088295038, "learning_rate": 0.00013990591664653638, "loss": 3.198882579803467, "step": 2241, "token_acc": 0.2797137922020676 }, { "epoch": 1.31427733802404, "grad_norm": 0.7027542094852006, "learning_rate": 0.0001399055646835379, "loss": 3.2818074226379395, "step": 2242, "token_acc": 0.26854223137672467 }, { "epoch": 1.314863676341249, "grad_norm": 0.7891555955411103, "learning_rate": 0.00013990521206387087, "loss": 3.2377758026123047, "step": 2243, "token_acc": 0.2756753256214938 }, { "epoch": 1.315450014658458, "grad_norm": 0.6325728029170314, "learning_rate": 0.00013990485878753853, "loss": 3.2493605613708496, "step": 2244, "token_acc": 0.27247135545913986 }, { "epoch": 1.316036352975667, "grad_norm": 0.5574190799075625, "learning_rate": 0.00013990450485454426, "loss": 3.21128511428833, "step": 2245, "token_acc": 0.279038450211768 }, { "epoch": 1.316622691292876, "grad_norm": 0.6902634736874556, "learning_rate": 0.00013990415026489137, "loss": 3.250037670135498, "step": 2246, "token_acc": 0.27363236299309557 }, { "epoch": 1.3172090296100851, "grad_norm": 0.567815681570269, "learning_rate": 0.00013990379501858317, "loss": 3.2443559169769287, "step": 2247, "token_acc": 0.2739386962152828 }, { "epoch": 1.317795367927294, "grad_norm": 0.621490499052342, "learning_rate": 0.00013990343911562302, "loss": 3.2534451484680176, "step": 2248, "token_acc": 0.27288684605779845 }, { "epoch": 1.3183817062445031, "grad_norm": 0.624033612586763, "learning_rate": 0.00013990308255601425, "loss": 3.217923164367676, "step": 2249, "token_acc": 0.2768168523259721 }, { "epoch": 1.318968044561712, "grad_norm": 0.516788534824758, "learning_rate": 0.00013990272533976022, "loss": 3.248495578765869, "step": 2250, "token_acc": 0.27262019460509423 }, { "epoch": 1.3195543828789211, "grad_norm": 0.6323250216859816, "learning_rate": 0.00013990236746686427, "loss": 3.180589437484741, "step": 2251, "token_acc": 0.2825820524033971 }, { "epoch": 1.3201407211961302, "grad_norm": 0.6770601872728982, "learning_rate": 0.0001399020089373298, "loss": 3.3008813858032227, "step": 2252, "token_acc": 0.26704514000376445 }, { "epoch": 1.3207270595133391, "grad_norm": 0.5900596975308456, "learning_rate": 0.00013990164975116013, "loss": 3.2625155448913574, "step": 2253, "token_acc": 0.2724385521574669 }, { "epoch": 1.3213133978305482, "grad_norm": 0.5370764327915902, "learning_rate": 0.00013990128990835866, "loss": 3.2561521530151367, "step": 2254, "token_acc": 0.2731086822233148 }, { "epoch": 1.3218997361477571, "grad_norm": 0.5898362850696133, "learning_rate": 0.00013990092940892874, "loss": 3.2581562995910645, "step": 2255, "token_acc": 0.2722880857460297 }, { "epoch": 1.3224860744649662, "grad_norm": 0.7490815052290202, "learning_rate": 0.0001399005682528738, "loss": 3.2722625732421875, "step": 2256, "token_acc": 0.2713863299755255 }, { "epoch": 1.3230724127821754, "grad_norm": 0.6981011104548723, "learning_rate": 0.00013990020644019722, "loss": 3.1947450637817383, "step": 2257, "token_acc": 0.2783922531007495 }, { "epoch": 1.3236587510993845, "grad_norm": 0.5768251372899952, "learning_rate": 0.00013989984397090238, "loss": 3.215456485748291, "step": 2258, "token_acc": 0.27762382317365286 }, { "epoch": 1.3242450894165934, "grad_norm": 0.5819599066154665, "learning_rate": 0.00013989948084499273, "loss": 3.2729148864746094, "step": 2259, "token_acc": 0.2707146362251091 }, { "epoch": 1.3248314277338025, "grad_norm": 0.6581855038886422, "learning_rate": 0.0001398991170624716, "loss": 3.233405113220215, "step": 2260, "token_acc": 0.2749104741372609 }, { "epoch": 1.3254177660510114, "grad_norm": 0.7116785490287024, "learning_rate": 0.0001398987526233425, "loss": 3.279329776763916, "step": 2261, "token_acc": 0.2687734610477265 }, { "epoch": 1.3260041043682205, "grad_norm": 0.6626271327531708, "learning_rate": 0.00013989838752760878, "loss": 3.231142520904541, "step": 2262, "token_acc": 0.2738908683335133 }, { "epoch": 1.3265904426854296, "grad_norm": 0.5163449107672403, "learning_rate": 0.0001398980217752739, "loss": 3.2220101356506348, "step": 2263, "token_acc": 0.27688355266607245 }, { "epoch": 1.3271767810026385, "grad_norm": 0.5105379548865576, "learning_rate": 0.0001398976553663413, "loss": 3.212674140930176, "step": 2264, "token_acc": 0.2791656201668231 }, { "epoch": 1.3277631193198476, "grad_norm": 0.597500451973825, "learning_rate": 0.00013989728830081442, "loss": 3.259384870529175, "step": 2265, "token_acc": 0.2724031226533482 }, { "epoch": 1.3283494576370565, "grad_norm": 0.6248314555747955, "learning_rate": 0.0001398969205786967, "loss": 3.2520155906677246, "step": 2266, "token_acc": 0.2724840644010078 }, { "epoch": 1.3289357959542656, "grad_norm": 0.5753221944334733, "learning_rate": 0.0001398965521999916, "loss": 3.2315878868103027, "step": 2267, "token_acc": 0.2770375448435116 }, { "epoch": 1.3295221342714747, "grad_norm": 0.5972267405912591, "learning_rate": 0.00013989618316470257, "loss": 3.223360061645508, "step": 2268, "token_acc": 0.2743675983556913 }, { "epoch": 1.3301084725886836, "grad_norm": 0.7280199036509593, "learning_rate": 0.0001398958134728331, "loss": 3.2836251258850098, "step": 2269, "token_acc": 0.268682654627349 }, { "epoch": 1.3306948109058927, "grad_norm": 0.6239475833475651, "learning_rate": 0.00013989544312438665, "loss": 3.2127394676208496, "step": 2270, "token_acc": 0.2777651083238312 }, { "epoch": 1.3312811492231018, "grad_norm": 0.5717660346157789, "learning_rate": 0.00013989507211936667, "loss": 3.2350687980651855, "step": 2271, "token_acc": 0.2741548650819016 }, { "epoch": 1.3318674875403107, "grad_norm": 0.689603580240118, "learning_rate": 0.0001398947004577767, "loss": 3.2244174480438232, "step": 2272, "token_acc": 0.27596050115890325 }, { "epoch": 1.3324538258575198, "grad_norm": 0.7110980711380444, "learning_rate": 0.00013989432813962018, "loss": 3.256523609161377, "step": 2273, "token_acc": 0.2724349864536098 }, { "epoch": 1.333040164174729, "grad_norm": 0.5711239219723008, "learning_rate": 0.00013989395516490066, "loss": 3.267484188079834, "step": 2274, "token_acc": 0.27222280970760854 }, { "epoch": 1.3336265024919378, "grad_norm": 0.5839717509114866, "learning_rate": 0.0001398935815336216, "loss": 3.2843594551086426, "step": 2275, "token_acc": 0.26760402288542473 }, { "epoch": 1.334212840809147, "grad_norm": 0.647329681353092, "learning_rate": 0.00013989320724578651, "loss": 3.282942533493042, "step": 2276, "token_acc": 0.2695252760558561 }, { "epoch": 1.3347991791263558, "grad_norm": 0.605308842065589, "learning_rate": 0.00013989283230139894, "loss": 3.2511353492736816, "step": 2277, "token_acc": 0.2724982940633404 }, { "epoch": 1.335385517443565, "grad_norm": 0.6458725221551095, "learning_rate": 0.00013989245670046238, "loss": 3.1752777099609375, "step": 2278, "token_acc": 0.28377980894783866 }, { "epoch": 1.335971855760774, "grad_norm": 0.6230641430460644, "learning_rate": 0.0001398920804429804, "loss": 3.2585110664367676, "step": 2279, "token_acc": 0.2709860261083655 }, { "epoch": 1.336558194077983, "grad_norm": 0.7087307441032221, "learning_rate": 0.00013989170352895648, "loss": 3.2426064014434814, "step": 2280, "token_acc": 0.27449025706462504 }, { "epoch": 1.337144532395192, "grad_norm": 0.8242285848711308, "learning_rate": 0.00013989132595839418, "loss": 3.2599806785583496, "step": 2281, "token_acc": 0.27169661015607893 }, { "epoch": 1.337730870712401, "grad_norm": 1.1412306922766946, "learning_rate": 0.00013989094773129705, "loss": 3.186037540435791, "step": 2282, "token_acc": 0.2826199491797549 }, { "epoch": 1.33831720902961, "grad_norm": 0.9834134779009901, "learning_rate": 0.00013989056884766867, "loss": 3.2204337120056152, "step": 2283, "token_acc": 0.27580652916005577 }, { "epoch": 1.3389035473468192, "grad_norm": 0.7613876138471651, "learning_rate": 0.00013989018930751253, "loss": 3.2575087547302246, "step": 2284, "token_acc": 0.2708401408469113 }, { "epoch": 1.3394898856640283, "grad_norm": 0.6574144687255081, "learning_rate": 0.0001398898091108323, "loss": 3.2546191215515137, "step": 2285, "token_acc": 0.2717124869048709 }, { "epoch": 1.3400762239812372, "grad_norm": 0.6381101187137609, "learning_rate": 0.00013988942825763145, "loss": 3.349344253540039, "step": 2286, "token_acc": 0.2600196950962172 }, { "epoch": 1.3406625622984463, "grad_norm": 0.8080468526471691, "learning_rate": 0.00013988904674791362, "loss": 3.203268527984619, "step": 2287, "token_acc": 0.2799437167390296 }, { "epoch": 1.3412489006156552, "grad_norm": 0.8668917357778302, "learning_rate": 0.00013988866458168234, "loss": 3.2792110443115234, "step": 2288, "token_acc": 0.2674151991490368 }, { "epoch": 1.3418352389328643, "grad_norm": 0.6711100042152606, "learning_rate": 0.00013988828175894128, "loss": 3.2638742923736572, "step": 2289, "token_acc": 0.27239694471706005 }, { "epoch": 1.3424215772500734, "grad_norm": 0.580276376009243, "learning_rate": 0.00013988789827969395, "loss": 3.232861280441284, "step": 2290, "token_acc": 0.2746714289847373 }, { "epoch": 1.3430079155672823, "grad_norm": 0.5652726388790901, "learning_rate": 0.000139887514143944, "loss": 3.240216016769409, "step": 2291, "token_acc": 0.27436996642973094 }, { "epoch": 1.3435942538844914, "grad_norm": 0.7195392428590845, "learning_rate": 0.00013988712935169504, "loss": 3.2431468963623047, "step": 2292, "token_acc": 0.2722858337241787 }, { "epoch": 1.3441805922017003, "grad_norm": 0.8354717541266719, "learning_rate": 0.00013988674390295064, "loss": 3.2693238258361816, "step": 2293, "token_acc": 0.2717522861753631 }, { "epoch": 1.3447669305189094, "grad_norm": 0.6683114300036848, "learning_rate": 0.0001398863577977145, "loss": 3.2912442684173584, "step": 2294, "token_acc": 0.2675400232413343 }, { "epoch": 1.3453532688361185, "grad_norm": 0.548183158486377, "learning_rate": 0.00013988597103599016, "loss": 3.1789822578430176, "step": 2295, "token_acc": 0.2833757828023111 }, { "epoch": 1.3459396071533274, "grad_norm": 0.6354089163628652, "learning_rate": 0.00013988558361778135, "loss": 3.2483458518981934, "step": 2296, "token_acc": 0.2726482912797258 }, { "epoch": 1.3465259454705365, "grad_norm": 0.49226274844270673, "learning_rate": 0.00013988519554309159, "loss": 3.254448652267456, "step": 2297, "token_acc": 0.27245641278137583 }, { "epoch": 1.3471122837877456, "grad_norm": 0.5895147834176369, "learning_rate": 0.00013988480681192465, "loss": 3.2330968379974365, "step": 2298, "token_acc": 0.27545101993126064 }, { "epoch": 1.3476986221049545, "grad_norm": 0.49747834714724776, "learning_rate": 0.00013988441742428408, "loss": 3.29148530960083, "step": 2299, "token_acc": 0.26753565688902187 }, { "epoch": 1.3482849604221636, "grad_norm": 0.5601327549414155, "learning_rate": 0.00013988402738017357, "loss": 3.2296857833862305, "step": 2300, "token_acc": 0.27627369714925065 }, { "epoch": 1.3488712987393727, "grad_norm": 0.596455531898281, "learning_rate": 0.00013988363667959684, "loss": 3.2047741413116455, "step": 2301, "token_acc": 0.2790164573705095 }, { "epoch": 1.3494576370565816, "grad_norm": 0.5732680965642541, "learning_rate": 0.00013988324532255748, "loss": 3.205781936645508, "step": 2302, "token_acc": 0.2773472345372769 }, { "epoch": 1.3500439753737907, "grad_norm": 0.5868920884054873, "learning_rate": 0.0001398828533090592, "loss": 3.232273578643799, "step": 2303, "token_acc": 0.27477729808316115 }, { "epoch": 1.3506303136909996, "grad_norm": 0.8164791487132735, "learning_rate": 0.00013988246063910566, "loss": 3.216388702392578, "step": 2304, "token_acc": 0.2766196199030835 }, { "epoch": 1.3512166520082087, "grad_norm": 1.0348367988049, "learning_rate": 0.0001398820673127006, "loss": 3.2795491218566895, "step": 2305, "token_acc": 0.2686818581098538 }, { "epoch": 1.3518029903254178, "grad_norm": 0.8079211873004316, "learning_rate": 0.0001398816733298477, "loss": 3.2118287086486816, "step": 2306, "token_acc": 0.27925875729774813 }, { "epoch": 1.3523893286426267, "grad_norm": 0.7937759856415924, "learning_rate": 0.00013988127869055063, "loss": 3.2431528568267822, "step": 2307, "token_acc": 0.2752873104946824 }, { "epoch": 1.3529756669598358, "grad_norm": 0.8032954254740285, "learning_rate": 0.0001398808833948131, "loss": 3.32612681388855, "step": 2308, "token_acc": 0.26353204990577067 }, { "epoch": 1.3535620052770447, "grad_norm": 0.7511147661094716, "learning_rate": 0.00013988048744263885, "loss": 3.2986321449279785, "step": 2309, "token_acc": 0.2661949105907369 }, { "epoch": 1.3541483435942538, "grad_norm": 0.5526329316514107, "learning_rate": 0.0001398800908340316, "loss": 3.2262182235717773, "step": 2310, "token_acc": 0.27626612121975225 }, { "epoch": 1.354734681911463, "grad_norm": 0.609129984802882, "learning_rate": 0.00013987969356899502, "loss": 3.233227252960205, "step": 2311, "token_acc": 0.27597269106174366 }, { "epoch": 1.355321020228672, "grad_norm": 0.5599129746364312, "learning_rate": 0.0001398792956475329, "loss": 3.2175755500793457, "step": 2312, "token_acc": 0.27763282641527753 }, { "epoch": 1.355907358545881, "grad_norm": 0.567176584633, "learning_rate": 0.00013987889706964897, "loss": 3.221271276473999, "step": 2313, "token_acc": 0.2751948760171512 }, { "epoch": 1.35649369686309, "grad_norm": 0.6933566821265675, "learning_rate": 0.00013987849783534697, "loss": 3.273721218109131, "step": 2314, "token_acc": 0.2692191581240894 }, { "epoch": 1.357080035180299, "grad_norm": 0.5323468311091328, "learning_rate": 0.00013987809794463064, "loss": 3.213636636734009, "step": 2315, "token_acc": 0.2771746696263823 }, { "epoch": 1.357666373497508, "grad_norm": 0.5046379123632694, "learning_rate": 0.00013987769739750374, "loss": 3.231790542602539, "step": 2316, "token_acc": 0.27432450348686127 }, { "epoch": 1.3582527118147172, "grad_norm": 0.6431027410514616, "learning_rate": 0.00013987729619397004, "loss": 3.2239303588867188, "step": 2317, "token_acc": 0.2743623516245087 }, { "epoch": 1.358839050131926, "grad_norm": 0.5619226316411767, "learning_rate": 0.00013987689433403328, "loss": 3.2327990531921387, "step": 2318, "token_acc": 0.2729363313894066 }, { "epoch": 1.3594253884491352, "grad_norm": 0.5748305354267821, "learning_rate": 0.00013987649181769729, "loss": 3.2189276218414307, "step": 2319, "token_acc": 0.2779763506155758 }, { "epoch": 1.360011726766344, "grad_norm": 0.69330126014728, "learning_rate": 0.00013987608864496578, "loss": 3.1874217987060547, "step": 2320, "token_acc": 0.28190465745984744 }, { "epoch": 1.3605980650835532, "grad_norm": 0.6851989188654681, "learning_rate": 0.0001398756848158426, "loss": 3.2739462852478027, "step": 2321, "token_acc": 0.2702360996310058 }, { "epoch": 1.3611844034007623, "grad_norm": 0.5630397555433547, "learning_rate": 0.00013987528033033154, "loss": 3.2431530952453613, "step": 2322, "token_acc": 0.2737073383548755 }, { "epoch": 1.3617707417179712, "grad_norm": 0.4886874592968571, "learning_rate": 0.00013987487518843635, "loss": 3.252169609069824, "step": 2323, "token_acc": 0.2731774366023095 }, { "epoch": 1.3623570800351803, "grad_norm": 0.6279518013932495, "learning_rate": 0.00013987446939016086, "loss": 3.2444815635681152, "step": 2324, "token_acc": 0.2743564696253663 }, { "epoch": 1.3629434183523892, "grad_norm": 0.5291448242790368, "learning_rate": 0.0001398740629355089, "loss": 3.2627408504486084, "step": 2325, "token_acc": 0.2722443280244412 }, { "epoch": 1.3635297566695983, "grad_norm": 0.5696048888343826, "learning_rate": 0.00013987365582448429, "loss": 3.269171714782715, "step": 2326, "token_acc": 0.2705178228953112 }, { "epoch": 1.3641160949868074, "grad_norm": 0.6442621176747142, "learning_rate": 0.0001398732480570908, "loss": 3.2238848209381104, "step": 2327, "token_acc": 0.2756909300661825 }, { "epoch": 1.3647024333040165, "grad_norm": 0.7648645101616475, "learning_rate": 0.00013987283963333235, "loss": 3.2791032791137695, "step": 2328, "token_acc": 0.2665929699681556 }, { "epoch": 1.3652887716212254, "grad_norm": 0.5748802047665874, "learning_rate": 0.0001398724305532127, "loss": 3.210606336593628, "step": 2329, "token_acc": 0.2779026005408796 }, { "epoch": 1.3658751099384345, "grad_norm": 0.5823731082802434, "learning_rate": 0.0001398720208167357, "loss": 3.258233070373535, "step": 2330, "token_acc": 0.2721549460359101 }, { "epoch": 1.3664614482556434, "grad_norm": 0.8683458738525525, "learning_rate": 0.00013987161042390526, "loss": 3.2206506729125977, "step": 2331, "token_acc": 0.27505782575173476 }, { "epoch": 1.3670477865728525, "grad_norm": 0.8662192119290943, "learning_rate": 0.00013987119937472516, "loss": 3.2621169090270996, "step": 2332, "token_acc": 0.2720903830371868 }, { "epoch": 1.3676341248900616, "grad_norm": 0.754262797356459, "learning_rate": 0.00013987078766919932, "loss": 3.264309883117676, "step": 2333, "token_acc": 0.27255841560853805 }, { "epoch": 1.3682204632072705, "grad_norm": 0.5130684829104256, "learning_rate": 0.00013987037530733157, "loss": 3.231562614440918, "step": 2334, "token_acc": 0.27413996061476176 }, { "epoch": 1.3688068015244796, "grad_norm": 0.7270047738552735, "learning_rate": 0.00013986996228912578, "loss": 3.208850860595703, "step": 2335, "token_acc": 0.2774473342494421 }, { "epoch": 1.3693931398416885, "grad_norm": 0.7999520139967754, "learning_rate": 0.00013986954861458587, "loss": 3.2066574096679688, "step": 2336, "token_acc": 0.27890765906050446 }, { "epoch": 1.3699794781588976, "grad_norm": 0.6849005162227756, "learning_rate": 0.0001398691342837157, "loss": 3.2388978004455566, "step": 2337, "token_acc": 0.27523591025519156 }, { "epoch": 1.3705658164761068, "grad_norm": 0.7317238359187801, "learning_rate": 0.00013986871929651913, "loss": 3.241654872894287, "step": 2338, "token_acc": 0.27437202061713833 }, { "epoch": 1.3711521547933159, "grad_norm": 0.7116512988719098, "learning_rate": 0.00013986830365300012, "loss": 3.2158799171447754, "step": 2339, "token_acc": 0.2773791952769043 }, { "epoch": 1.3717384931105248, "grad_norm": 0.686242397338959, "learning_rate": 0.00013986788735316255, "loss": 3.2340025901794434, "step": 2340, "token_acc": 0.2767342364751287 }, { "epoch": 1.3723248314277339, "grad_norm": 0.5730839072840935, "learning_rate": 0.00013986747039701033, "loss": 3.2664332389831543, "step": 2341, "token_acc": 0.2733342807856667 }, { "epoch": 1.3729111697449428, "grad_norm": 0.5320028174080119, "learning_rate": 0.00013986705278454736, "loss": 3.2581794261932373, "step": 2342, "token_acc": 0.2737702095691234 }, { "epoch": 1.3734975080621519, "grad_norm": 0.52884286054953, "learning_rate": 0.00013986663451577756, "loss": 3.224905490875244, "step": 2343, "token_acc": 0.2765184988604932 }, { "epoch": 1.374083846379361, "grad_norm": 0.6029620501484472, "learning_rate": 0.0001398662155907049, "loss": 3.223026752471924, "step": 2344, "token_acc": 0.27664117615380474 }, { "epoch": 1.3746701846965699, "grad_norm": 0.4965551290674581, "learning_rate": 0.0001398657960093333, "loss": 3.2916502952575684, "step": 2345, "token_acc": 0.26645371879049906 }, { "epoch": 1.375256523013779, "grad_norm": 0.48638189298899687, "learning_rate": 0.00013986537577166666, "loss": 3.1991796493530273, "step": 2346, "token_acc": 0.2798389377791772 }, { "epoch": 1.3758428613309879, "grad_norm": 0.41998949183780476, "learning_rate": 0.00013986495487770898, "loss": 3.2356410026550293, "step": 2347, "token_acc": 0.27565120585861036 }, { "epoch": 1.376429199648197, "grad_norm": 0.5383890677692373, "learning_rate": 0.00013986453332746418, "loss": 3.23232102394104, "step": 2348, "token_acc": 0.2737869522258125 }, { "epoch": 1.377015537965406, "grad_norm": 0.6115098432933126, "learning_rate": 0.00013986411112093625, "loss": 3.2500758171081543, "step": 2349, "token_acc": 0.2716582607213404 }, { "epoch": 1.377601876282615, "grad_norm": 0.5948083199990917, "learning_rate": 0.00013986368825812912, "loss": 3.1810178756713867, "step": 2350, "token_acc": 0.28305878917397825 }, { "epoch": 1.378188214599824, "grad_norm": 0.5273200057222567, "learning_rate": 0.0001398632647390468, "loss": 3.279315233230591, "step": 2351, "token_acc": 0.2702025776880703 }, { "epoch": 1.378774552917033, "grad_norm": 0.5346339655896671, "learning_rate": 0.00013986284056369323, "loss": 3.2478885650634766, "step": 2352, "token_acc": 0.27174627448002453 }, { "epoch": 1.379360891234242, "grad_norm": 0.5745821107767187, "learning_rate": 0.00013986241573207242, "loss": 3.2705845832824707, "step": 2353, "token_acc": 0.2701598598429237 }, { "epoch": 1.3799472295514512, "grad_norm": 0.5074410359556256, "learning_rate": 0.00013986199024418835, "loss": 3.2543606758117676, "step": 2354, "token_acc": 0.27121840814786874 }, { "epoch": 1.3805335678686603, "grad_norm": 0.5771362375406369, "learning_rate": 0.00013986156410004504, "loss": 3.246710777282715, "step": 2355, "token_acc": 0.2740377202679148 }, { "epoch": 1.3811199061858692, "grad_norm": 0.7245425071904756, "learning_rate": 0.00013986113729964647, "loss": 3.2576050758361816, "step": 2356, "token_acc": 0.27226459815808995 }, { "epoch": 1.3817062445030783, "grad_norm": 0.8024161952602262, "learning_rate": 0.00013986070984299664, "loss": 3.2356467247009277, "step": 2357, "token_acc": 0.2748091603053435 }, { "epoch": 1.3822925828202872, "grad_norm": 0.9561597859494859, "learning_rate": 0.00013986028173009962, "loss": 3.191582441329956, "step": 2358, "token_acc": 0.2803484942236536 }, { "epoch": 1.3828789211374963, "grad_norm": 0.8378281243184932, "learning_rate": 0.00013985985296095934, "loss": 3.25502610206604, "step": 2359, "token_acc": 0.2713004012642717 }, { "epoch": 1.3834652594547054, "grad_norm": 0.5274182033306564, "learning_rate": 0.0001398594235355799, "loss": 3.230663776397705, "step": 2360, "token_acc": 0.27262771680482173 }, { "epoch": 1.3840515977719143, "grad_norm": 0.6439696068737398, "learning_rate": 0.0001398589934539653, "loss": 3.278341293334961, "step": 2361, "token_acc": 0.26932425369536866 }, { "epoch": 1.3846379360891234, "grad_norm": 0.8136282321210115, "learning_rate": 0.0001398585627161196, "loss": 3.2586827278137207, "step": 2362, "token_acc": 0.2712596000484279 }, { "epoch": 1.3852242744063323, "grad_norm": 0.5969839917794962, "learning_rate": 0.00013985813132204685, "loss": 3.21920108795166, "step": 2363, "token_acc": 0.275329395729214 }, { "epoch": 1.3858106127235414, "grad_norm": 0.5799498682965233, "learning_rate": 0.00013985769927175108, "loss": 3.251772403717041, "step": 2364, "token_acc": 0.27124433128712977 }, { "epoch": 1.3863969510407506, "grad_norm": 0.6495105022734131, "learning_rate": 0.0001398572665652364, "loss": 3.2348406314849854, "step": 2365, "token_acc": 0.2733937873818252 }, { "epoch": 1.3869832893579597, "grad_norm": 0.45734174272460987, "learning_rate": 0.0001398568332025068, "loss": 3.264463424682617, "step": 2366, "token_acc": 0.2700660409605294 }, { "epoch": 1.3875696276751686, "grad_norm": 0.6120249163077733, "learning_rate": 0.00013985639918356638, "loss": 3.2258410453796387, "step": 2367, "token_acc": 0.27597315223757735 }, { "epoch": 1.3881559659923777, "grad_norm": 0.7254751182018163, "learning_rate": 0.0001398559645084192, "loss": 3.2677292823791504, "step": 2368, "token_acc": 0.27128922804816924 }, { "epoch": 1.3887423043095866, "grad_norm": 0.522960436691094, "learning_rate": 0.00013985552917706941, "loss": 3.252495527267456, "step": 2369, "token_acc": 0.2712597831635689 }, { "epoch": 1.3893286426267957, "grad_norm": 0.5916230230610566, "learning_rate": 0.00013985509318952102, "loss": 3.244598388671875, "step": 2370, "token_acc": 0.27390663261275966 }, { "epoch": 1.3899149809440048, "grad_norm": 0.6306482480318539, "learning_rate": 0.0001398546565457782, "loss": 3.1578688621520996, "step": 2371, "token_acc": 0.284473575745248 }, { "epoch": 1.3905013192612137, "grad_norm": 0.6098504752027983, "learning_rate": 0.000139854219245845, "loss": 3.2467424869537354, "step": 2372, "token_acc": 0.2719936294227046 }, { "epoch": 1.3910876575784228, "grad_norm": 0.4964983693689211, "learning_rate": 0.00013985378128972552, "loss": 3.194394111633301, "step": 2373, "token_acc": 0.2783039528400571 }, { "epoch": 1.3916739958956317, "grad_norm": 0.5475183553676816, "learning_rate": 0.0001398533426774239, "loss": 3.222277879714966, "step": 2374, "token_acc": 0.27405161952056706 }, { "epoch": 1.3922603342128408, "grad_norm": 0.7150686010433159, "learning_rate": 0.00013985290340894427, "loss": 3.251976490020752, "step": 2375, "token_acc": 0.27342810913894117 }, { "epoch": 1.39284667253005, "grad_norm": 0.6562257249720154, "learning_rate": 0.00013985246348429073, "loss": 3.2200140953063965, "step": 2376, "token_acc": 0.2769852347793629 }, { "epoch": 1.3934330108472588, "grad_norm": 0.5009438376180354, "learning_rate": 0.00013985202290346741, "loss": 3.2666430473327637, "step": 2377, "token_acc": 0.2698519758012614 }, { "epoch": 1.394019349164468, "grad_norm": 0.6719709243952812, "learning_rate": 0.0001398515816664785, "loss": 3.246985912322998, "step": 2378, "token_acc": 0.2732994111673017 }, { "epoch": 1.3946056874816768, "grad_norm": 0.7914211460824498, "learning_rate": 0.00013985113977332806, "loss": 3.221771240234375, "step": 2379, "token_acc": 0.2772318972476309 }, { "epoch": 1.395192025798886, "grad_norm": 0.9393115698433752, "learning_rate": 0.0001398506972240203, "loss": 3.2842864990234375, "step": 2380, "token_acc": 0.26903255319542757 }, { "epoch": 1.395778364116095, "grad_norm": 0.9724057797178768, "learning_rate": 0.00013985025401855937, "loss": 3.301084280014038, "step": 2381, "token_acc": 0.2644298739615482 }, { "epoch": 1.3963647024333041, "grad_norm": 0.6474235003597507, "learning_rate": 0.00013984981015694942, "loss": 3.2261769771575928, "step": 2382, "token_acc": 0.2755207770328167 }, { "epoch": 1.396951040750513, "grad_norm": 0.7111312822339051, "learning_rate": 0.00013984936563919464, "loss": 3.252040386199951, "step": 2383, "token_acc": 0.27120888652539027 }, { "epoch": 1.3975373790677221, "grad_norm": 0.6062412462742552, "learning_rate": 0.0001398489204652992, "loss": 3.269176483154297, "step": 2384, "token_acc": 0.2699137787876312 }, { "epoch": 1.398123717384931, "grad_norm": 0.6596948096515953, "learning_rate": 0.00013984847463526727, "loss": 3.2472195625305176, "step": 2385, "token_acc": 0.2737935586061246 }, { "epoch": 1.3987100557021401, "grad_norm": 0.5952703158323261, "learning_rate": 0.000139848028149103, "loss": 3.258357048034668, "step": 2386, "token_acc": 0.27197776155156733 }, { "epoch": 1.3992963940193492, "grad_norm": 0.6740266715112851, "learning_rate": 0.0001398475810068107, "loss": 3.270463466644287, "step": 2387, "token_acc": 0.26961353915194985 }, { "epoch": 1.3998827323365581, "grad_norm": 0.6004534191619872, "learning_rate": 0.00013984713320839445, "loss": 3.249237537384033, "step": 2388, "token_acc": 0.2750294104369209 }, { "epoch": 1.4004690706537672, "grad_norm": 0.6206680266876937, "learning_rate": 0.00013984668475385852, "loss": 3.2286877632141113, "step": 2389, "token_acc": 0.27423864938450704 }, { "epoch": 1.4010554089709761, "grad_norm": 0.5738526176746466, "learning_rate": 0.0001398462356432071, "loss": 3.224648952484131, "step": 2390, "token_acc": 0.27637327699376435 }, { "epoch": 1.4016417472881852, "grad_norm": 0.5407964034418798, "learning_rate": 0.00013984578587644442, "loss": 3.2323837280273438, "step": 2391, "token_acc": 0.2739146905467192 }, { "epoch": 1.4022280856053944, "grad_norm": 0.5673861611556864, "learning_rate": 0.00013984533545357468, "loss": 3.258605480194092, "step": 2392, "token_acc": 0.27303377317909666 }, { "epoch": 1.4028144239226035, "grad_norm": 0.6688576029284901, "learning_rate": 0.00013984488437460214, "loss": 3.2233543395996094, "step": 2393, "token_acc": 0.27544254390404044 }, { "epoch": 1.4034007622398124, "grad_norm": 0.6303158626622627, "learning_rate": 0.00013984443263953105, "loss": 3.2401535511016846, "step": 2394, "token_acc": 0.27658400634068764 }, { "epoch": 1.4039871005570215, "grad_norm": 0.529217700035354, "learning_rate": 0.00013984398024836562, "loss": 3.201978921890259, "step": 2395, "token_acc": 0.2781445856654812 }, { "epoch": 1.4045734388742304, "grad_norm": 0.56607946532355, "learning_rate": 0.00013984352720111012, "loss": 3.2326149940490723, "step": 2396, "token_acc": 0.27355680344197375 }, { "epoch": 1.4051597771914395, "grad_norm": 0.512343779762048, "learning_rate": 0.00013984307349776878, "loss": 3.2144575119018555, "step": 2397, "token_acc": 0.27770261504531385 }, { "epoch": 1.4057461155086486, "grad_norm": 0.6349051206000139, "learning_rate": 0.0001398426191383459, "loss": 3.215420722961426, "step": 2398, "token_acc": 0.27814626881180404 }, { "epoch": 1.4063324538258575, "grad_norm": 0.703018170398127, "learning_rate": 0.0001398421641228457, "loss": 3.2789053916931152, "step": 2399, "token_acc": 0.2683592564198871 }, { "epoch": 1.4069187921430666, "grad_norm": 0.7951526408804213, "learning_rate": 0.0001398417084512725, "loss": 3.250777244567871, "step": 2400, "token_acc": 0.2720056727380486 }, { "epoch": 1.4075051304602755, "grad_norm": 0.7735204236643429, "learning_rate": 0.00013984125212363054, "loss": 3.2899956703186035, "step": 2401, "token_acc": 0.2664959626817427 }, { "epoch": 1.4080914687774846, "grad_norm": 0.6762058240514218, "learning_rate": 0.00013984079513992416, "loss": 3.2738871574401855, "step": 2402, "token_acc": 0.26916248301042695 }, { "epoch": 1.4086778070946937, "grad_norm": 0.6554651196168081, "learning_rate": 0.0001398403375001576, "loss": 3.228583812713623, "step": 2403, "token_acc": 0.27528303819275746 }, { "epoch": 1.4092641454119026, "grad_norm": 0.7460082060750334, "learning_rate": 0.00013983987920433518, "loss": 3.2071847915649414, "step": 2404, "token_acc": 0.2807932326334222 }, { "epoch": 1.4098504837291117, "grad_norm": 0.5417904242913776, "learning_rate": 0.00013983942025246122, "loss": 3.222723960876465, "step": 2405, "token_acc": 0.2740727833109164 }, { "epoch": 1.4104368220463206, "grad_norm": 0.6641558497790798, "learning_rate": 0.00013983896064454003, "loss": 3.243206739425659, "step": 2406, "token_acc": 0.273592803473468 }, { "epoch": 1.4110231603635297, "grad_norm": 0.5861446436590138, "learning_rate": 0.00013983850038057588, "loss": 3.2592337131500244, "step": 2407, "token_acc": 0.2714180917646936 }, { "epoch": 1.4116094986807388, "grad_norm": 0.710919946430669, "learning_rate": 0.00013983803946057314, "loss": 3.22426176071167, "step": 2408, "token_acc": 0.276631834649132 }, { "epoch": 1.412195836997948, "grad_norm": 0.8212010231974327, "learning_rate": 0.00013983757788453612, "loss": 3.2760419845581055, "step": 2409, "token_acc": 0.26962649608980893 }, { "epoch": 1.4127821753151568, "grad_norm": 0.7552492593883318, "learning_rate": 0.00013983711565246918, "loss": 3.2396395206451416, "step": 2410, "token_acc": 0.27469004363092897 }, { "epoch": 1.413368513632366, "grad_norm": 0.641132158730426, "learning_rate": 0.00013983665276437663, "loss": 3.1882057189941406, "step": 2411, "token_acc": 0.28013891914884786 }, { "epoch": 1.4139548519495748, "grad_norm": 0.5501504600250186, "learning_rate": 0.00013983618922026284, "loss": 3.243659019470215, "step": 2412, "token_acc": 0.27248790546435087 }, { "epoch": 1.414541190266784, "grad_norm": 0.4872316805940302, "learning_rate": 0.00013983572502013217, "loss": 3.2369706630706787, "step": 2413, "token_acc": 0.2750343893736835 }, { "epoch": 1.415127528583993, "grad_norm": 0.6545925367804918, "learning_rate": 0.00013983526016398895, "loss": 3.202843189239502, "step": 2414, "token_acc": 0.27963820229860514 }, { "epoch": 1.415713866901202, "grad_norm": 0.6920926122356945, "learning_rate": 0.00013983479465183755, "loss": 3.2559728622436523, "step": 2415, "token_acc": 0.2708508187064386 }, { "epoch": 1.416300205218411, "grad_norm": 0.6317835047164144, "learning_rate": 0.0001398343284836824, "loss": 3.2196531295776367, "step": 2416, "token_acc": 0.2762013747396038 }, { "epoch": 1.41688654353562, "grad_norm": 0.601052014115884, "learning_rate": 0.00013983386165952781, "loss": 3.224344253540039, "step": 2417, "token_acc": 0.2764773682744379 }, { "epoch": 1.417472881852829, "grad_norm": 0.5588230081471142, "learning_rate": 0.0001398333941793782, "loss": 3.230104923248291, "step": 2418, "token_acc": 0.2758006405657887 }, { "epoch": 1.4180592201700382, "grad_norm": 0.4828314834208616, "learning_rate": 0.00013983292604323794, "loss": 3.266181468963623, "step": 2419, "token_acc": 0.2693253415399971 }, { "epoch": 1.4186455584872473, "grad_norm": 0.511727736689772, "learning_rate": 0.00013983245725111146, "loss": 3.2578468322753906, "step": 2420, "token_acc": 0.2722493178808299 }, { "epoch": 1.4192318968044562, "grad_norm": 0.5166091308562472, "learning_rate": 0.00013983198780300311, "loss": 3.254639148712158, "step": 2421, "token_acc": 0.2705831903945112 }, { "epoch": 1.4198182351216653, "grad_norm": 0.5700928412157392, "learning_rate": 0.00013983151769891738, "loss": 3.214261770248413, "step": 2422, "token_acc": 0.276788647427155 }, { "epoch": 1.4204045734388742, "grad_norm": 0.6366111276433738, "learning_rate": 0.00013983104693885863, "loss": 3.2175686359405518, "step": 2423, "token_acc": 0.2763402208487419 }, { "epoch": 1.4209909117560833, "grad_norm": 0.5983932926702524, "learning_rate": 0.0001398305755228313, "loss": 3.1876025199890137, "step": 2424, "token_acc": 0.2799941676941416 }, { "epoch": 1.4215772500732924, "grad_norm": 0.47244505946272425, "learning_rate": 0.00013983010345083977, "loss": 3.1966333389282227, "step": 2425, "token_acc": 0.28019370843449803 }, { "epoch": 1.4221635883905013, "grad_norm": 0.5461328205963042, "learning_rate": 0.00013982963072288856, "loss": 3.237764358520508, "step": 2426, "token_acc": 0.2748588952968515 }, { "epoch": 1.4227499267077104, "grad_norm": 0.6377304766389197, "learning_rate": 0.00013982915733898202, "loss": 3.253542900085449, "step": 2427, "token_acc": 0.27375972870657517 }, { "epoch": 1.4233362650249193, "grad_norm": 0.5438557474585347, "learning_rate": 0.00013982868329912468, "loss": 3.2908785343170166, "step": 2428, "token_acc": 0.2678315574928361 }, { "epoch": 1.4239226033421284, "grad_norm": 0.5964805824243781, "learning_rate": 0.00013982820860332094, "loss": 3.2137675285339355, "step": 2429, "token_acc": 0.277348267304219 }, { "epoch": 1.4245089416593375, "grad_norm": 0.6325379556253067, "learning_rate": 0.0001398277332515753, "loss": 3.231764793395996, "step": 2430, "token_acc": 0.2729342754299427 }, { "epoch": 1.4250952799765464, "grad_norm": 0.6067508645464627, "learning_rate": 0.00013982725724389215, "loss": 3.2056338787078857, "step": 2431, "token_acc": 0.27848917339073004 }, { "epoch": 1.4256816182937555, "grad_norm": 0.5839311101274166, "learning_rate": 0.00013982678058027605, "loss": 3.1988816261291504, "step": 2432, "token_acc": 0.2788803993790143 }, { "epoch": 1.4262679566109644, "grad_norm": 0.656760848134075, "learning_rate": 0.00013982630326073143, "loss": 3.269249439239502, "step": 2433, "token_acc": 0.2686118280776568 }, { "epoch": 1.4268542949281735, "grad_norm": 0.6449637479988619, "learning_rate": 0.00013982582528526276, "loss": 3.2284135818481445, "step": 2434, "token_acc": 0.27500480722374265 }, { "epoch": 1.4274406332453826, "grad_norm": 0.618894237909221, "learning_rate": 0.00013982534665387458, "loss": 3.222695827484131, "step": 2435, "token_acc": 0.27693057349369815 }, { "epoch": 1.4280269715625917, "grad_norm": 0.6985254532677547, "learning_rate": 0.00013982486736657137, "loss": 3.2316129207611084, "step": 2436, "token_acc": 0.27562998987150295 }, { "epoch": 1.4286133098798006, "grad_norm": 0.6274405318836794, "learning_rate": 0.0001398243874233576, "loss": 3.2195935249328613, "step": 2437, "token_acc": 0.27584777319894743 }, { "epoch": 1.4291996481970097, "grad_norm": 0.551814661063161, "learning_rate": 0.00013982390682423782, "loss": 3.2372705936431885, "step": 2438, "token_acc": 0.27380889832289523 }, { "epoch": 1.4297859865142186, "grad_norm": 0.4987265102792151, "learning_rate": 0.0001398234255692165, "loss": 3.2298762798309326, "step": 2439, "token_acc": 0.2737696335078534 }, { "epoch": 1.4303723248314277, "grad_norm": 0.5893724824781764, "learning_rate": 0.00013982294365829818, "loss": 3.2411513328552246, "step": 2440, "token_acc": 0.27373193189586453 }, { "epoch": 1.4309586631486368, "grad_norm": 0.8000977343325651, "learning_rate": 0.0001398224610914874, "loss": 3.2341487407684326, "step": 2441, "token_acc": 0.2736898429551218 }, { "epoch": 1.4315450014658457, "grad_norm": 0.7012828912966524, "learning_rate": 0.0001398219778687887, "loss": 3.1969716548919678, "step": 2442, "token_acc": 0.27902733519927014 }, { "epoch": 1.4321313397830548, "grad_norm": 0.6541723189277936, "learning_rate": 0.0001398214939902066, "loss": 3.222557783126831, "step": 2443, "token_acc": 0.27698512154455573 }, { "epoch": 1.4327176781002637, "grad_norm": 0.5831884616370641, "learning_rate": 0.00013982100945574566, "loss": 3.2349190711975098, "step": 2444, "token_acc": 0.2730755761380327 }, { "epoch": 1.4333040164174728, "grad_norm": 0.6127999475862088, "learning_rate": 0.00013982052426541038, "loss": 3.2241790294647217, "step": 2445, "token_acc": 0.275428150835272 }, { "epoch": 1.433890354734682, "grad_norm": 0.6460185068876323, "learning_rate": 0.0001398200384192054, "loss": 3.231858730316162, "step": 2446, "token_acc": 0.2754465230136903 }, { "epoch": 1.434476693051891, "grad_norm": 0.8507695748449875, "learning_rate": 0.00013981955191713524, "loss": 3.2536540031433105, "step": 2447, "token_acc": 0.2720825510059989 }, { "epoch": 1.4350630313691, "grad_norm": 1.050952069242236, "learning_rate": 0.00013981906475920444, "loss": 3.217353105545044, "step": 2448, "token_acc": 0.2759810951418911 }, { "epoch": 1.435649369686309, "grad_norm": 0.7885430134156483, "learning_rate": 0.00013981857694541765, "loss": 3.2596235275268555, "step": 2449, "token_acc": 0.27104769635188297 }, { "epoch": 1.436235708003518, "grad_norm": 0.587804477966935, "learning_rate": 0.00013981808847577938, "loss": 3.1938605308532715, "step": 2450, "token_acc": 0.28141183798281244 }, { "epoch": 1.436822046320727, "grad_norm": 0.7077522822114317, "learning_rate": 0.00013981759935029425, "loss": 3.233181953430176, "step": 2451, "token_acc": 0.27429254493830546 }, { "epoch": 1.4374083846379362, "grad_norm": 0.8260550515530766, "learning_rate": 0.0001398171095689669, "loss": 3.174018383026123, "step": 2452, "token_acc": 0.2815628461133155 }, { "epoch": 1.437994722955145, "grad_norm": 0.7219596264006004, "learning_rate": 0.00013981661913180183, "loss": 3.2035703659057617, "step": 2453, "token_acc": 0.2794444414409057 }, { "epoch": 1.4385810612723542, "grad_norm": 0.9086604556673789, "learning_rate": 0.00013981612803880373, "loss": 3.261613130569458, "step": 2454, "token_acc": 0.2691828193554097 }, { "epoch": 1.439167399589563, "grad_norm": 0.6248868631385788, "learning_rate": 0.00013981563628997717, "loss": 3.187314033508301, "step": 2455, "token_acc": 0.2815027476678622 }, { "epoch": 1.4397537379067722, "grad_norm": 0.5246225522196596, "learning_rate": 0.0001398151438853268, "loss": 3.238859176635742, "step": 2456, "token_acc": 0.27350756857768566 }, { "epoch": 1.4403400762239813, "grad_norm": 0.5735549648006387, "learning_rate": 0.0001398146508248572, "loss": 3.2098145484924316, "step": 2457, "token_acc": 0.27673749307927753 }, { "epoch": 1.4409264145411902, "grad_norm": 0.5053864241414865, "learning_rate": 0.00013981415710857307, "loss": 3.266812562942505, "step": 2458, "token_acc": 0.27105644267410284 }, { "epoch": 1.4415127528583993, "grad_norm": 0.4913960409943003, "learning_rate": 0.000139813662736479, "loss": 3.222018003463745, "step": 2459, "token_acc": 0.27398282000821456 }, { "epoch": 1.4420990911756082, "grad_norm": 0.6327417579947565, "learning_rate": 0.00013981316770857963, "loss": 3.226259708404541, "step": 2460, "token_acc": 0.27503610391436745 }, { "epoch": 1.4426854294928173, "grad_norm": 0.6324405253818951, "learning_rate": 0.00013981267202487963, "loss": 3.216109037399292, "step": 2461, "token_acc": 0.2765765363832238 }, { "epoch": 1.4432717678100264, "grad_norm": 0.48994333422123504, "learning_rate": 0.00013981217568538368, "loss": 3.2009785175323486, "step": 2462, "token_acc": 0.27727835286441477 }, { "epoch": 1.4438581061272355, "grad_norm": 0.5333057242858208, "learning_rate": 0.00013981167869009636, "loss": 3.2006654739379883, "step": 2463, "token_acc": 0.2797878475852389 }, { "epoch": 1.4444444444444444, "grad_norm": 0.7237844336245356, "learning_rate": 0.00013981118103902242, "loss": 3.227724075317383, "step": 2464, "token_acc": 0.2746798822152202 }, { "epoch": 1.4450307827616535, "grad_norm": 0.7140014959929148, "learning_rate": 0.00013981068273216651, "loss": 3.255819320678711, "step": 2465, "token_acc": 0.270896989447626 }, { "epoch": 1.4456171210788624, "grad_norm": 0.5742959378883638, "learning_rate": 0.0001398101837695333, "loss": 3.260256052017212, "step": 2466, "token_acc": 0.2703417139854478 }, { "epoch": 1.4462034593960715, "grad_norm": 0.38680497756062954, "learning_rate": 0.00013980968415112748, "loss": 3.209787607192993, "step": 2467, "token_acc": 0.2775748684355136 }, { "epoch": 1.4467897977132806, "grad_norm": 0.5614170195556832, "learning_rate": 0.00013980918387695375, "loss": 3.22489070892334, "step": 2468, "token_acc": 0.27501184768248743 }, { "epoch": 1.4473761360304895, "grad_norm": 0.6967380425106702, "learning_rate": 0.0001398086829470168, "loss": 3.277099609375, "step": 2469, "token_acc": 0.2697925388408224 }, { "epoch": 1.4479624743476986, "grad_norm": 0.7198215198735929, "learning_rate": 0.00013980818136132136, "loss": 3.225374698638916, "step": 2470, "token_acc": 0.2746195453586335 }, { "epoch": 1.4485488126649075, "grad_norm": 0.5950042314659139, "learning_rate": 0.00013980767911987208, "loss": 3.1966466903686523, "step": 2471, "token_acc": 0.28012465202137743 }, { "epoch": 1.4491351509821166, "grad_norm": 0.4836010137646836, "learning_rate": 0.00013980717622267378, "loss": 3.2653489112854004, "step": 2472, "token_acc": 0.270239411442565 }, { "epoch": 1.4497214892993258, "grad_norm": 0.42797447511473263, "learning_rate": 0.0001398066726697311, "loss": 3.1798219680786133, "step": 2473, "token_acc": 0.2802706571658161 }, { "epoch": 1.4503078276165349, "grad_norm": 0.6730463660294633, "learning_rate": 0.0001398061684610488, "loss": 3.232506275177002, "step": 2474, "token_acc": 0.2745652864884141 }, { "epoch": 1.4508941659337438, "grad_norm": 0.5714172431360138, "learning_rate": 0.00013980566359663162, "loss": 3.2706761360168457, "step": 2475, "token_acc": 0.2676581684554179 }, { "epoch": 1.4514805042509529, "grad_norm": 0.41679095839410296, "learning_rate": 0.00013980515807648426, "loss": 3.285898447036743, "step": 2476, "token_acc": 0.26596671094462826 }, { "epoch": 1.4520668425681618, "grad_norm": 0.390898732953776, "learning_rate": 0.00013980465190061153, "loss": 3.193160057067871, "step": 2477, "token_acc": 0.2793697476788869 }, { "epoch": 1.4526531808853709, "grad_norm": 0.4886332203034683, "learning_rate": 0.00013980414506901815, "loss": 3.2722482681274414, "step": 2478, "token_acc": 0.2697327615707075 }, { "epoch": 1.45323951920258, "grad_norm": 0.45924284494060735, "learning_rate": 0.0001398036375817089, "loss": 3.1913957595825195, "step": 2479, "token_acc": 0.2794312222454347 }, { "epoch": 1.4538258575197889, "grad_norm": 0.5781556347793593, "learning_rate": 0.00013980312943868853, "loss": 3.226372241973877, "step": 2480, "token_acc": 0.27423203416583525 }, { "epoch": 1.454412195836998, "grad_norm": 0.6914780890266232, "learning_rate": 0.00013980262063996183, "loss": 3.2139101028442383, "step": 2481, "token_acc": 0.2773929937813242 }, { "epoch": 1.4549985341542069, "grad_norm": 0.474447746757703, "learning_rate": 0.00013980211118553356, "loss": 3.1854515075683594, "step": 2482, "token_acc": 0.2811818024747146 }, { "epoch": 1.455584872471416, "grad_norm": 0.5711247404906978, "learning_rate": 0.0001398016010754085, "loss": 3.2113542556762695, "step": 2483, "token_acc": 0.2777541405097819 }, { "epoch": 1.456171210788625, "grad_norm": 0.7172604018289751, "learning_rate": 0.00013980109030959148, "loss": 3.21480655670166, "step": 2484, "token_acc": 0.2777329654001274 }, { "epoch": 1.456757549105834, "grad_norm": 0.5268505454016253, "learning_rate": 0.00013980057888808727, "loss": 3.1692748069763184, "step": 2485, "token_acc": 0.2821465620016412 }, { "epoch": 1.457343887423043, "grad_norm": 0.48488537251429903, "learning_rate": 0.00013980006681090068, "loss": 3.170778274536133, "step": 2486, "token_acc": 0.28302928009777084 }, { "epoch": 1.457930225740252, "grad_norm": 0.5139727052086006, "learning_rate": 0.0001397995540780365, "loss": 3.2047383785247803, "step": 2487, "token_acc": 0.2788228302372857 }, { "epoch": 1.458516564057461, "grad_norm": 0.5400128521648689, "learning_rate": 0.0001397990406894996, "loss": 3.2094039916992188, "step": 2488, "token_acc": 0.2788425544658232 }, { "epoch": 1.4591029023746702, "grad_norm": 0.5352485067628382, "learning_rate": 0.00013979852664529474, "loss": 3.2051069736480713, "step": 2489, "token_acc": 0.2768586238727544 }, { "epoch": 1.4596892406918793, "grad_norm": 0.6868011774118865, "learning_rate": 0.00013979801194542678, "loss": 3.1778130531311035, "step": 2490, "token_acc": 0.27994833203466146 }, { "epoch": 1.4602755790090882, "grad_norm": 0.6304069717996058, "learning_rate": 0.00013979749658990054, "loss": 3.184601306915283, "step": 2491, "token_acc": 0.2794661881798811 }, { "epoch": 1.4608619173262973, "grad_norm": 0.6630661438258401, "learning_rate": 0.0001397969805787209, "loss": 3.2271370887756348, "step": 2492, "token_acc": 0.274853347202923 }, { "epoch": 1.4614482556435062, "grad_norm": 0.6019424681475308, "learning_rate": 0.00013979646391189268, "loss": 3.2426910400390625, "step": 2493, "token_acc": 0.2742297941763723 }, { "epoch": 1.4620345939607153, "grad_norm": 0.4666156274953772, "learning_rate": 0.00013979594658942074, "loss": 3.1951870918273926, "step": 2494, "token_acc": 0.278254932354771 }, { "epoch": 1.4626209322779244, "grad_norm": 0.6528939611936597, "learning_rate": 0.0001397954286113099, "loss": 3.22921085357666, "step": 2495, "token_acc": 0.27374029739815864 }, { "epoch": 1.4632072705951333, "grad_norm": 0.5292733738515044, "learning_rate": 0.00013979490997756506, "loss": 3.248718738555908, "step": 2496, "token_acc": 0.2713083369361766 }, { "epoch": 1.4637936089123424, "grad_norm": 0.7062491992807611, "learning_rate": 0.0001397943906881911, "loss": 3.248420238494873, "step": 2497, "token_acc": 0.27059643147424234 }, { "epoch": 1.4643799472295513, "grad_norm": 0.7066871850317076, "learning_rate": 0.00013979387074319292, "loss": 3.2616121768951416, "step": 2498, "token_acc": 0.2704224685292246 }, { "epoch": 1.4649662855467604, "grad_norm": 0.7268616005454495, "learning_rate": 0.00013979335014257532, "loss": 3.2437503337860107, "step": 2499, "token_acc": 0.27294004555618245 }, { "epoch": 1.4655526238639696, "grad_norm": 0.6170402127330895, "learning_rate": 0.00013979282888634326, "loss": 3.2574422359466553, "step": 2500, "token_acc": 0.2713613598421612 }, { "epoch": 1.4661389621811787, "grad_norm": 0.5382448827022154, "learning_rate": 0.00013979230697450164, "loss": 3.207439661026001, "step": 2501, "token_acc": 0.27919188230383973 }, { "epoch": 1.4667253004983876, "grad_norm": 0.5079250397474497, "learning_rate": 0.00013979178440705535, "loss": 3.1744720935821533, "step": 2502, "token_acc": 0.2840293181344636 }, { "epoch": 1.4673116388155967, "grad_norm": 0.5535709138033333, "learning_rate": 0.00013979126118400927, "loss": 3.255910634994507, "step": 2503, "token_acc": 0.2704175567150695 }, { "epoch": 1.4678979771328056, "grad_norm": 0.5204048645281864, "learning_rate": 0.00013979073730536833, "loss": 3.203619956970215, "step": 2504, "token_acc": 0.2759190856775004 }, { "epoch": 1.4684843154500147, "grad_norm": 0.7119167447735097, "learning_rate": 0.00013979021277113748, "loss": 3.2156076431274414, "step": 2505, "token_acc": 0.2758602844715386 }, { "epoch": 1.4690706537672238, "grad_norm": 0.8657641734171144, "learning_rate": 0.0001397896875813216, "loss": 3.2258713245391846, "step": 2506, "token_acc": 0.27484766822870343 }, { "epoch": 1.4696569920844327, "grad_norm": 0.5779194822953634, "learning_rate": 0.00013978916173592565, "loss": 3.221388816833496, "step": 2507, "token_acc": 0.27556908924426654 }, { "epoch": 1.4702433304016418, "grad_norm": 0.49016828665022155, "learning_rate": 0.00013978863523495457, "loss": 3.2357470989227295, "step": 2508, "token_acc": 0.27485042927195685 }, { "epoch": 1.4708296687188507, "grad_norm": 0.7461309520005489, "learning_rate": 0.00013978810807841334, "loss": 3.213592529296875, "step": 2509, "token_acc": 0.2779036452153582 }, { "epoch": 1.4714160070360598, "grad_norm": 0.7562593173002234, "learning_rate": 0.00013978758026630681, "loss": 3.1985175609588623, "step": 2510, "token_acc": 0.2805806469933731 }, { "epoch": 1.472002345353269, "grad_norm": 0.6376132801211046, "learning_rate": 0.00013978705179864005, "loss": 3.215303421020508, "step": 2511, "token_acc": 0.2771642043717421 }, { "epoch": 1.4725886836704778, "grad_norm": 0.5532629199214752, "learning_rate": 0.00013978652267541798, "loss": 3.2638301849365234, "step": 2512, "token_acc": 0.2705271676658127 }, { "epoch": 1.473175021987687, "grad_norm": 0.5069247076286572, "learning_rate": 0.00013978599289664553, "loss": 3.2155191898345947, "step": 2513, "token_acc": 0.27790124674812156 }, { "epoch": 1.4737613603048958, "grad_norm": 0.5990413151469272, "learning_rate": 0.00013978546246232773, "loss": 3.2099194526672363, "step": 2514, "token_acc": 0.2781155368303658 }, { "epoch": 1.474347698622105, "grad_norm": 0.6019848848978954, "learning_rate": 0.00013978493137246957, "loss": 3.219420909881592, "step": 2515, "token_acc": 0.2762705175304535 }, { "epoch": 1.474934036939314, "grad_norm": 0.7194820460351192, "learning_rate": 0.000139784399627076, "loss": 3.2263007164001465, "step": 2516, "token_acc": 0.27486264065836513 }, { "epoch": 1.4755203752565231, "grad_norm": 0.5047145803987207, "learning_rate": 0.00013978386722615204, "loss": 3.2403948307037354, "step": 2517, "token_acc": 0.27288338678772667 }, { "epoch": 1.476106713573732, "grad_norm": 0.6764947504908534, "learning_rate": 0.00013978333416970266, "loss": 3.227323055267334, "step": 2518, "token_acc": 0.2747396129658709 }, { "epoch": 1.4766930518909411, "grad_norm": 0.7695876560517256, "learning_rate": 0.00013978280045773292, "loss": 3.1751251220703125, "step": 2519, "token_acc": 0.281470214288288 }, { "epoch": 1.47727939020815, "grad_norm": 0.8037666210516372, "learning_rate": 0.0001397822660902478, "loss": 3.236229181289673, "step": 2520, "token_acc": 0.27324519832958427 }, { "epoch": 1.4778657285253591, "grad_norm": 0.6063796890613761, "learning_rate": 0.0001397817310672523, "loss": 3.237603187561035, "step": 2521, "token_acc": 0.2726031982438979 }, { "epoch": 1.4784520668425682, "grad_norm": 0.675771075930974, "learning_rate": 0.0001397811953887515, "loss": 3.1562178134918213, "step": 2522, "token_acc": 0.28379850415656843 }, { "epoch": 1.4790384051597771, "grad_norm": 0.5892495820450392, "learning_rate": 0.00013978065905475036, "loss": 3.2719407081604004, "step": 2523, "token_acc": 0.2670912962455904 }, { "epoch": 1.4796247434769862, "grad_norm": 0.6090631597146475, "learning_rate": 0.00013978012206525398, "loss": 3.2301523685455322, "step": 2524, "token_acc": 0.27433591901401205 }, { "epoch": 1.4802110817941951, "grad_norm": 0.5986708011967483, "learning_rate": 0.00013977958442026737, "loss": 3.1978931427001953, "step": 2525, "token_acc": 0.2784000125317209 }, { "epoch": 1.4807974201114043, "grad_norm": 0.5931181978951686, "learning_rate": 0.00013977904611979562, "loss": 3.2427494525909424, "step": 2526, "token_acc": 0.2746901311747913 }, { "epoch": 1.4813837584286134, "grad_norm": 0.5895789615773188, "learning_rate": 0.00013977850716384373, "loss": 3.2395873069763184, "step": 2527, "token_acc": 0.2732250815900701 }, { "epoch": 1.4819700967458225, "grad_norm": 0.573596649649976, "learning_rate": 0.00013977796755241682, "loss": 3.2323994636535645, "step": 2528, "token_acc": 0.2738754014129737 }, { "epoch": 1.4825564350630314, "grad_norm": 0.5647731156625904, "learning_rate": 0.00013977742728551993, "loss": 3.2399463653564453, "step": 2529, "token_acc": 0.2724099852494297 }, { "epoch": 1.4831427733802405, "grad_norm": 0.5331510568883826, "learning_rate": 0.00013977688636315812, "loss": 3.203166961669922, "step": 2530, "token_acc": 0.27739621626468214 }, { "epoch": 1.4837291116974494, "grad_norm": 0.494995542538171, "learning_rate": 0.00013977634478533647, "loss": 3.221698760986328, "step": 2531, "token_acc": 0.27680370584829184 }, { "epoch": 1.4843154500146585, "grad_norm": 0.4839130654360426, "learning_rate": 0.00013977580255206012, "loss": 3.2197742462158203, "step": 2532, "token_acc": 0.27495191801540164 }, { "epoch": 1.4849017883318676, "grad_norm": 0.5315838931611194, "learning_rate": 0.00013977525966333412, "loss": 3.2125704288482666, "step": 2533, "token_acc": 0.27576937832351367 }, { "epoch": 1.4854881266490765, "grad_norm": 0.5340332320376607, "learning_rate": 0.0001397747161191636, "loss": 3.2286314964294434, "step": 2534, "token_acc": 0.275389078711613 }, { "epoch": 1.4860744649662856, "grad_norm": 0.5202661657795244, "learning_rate": 0.0001397741719195536, "loss": 3.2081031799316406, "step": 2535, "token_acc": 0.2769927679393088 }, { "epoch": 1.4866608032834945, "grad_norm": 0.47925131304338453, "learning_rate": 0.0001397736270645093, "loss": 3.240163564682007, "step": 2536, "token_acc": 0.2730591898646211 }, { "epoch": 1.4872471416007036, "grad_norm": 0.45236753685903575, "learning_rate": 0.00013977308155403581, "loss": 3.2147579193115234, "step": 2537, "token_acc": 0.2794105957106291 }, { "epoch": 1.4878334799179127, "grad_norm": 0.4634027635638455, "learning_rate": 0.0001397725353881382, "loss": 3.2071025371551514, "step": 2538, "token_acc": 0.2786465723040396 }, { "epoch": 1.4884198182351216, "grad_norm": 0.4087393809612668, "learning_rate": 0.00013977198856682168, "loss": 3.203801393508911, "step": 2539, "token_acc": 0.27953028281715436 }, { "epoch": 1.4890061565523307, "grad_norm": 0.46582580326654033, "learning_rate": 0.00013977144109009133, "loss": 3.2602481842041016, "step": 2540, "token_acc": 0.2701335676350733 }, { "epoch": 1.4895924948695396, "grad_norm": 0.4712333528120587, "learning_rate": 0.00013977089295795232, "loss": 3.245431900024414, "step": 2541, "token_acc": 0.27227248426495315 }, { "epoch": 1.4901788331867487, "grad_norm": 0.44486816320629674, "learning_rate": 0.00013977034417040975, "loss": 3.196918487548828, "step": 2542, "token_acc": 0.28048146319427664 }, { "epoch": 1.4907651715039578, "grad_norm": 0.4360632167334556, "learning_rate": 0.00013976979472746885, "loss": 3.240436553955078, "step": 2543, "token_acc": 0.27336708743581106 }, { "epoch": 1.491351509821167, "grad_norm": 0.43872272178873406, "learning_rate": 0.00013976924462913475, "loss": 3.211341381072998, "step": 2544, "token_acc": 0.27600242712757783 }, { "epoch": 1.4919378481383758, "grad_norm": 0.4862351547669937, "learning_rate": 0.0001397686938754126, "loss": 3.1960270404815674, "step": 2545, "token_acc": 0.2789416641829463 }, { "epoch": 1.492524186455585, "grad_norm": 0.5103539037336897, "learning_rate": 0.00013976814246630757, "loss": 3.250208854675293, "step": 2546, "token_acc": 0.27065510266342957 }, { "epoch": 1.4931105247727938, "grad_norm": 0.5342999525575971, "learning_rate": 0.00013976759040182487, "loss": 3.2290921211242676, "step": 2547, "token_acc": 0.2732762067827268 }, { "epoch": 1.493696863090003, "grad_norm": 0.5803633655434483, "learning_rate": 0.00013976703768196966, "loss": 3.2352733612060547, "step": 2548, "token_acc": 0.2729484084583218 }, { "epoch": 1.494283201407212, "grad_norm": 0.703839718059476, "learning_rate": 0.00013976648430674718, "loss": 3.2148866653442383, "step": 2549, "token_acc": 0.27710088606452504 }, { "epoch": 1.494869539724421, "grad_norm": 0.8732420956696452, "learning_rate": 0.00013976593027616255, "loss": 3.207047462463379, "step": 2550, "token_acc": 0.273959138177811 }, { "epoch": 1.49545587804163, "grad_norm": 0.9287503036010925, "learning_rate": 0.00013976537559022103, "loss": 3.1876115798950195, "step": 2551, "token_acc": 0.279940076481333 }, { "epoch": 1.496042216358839, "grad_norm": 0.9104196927077189, "learning_rate": 0.00013976482024892782, "loss": 3.2507317066192627, "step": 2552, "token_acc": 0.2715939221533291 }, { "epoch": 1.496628554676048, "grad_norm": 0.9164196052930418, "learning_rate": 0.00013976426425228814, "loss": 3.1873207092285156, "step": 2553, "token_acc": 0.28021149752672125 }, { "epoch": 1.4972148929932572, "grad_norm": 0.9170878486243227, "learning_rate": 0.0001397637076003072, "loss": 3.1836254596710205, "step": 2554, "token_acc": 0.28118229017619883 }, { "epoch": 1.4978012313104663, "grad_norm": 0.6773100640272034, "learning_rate": 0.0001397631502929902, "loss": 3.2218804359436035, "step": 2555, "token_acc": 0.2756324551055184 }, { "epoch": 1.4983875696276752, "grad_norm": 0.6590707684674438, "learning_rate": 0.00013976259233034244, "loss": 3.213777542114258, "step": 2556, "token_acc": 0.2779460464142201 }, { "epoch": 1.4989739079448843, "grad_norm": 0.6303619828252052, "learning_rate": 0.00013976203371236917, "loss": 3.264509439468384, "step": 2557, "token_acc": 0.27112036598160505 }, { "epoch": 1.4995602462620932, "grad_norm": 0.6791792897465988, "learning_rate": 0.00013976147443907556, "loss": 3.222604751586914, "step": 2558, "token_acc": 0.27526599575244787 }, { "epoch": 1.5001465845793023, "grad_norm": 0.5363781061411426, "learning_rate": 0.00013976091451046687, "loss": 3.199427604675293, "step": 2559, "token_acc": 0.2784718699000058 }, { "epoch": 1.5007329228965114, "grad_norm": 0.4913536477762996, "learning_rate": 0.00013976035392654842, "loss": 3.2020766735076904, "step": 2560, "token_acc": 0.278448969902151 }, { "epoch": 1.5013192612137203, "grad_norm": 0.7390118155711631, "learning_rate": 0.0001397597926873255, "loss": 3.1984505653381348, "step": 2561, "token_acc": 0.2784253175764316 }, { "epoch": 1.5019055995309294, "grad_norm": 0.5557912274998127, "learning_rate": 0.00013975923079280326, "loss": 3.2683253288269043, "step": 2562, "token_acc": 0.26881237170844274 }, { "epoch": 1.5024919378481383, "grad_norm": 0.718210430407268, "learning_rate": 0.00013975866824298707, "loss": 3.190762519836426, "step": 2563, "token_acc": 0.27946874983875714 }, { "epoch": 1.5030782761653474, "grad_norm": 0.8341838804040289, "learning_rate": 0.00013975810503788217, "loss": 3.218371629714966, "step": 2564, "token_acc": 0.277456997955713 }, { "epoch": 1.5036646144825565, "grad_norm": 0.6672144367073933, "learning_rate": 0.00013975754117749391, "loss": 3.208378791809082, "step": 2565, "token_acc": 0.27657754578900534 }, { "epoch": 1.5042509527997656, "grad_norm": 0.7602384122378035, "learning_rate": 0.00013975697666182752, "loss": 3.20758056640625, "step": 2566, "token_acc": 0.27617974237344867 }, { "epoch": 1.5048372911169745, "grad_norm": 0.5737244827267608, "learning_rate": 0.00013975641149088837, "loss": 3.2075133323669434, "step": 2567, "token_acc": 0.2775379136120207 }, { "epoch": 1.5054236294341834, "grad_norm": 0.5355928937599357, "learning_rate": 0.0001397558456646817, "loss": 3.2195966243743896, "step": 2568, "token_acc": 0.27568037934774386 }, { "epoch": 1.5060099677513925, "grad_norm": 0.5474300730251878, "learning_rate": 0.00013975527918321288, "loss": 3.2337286472320557, "step": 2569, "token_acc": 0.2724674299249901 }, { "epoch": 1.5065963060686016, "grad_norm": 0.5552195678643156, "learning_rate": 0.0001397547120464872, "loss": 3.1930017471313477, "step": 2570, "token_acc": 0.2784622825996505 }, { "epoch": 1.5071826443858107, "grad_norm": 0.5622014804370437, "learning_rate": 0.00013975414425451, "loss": 3.231153964996338, "step": 2571, "token_acc": 0.27402338180781294 }, { "epoch": 1.5077689827030196, "grad_norm": 0.5328862852165819, "learning_rate": 0.0001397535758072866, "loss": 3.2259230613708496, "step": 2572, "token_acc": 0.27617590723862656 }, { "epoch": 1.5083553210202285, "grad_norm": 0.5921937301181955, "learning_rate": 0.00013975300670482235, "loss": 3.2031431198120117, "step": 2573, "token_acc": 0.2786465179075032 }, { "epoch": 1.5089416593374376, "grad_norm": 0.5282550134232902, "learning_rate": 0.0001397524369471226, "loss": 3.2419960498809814, "step": 2574, "token_acc": 0.27262604849312555 }, { "epoch": 1.5095279976546467, "grad_norm": 0.4408835654341376, "learning_rate": 0.0001397518665341927, "loss": 3.238924026489258, "step": 2575, "token_acc": 0.2741587826383922 }, { "epoch": 1.5101143359718558, "grad_norm": 0.5485542825061888, "learning_rate": 0.000139751295466038, "loss": 3.20003604888916, "step": 2576, "token_acc": 0.2815849854340148 }, { "epoch": 1.5107006742890647, "grad_norm": 0.5138402363938792, "learning_rate": 0.00013975072374266387, "loss": 3.20914888381958, "step": 2577, "token_acc": 0.2778364342690868 }, { "epoch": 1.5112870126062738, "grad_norm": 0.4831710307680923, "learning_rate": 0.0001397501513640757, "loss": 3.266451597213745, "step": 2578, "token_acc": 0.26808576318183786 }, { "epoch": 1.5118733509234827, "grad_norm": 0.4123326723912222, "learning_rate": 0.00013974957833027881, "loss": 3.2247297763824463, "step": 2579, "token_acc": 0.27471056052011494 }, { "epoch": 1.5124596892406919, "grad_norm": 0.3966383392726758, "learning_rate": 0.00013974900464127865, "loss": 3.2108302116394043, "step": 2580, "token_acc": 0.2760295129636624 }, { "epoch": 1.513046027557901, "grad_norm": 0.4637874302659689, "learning_rate": 0.00013974843029708058, "loss": 3.260533094406128, "step": 2581, "token_acc": 0.2710347431013111 }, { "epoch": 1.51363236587511, "grad_norm": 0.5033068607067129, "learning_rate": 0.00013974785529768997, "loss": 3.264974594116211, "step": 2582, "token_acc": 0.26873702483826467 }, { "epoch": 1.514218704192319, "grad_norm": 0.5479386848991177, "learning_rate": 0.00013974727964311226, "loss": 3.247617721557617, "step": 2583, "token_acc": 0.27269674182833337 }, { "epoch": 1.5148050425095279, "grad_norm": 0.5511319581522282, "learning_rate": 0.00013974670333335285, "loss": 3.2106881141662598, "step": 2584, "token_acc": 0.2779150413512032 }, { "epoch": 1.515391380826737, "grad_norm": 0.42797504203201625, "learning_rate": 0.00013974612636841714, "loss": 3.2237043380737305, "step": 2585, "token_acc": 0.2761071667146331 }, { "epoch": 1.515977719143946, "grad_norm": 0.5094174097076807, "learning_rate": 0.00013974554874831053, "loss": 3.242367744445801, "step": 2586, "token_acc": 0.27059423683834916 }, { "epoch": 1.5165640574611552, "grad_norm": 0.475937285256912, "learning_rate": 0.00013974497047303851, "loss": 3.202700614929199, "step": 2587, "token_acc": 0.2780571784896095 }, { "epoch": 1.517150395778364, "grad_norm": 0.49121387181486104, "learning_rate": 0.00013974439154260647, "loss": 3.2149429321289062, "step": 2588, "token_acc": 0.27425128193927745 }, { "epoch": 1.5177367340955732, "grad_norm": 0.5629072001881763, "learning_rate": 0.00013974381195701986, "loss": 3.2113990783691406, "step": 2589, "token_acc": 0.27589473821955274 }, { "epoch": 1.518323072412782, "grad_norm": 0.5148768215410927, "learning_rate": 0.00013974323171628408, "loss": 3.206923007965088, "step": 2590, "token_acc": 0.2772624850729868 }, { "epoch": 1.5189094107299912, "grad_norm": 0.3961598795941667, "learning_rate": 0.00013974265082040467, "loss": 3.2035489082336426, "step": 2591, "token_acc": 0.27713900622708143 }, { "epoch": 1.5194957490472003, "grad_norm": 0.49056788178072985, "learning_rate": 0.000139742069269387, "loss": 3.22664213180542, "step": 2592, "token_acc": 0.27438000737030316 }, { "epoch": 1.5200820873644094, "grad_norm": 0.4650880503826025, "learning_rate": 0.0001397414870632366, "loss": 3.23069167137146, "step": 2593, "token_acc": 0.27507264060221903 }, { "epoch": 1.5206684256816183, "grad_norm": 0.5172017760352943, "learning_rate": 0.00013974090420195887, "loss": 3.2184481620788574, "step": 2594, "token_acc": 0.2763294718598924 }, { "epoch": 1.5212547639988272, "grad_norm": 0.48141327061611217, "learning_rate": 0.00013974032068555934, "loss": 3.244694232940674, "step": 2595, "token_acc": 0.2756314080179723 }, { "epoch": 1.5218411023160363, "grad_norm": 0.6805649871045893, "learning_rate": 0.00013973973651404346, "loss": 3.168689250946045, "step": 2596, "token_acc": 0.28162553431728365 }, { "epoch": 1.5224274406332454, "grad_norm": 0.7452226258591964, "learning_rate": 0.00013973915168741675, "loss": 3.232879877090454, "step": 2597, "token_acc": 0.27488484470178004 }, { "epoch": 1.5230137789504545, "grad_norm": 0.7406056835949393, "learning_rate": 0.00013973856620568467, "loss": 3.210618495941162, "step": 2598, "token_acc": 0.2747979555941398 }, { "epoch": 1.5236001172676634, "grad_norm": 0.802318184778563, "learning_rate": 0.00013973798006885276, "loss": 3.20450496673584, "step": 2599, "token_acc": 0.27701186277653095 }, { "epoch": 1.5241864555848723, "grad_norm": 0.7665844554070165, "learning_rate": 0.00013973739327692645, "loss": 3.254924774169922, "step": 2600, "token_acc": 0.26971113422559795 }, { "epoch": 1.5247727939020814, "grad_norm": 0.5736590224490088, "learning_rate": 0.00013973680582991135, "loss": 3.1640803813934326, "step": 2601, "token_acc": 0.28265301662634557 }, { "epoch": 1.5253591322192905, "grad_norm": 0.6473824667132135, "learning_rate": 0.0001397362177278129, "loss": 3.167708396911621, "step": 2602, "token_acc": 0.2832496297979363 }, { "epoch": 1.5259454705364996, "grad_norm": 0.6092904724266451, "learning_rate": 0.00013973562897063666, "loss": 3.194204568862915, "step": 2603, "token_acc": 0.27825271196966295 }, { "epoch": 1.5265318088537085, "grad_norm": 0.48113946267623403, "learning_rate": 0.00013973503955838816, "loss": 3.2055134773254395, "step": 2604, "token_acc": 0.2767244471010161 }, { "epoch": 1.5271181471709177, "grad_norm": 0.5905113277618578, "learning_rate": 0.00013973444949107294, "loss": 3.213676929473877, "step": 2605, "token_acc": 0.27729837995156503 }, { "epoch": 1.5277044854881265, "grad_norm": 0.5243213524984546, "learning_rate": 0.00013973385876869655, "loss": 3.2026448249816895, "step": 2606, "token_acc": 0.27694738281003556 }, { "epoch": 1.5282908238053357, "grad_norm": 0.5018075340712047, "learning_rate": 0.0001397332673912645, "loss": 3.2417831420898438, "step": 2607, "token_acc": 0.27177275534676787 }, { "epoch": 1.5288771621225448, "grad_norm": 0.5453507701816291, "learning_rate": 0.00013973267535878238, "loss": 3.208800792694092, "step": 2608, "token_acc": 0.2770779104389769 }, { "epoch": 1.5294635004397539, "grad_norm": 0.5521765681122655, "learning_rate": 0.00013973208267125572, "loss": 3.174708604812622, "step": 2609, "token_acc": 0.28094055611760016 }, { "epoch": 1.5300498387569628, "grad_norm": 0.6253174657455257, "learning_rate": 0.00013973148932869015, "loss": 3.2505578994750977, "step": 2610, "token_acc": 0.27095122784061143 }, { "epoch": 1.5306361770741717, "grad_norm": 0.5013950607113463, "learning_rate": 0.00013973089533109116, "loss": 3.2260923385620117, "step": 2611, "token_acc": 0.2761453060970745 }, { "epoch": 1.5312225153913808, "grad_norm": 0.616590712216024, "learning_rate": 0.00013973030067846438, "loss": 3.2257394790649414, "step": 2612, "token_acc": 0.2743145766431603 }, { "epoch": 1.5318088537085899, "grad_norm": 0.7497297528940498, "learning_rate": 0.00013972970537081542, "loss": 3.167748212814331, "step": 2613, "token_acc": 0.2811995696611081 }, { "epoch": 1.532395192025799, "grad_norm": 0.6103722596840169, "learning_rate": 0.0001397291094081498, "loss": 3.2516868114471436, "step": 2614, "token_acc": 0.2732351499046027 }, { "epoch": 1.5329815303430079, "grad_norm": 0.6428805358346728, "learning_rate": 0.00013972851279047318, "loss": 3.1654186248779297, "step": 2615, "token_acc": 0.28194262338071013 }, { "epoch": 1.533567868660217, "grad_norm": 0.6544186066295447, "learning_rate": 0.00013972791551779113, "loss": 3.1971282958984375, "step": 2616, "token_acc": 0.27876981489261005 }, { "epoch": 1.5341542069774259, "grad_norm": 0.4607028201270057, "learning_rate": 0.00013972731759010927, "loss": 3.247673988342285, "step": 2617, "token_acc": 0.27348627903902134 }, { "epoch": 1.534740545294635, "grad_norm": 0.5504314125793278, "learning_rate": 0.00013972671900743325, "loss": 3.2553300857543945, "step": 2618, "token_acc": 0.271510496671787 }, { "epoch": 1.535326883611844, "grad_norm": 0.5726110299754713, "learning_rate": 0.00013972611976976866, "loss": 3.2078166007995605, "step": 2619, "token_acc": 0.27762963040349775 }, { "epoch": 1.5359132219290532, "grad_norm": 0.48802102804155106, "learning_rate": 0.0001397255198771211, "loss": 3.193302631378174, "step": 2620, "token_acc": 0.27857141038599315 }, { "epoch": 1.536499560246262, "grad_norm": 0.5389231663669414, "learning_rate": 0.00013972491932949627, "loss": 3.2187061309814453, "step": 2621, "token_acc": 0.2755332529401849 }, { "epoch": 1.537085898563471, "grad_norm": 0.48219155120755386, "learning_rate": 0.00013972431812689975, "loss": 3.209031581878662, "step": 2622, "token_acc": 0.27597583041702933 }, { "epoch": 1.53767223688068, "grad_norm": 0.5451966788804595, "learning_rate": 0.0001397237162693372, "loss": 3.236480712890625, "step": 2623, "token_acc": 0.274015594687919 }, { "epoch": 1.5382585751978892, "grad_norm": 0.5544783305451163, "learning_rate": 0.00013972311375681434, "loss": 3.2720773220062256, "step": 2624, "token_acc": 0.2698395964345392 }, { "epoch": 1.5388449135150983, "grad_norm": 0.5261476409920377, "learning_rate": 0.00013972251058933676, "loss": 3.211249828338623, "step": 2625, "token_acc": 0.27527843642566313 }, { "epoch": 1.5394312518323072, "grad_norm": 0.5044610104357428, "learning_rate": 0.00013972190676691016, "loss": 3.2361197471618652, "step": 2626, "token_acc": 0.27251663804353427 }, { "epoch": 1.5400175901495161, "grad_norm": 0.48800613665888454, "learning_rate": 0.00013972130228954017, "loss": 3.193969488143921, "step": 2627, "token_acc": 0.27747698770618395 }, { "epoch": 1.5406039284667252, "grad_norm": 0.5805291189691962, "learning_rate": 0.0001397206971572325, "loss": 3.1746785640716553, "step": 2628, "token_acc": 0.28094163662317473 }, { "epoch": 1.5411902667839343, "grad_norm": 0.6026930720886005, "learning_rate": 0.00013972009136999284, "loss": 3.2027015686035156, "step": 2629, "token_acc": 0.2782227337884141 }, { "epoch": 1.5417766051011434, "grad_norm": 0.4738573533417063, "learning_rate": 0.00013971948492782685, "loss": 3.1654419898986816, "step": 2630, "token_acc": 0.2825279422787062 }, { "epoch": 1.5423629434183523, "grad_norm": 0.5022218588875001, "learning_rate": 0.00013971887783074027, "loss": 3.1926217079162598, "step": 2631, "token_acc": 0.27997435518595126 }, { "epoch": 1.5429492817355615, "grad_norm": 0.5382075467289994, "learning_rate": 0.00013971827007873877, "loss": 3.161971092224121, "step": 2632, "token_acc": 0.28338649356472473 }, { "epoch": 1.5435356200527703, "grad_norm": 0.5324507414149843, "learning_rate": 0.00013971766167182806, "loss": 3.1841793060302734, "step": 2633, "token_acc": 0.28069247660515734 }, { "epoch": 1.5441219583699795, "grad_norm": 0.5934542366965995, "learning_rate": 0.00013971705261001387, "loss": 3.2289042472839355, "step": 2634, "token_acc": 0.27506286156849097 }, { "epoch": 1.5447082966871886, "grad_norm": 0.5736611516335997, "learning_rate": 0.00013971644289330194, "loss": 3.2282755374908447, "step": 2635, "token_acc": 0.27538602327768397 }, { "epoch": 1.5452946350043977, "grad_norm": 0.5263259124731735, "learning_rate": 0.00013971583252169796, "loss": 3.1658763885498047, "step": 2636, "token_acc": 0.28384548932494136 }, { "epoch": 1.5458809733216066, "grad_norm": 0.5208041852420772, "learning_rate": 0.00013971522149520767, "loss": 3.219151020050049, "step": 2637, "token_acc": 0.2759978507630051 }, { "epoch": 1.5464673116388155, "grad_norm": 0.5038314288809179, "learning_rate": 0.0001397146098138368, "loss": 3.238635778427124, "step": 2638, "token_acc": 0.27330765952745467 }, { "epoch": 1.5470536499560246, "grad_norm": 0.5125176924383831, "learning_rate": 0.00013971399747759113, "loss": 3.188901424407959, "step": 2639, "token_acc": 0.2791226151112927 }, { "epoch": 1.5476399882732337, "grad_norm": 0.5283162790329297, "learning_rate": 0.0001397133844864764, "loss": 3.195950508117676, "step": 2640, "token_acc": 0.27902536666261757 }, { "epoch": 1.5482263265904428, "grad_norm": 0.6558315475509426, "learning_rate": 0.00013971277084049837, "loss": 3.274303913116455, "step": 2641, "token_acc": 0.2694335294941482 }, { "epoch": 1.5488126649076517, "grad_norm": 0.7085779800656861, "learning_rate": 0.00013971215653966278, "loss": 3.1904537677764893, "step": 2642, "token_acc": 0.280076866126644 }, { "epoch": 1.5493990032248608, "grad_norm": 0.6524284319210493, "learning_rate": 0.0001397115415839754, "loss": 3.215273857116699, "step": 2643, "token_acc": 0.276452617355032 }, { "epoch": 1.5499853415420697, "grad_norm": 0.6589293839963091, "learning_rate": 0.00013971092597344208, "loss": 3.208240032196045, "step": 2644, "token_acc": 0.2785690608879405 }, { "epoch": 1.5505716798592788, "grad_norm": 0.5590672785062665, "learning_rate": 0.00013971030970806852, "loss": 3.189129114151001, "step": 2645, "token_acc": 0.2780788712885639 }, { "epoch": 1.551158018176488, "grad_norm": 0.45446240869728055, "learning_rate": 0.0001397096927878605, "loss": 3.2364468574523926, "step": 2646, "token_acc": 0.2731970589768994 }, { "epoch": 1.551744356493697, "grad_norm": 0.4913708214508635, "learning_rate": 0.0001397090752128239, "loss": 3.19134259223938, "step": 2647, "token_acc": 0.27929070809222734 }, { "epoch": 1.552330694810906, "grad_norm": 0.5200967765371806, "learning_rate": 0.00013970845698296443, "loss": 3.1862666606903076, "step": 2648, "token_acc": 0.2810164661596496 }, { "epoch": 1.5529170331281148, "grad_norm": 0.46779491552111524, "learning_rate": 0.000139707838098288, "loss": 3.2281904220581055, "step": 2649, "token_acc": 0.2748163999805457 }, { "epoch": 1.553503371445324, "grad_norm": 0.49990038498996026, "learning_rate": 0.0001397072185588003, "loss": 3.2237701416015625, "step": 2650, "token_acc": 0.2756789928548511 }, { "epoch": 1.554089709762533, "grad_norm": 0.6189599194610861, "learning_rate": 0.00013970659836450724, "loss": 3.256709098815918, "step": 2651, "token_acc": 0.2712075950403077 }, { "epoch": 1.5546760480797421, "grad_norm": 0.5301405773276009, "learning_rate": 0.00013970597751541462, "loss": 3.193275213241577, "step": 2652, "token_acc": 0.27916180505187477 }, { "epoch": 1.555262386396951, "grad_norm": 0.5801232337212008, "learning_rate": 0.0001397053560115283, "loss": 3.2288732528686523, "step": 2653, "token_acc": 0.27589025498532344 }, { "epoch": 1.55584872471416, "grad_norm": 0.445167932547961, "learning_rate": 0.00013970473385285404, "loss": 3.2229137420654297, "step": 2654, "token_acc": 0.2759732479938658 }, { "epoch": 1.556435063031369, "grad_norm": 0.5181136068088518, "learning_rate": 0.00013970411103939775, "loss": 3.2555737495422363, "step": 2655, "token_acc": 0.27262459638741404 }, { "epoch": 1.5570214013485781, "grad_norm": 0.5451827853320111, "learning_rate": 0.00013970348757116527, "loss": 3.234294891357422, "step": 2656, "token_acc": 0.2733629562846809 }, { "epoch": 1.5576077396657872, "grad_norm": 0.4737621942648672, "learning_rate": 0.00013970286344816245, "loss": 3.2374162673950195, "step": 2657, "token_acc": 0.2737173777204621 }, { "epoch": 1.5581940779829961, "grad_norm": 0.5586768672720697, "learning_rate": 0.00013970223867039517, "loss": 3.209918737411499, "step": 2658, "token_acc": 0.2752484803679974 }, { "epoch": 1.5587804163002053, "grad_norm": 0.5367479916347963, "learning_rate": 0.00013970161323786925, "loss": 3.198747158050537, "step": 2659, "token_acc": 0.27792302879085673 }, { "epoch": 1.5593667546174141, "grad_norm": 0.47471411690306475, "learning_rate": 0.0001397009871505906, "loss": 3.206704616546631, "step": 2660, "token_acc": 0.2767899079009169 }, { "epoch": 1.5599530929346233, "grad_norm": 0.42285735979847067, "learning_rate": 0.00013970036040856511, "loss": 3.2120614051818848, "step": 2661, "token_acc": 0.27517074017191856 }, { "epoch": 1.5605394312518324, "grad_norm": 0.45244278774427144, "learning_rate": 0.00013969973301179868, "loss": 3.189943790435791, "step": 2662, "token_acc": 0.2794773841847151 }, { "epoch": 1.5611257695690415, "grad_norm": 0.5391173159679198, "learning_rate": 0.00013969910496029715, "loss": 3.17856502532959, "step": 2663, "token_acc": 0.2789332906470585 }, { "epoch": 1.5617121078862504, "grad_norm": 0.46566225921400406, "learning_rate": 0.00013969847625406646, "loss": 3.1868972778320312, "step": 2664, "token_acc": 0.2787448227234567 }, { "epoch": 1.5622984462034593, "grad_norm": 0.46469903409964847, "learning_rate": 0.0001396978468931125, "loss": 3.2437336444854736, "step": 2665, "token_acc": 0.2732312344994297 }, { "epoch": 1.5628847845206684, "grad_norm": 0.533421489289303, "learning_rate": 0.0001396972168774412, "loss": 3.1971049308776855, "step": 2666, "token_acc": 0.2775082155811092 }, { "epoch": 1.5634711228378775, "grad_norm": 0.5469994761503765, "learning_rate": 0.00013969658620705845, "loss": 3.2324881553649902, "step": 2667, "token_acc": 0.27489355727927817 }, { "epoch": 1.5640574611550866, "grad_norm": 0.621228929752348, "learning_rate": 0.00013969595488197022, "loss": 3.284426689147949, "step": 2668, "token_acc": 0.2679566467090722 }, { "epoch": 1.5646437994722955, "grad_norm": 0.5602255281147202, "learning_rate": 0.00013969532290218235, "loss": 3.2318644523620605, "step": 2669, "token_acc": 0.27433683044476587 }, { "epoch": 1.5652301377895046, "grad_norm": 0.5654545428151967, "learning_rate": 0.0001396946902677009, "loss": 3.1959280967712402, "step": 2670, "token_acc": 0.27546345860527577 }, { "epoch": 1.5658164761067135, "grad_norm": 0.7558832029167432, "learning_rate": 0.00013969405697853172, "loss": 3.2658772468566895, "step": 2671, "token_acc": 0.2701158276552468 }, { "epoch": 1.5664028144239226, "grad_norm": 0.7655953713312325, "learning_rate": 0.00013969342303468078, "loss": 3.1789746284484863, "step": 2672, "token_acc": 0.28080791386659343 }, { "epoch": 1.5669891527411317, "grad_norm": 0.6751482667912649, "learning_rate": 0.00013969278843615406, "loss": 3.2215323448181152, "step": 2673, "token_acc": 0.27552628356035763 }, { "epoch": 1.5675754910583408, "grad_norm": 0.5774194415395052, "learning_rate": 0.00013969215318295752, "loss": 3.1797993183135986, "step": 2674, "token_acc": 0.2818518250126815 }, { "epoch": 1.5681618293755497, "grad_norm": 0.5120096625019415, "learning_rate": 0.00013969151727509708, "loss": 3.1890034675598145, "step": 2675, "token_acc": 0.27998902003842985 }, { "epoch": 1.5687481676927586, "grad_norm": 0.5689822047980303, "learning_rate": 0.00013969088071257875, "loss": 3.2042322158813477, "step": 2676, "token_acc": 0.2760792999527977 }, { "epoch": 1.5693345060099677, "grad_norm": 0.5593549234821941, "learning_rate": 0.00013969024349540853, "loss": 3.229390859603882, "step": 2677, "token_acc": 0.2735966472268539 }, { "epoch": 1.5699208443271768, "grad_norm": 0.44162606885340083, "learning_rate": 0.00013968960562359234, "loss": 3.188323736190796, "step": 2678, "token_acc": 0.27845438075370155 }, { "epoch": 1.570507182644386, "grad_norm": 0.4570523309689249, "learning_rate": 0.00013968896709713624, "loss": 3.1839938163757324, "step": 2679, "token_acc": 0.28123097329738195 }, { "epoch": 1.5710935209615948, "grad_norm": 0.557876063071313, "learning_rate": 0.0001396883279160462, "loss": 3.171645164489746, "step": 2680, "token_acc": 0.2812261709003574 }, { "epoch": 1.5716798592788037, "grad_norm": 0.5182152893644817, "learning_rate": 0.00013968768808032825, "loss": 3.1851248741149902, "step": 2681, "token_acc": 0.28130034614218946 }, { "epoch": 1.5722661975960128, "grad_norm": 0.5579671628290019, "learning_rate": 0.00013968704758998834, "loss": 3.195298910140991, "step": 2682, "token_acc": 0.27879305586584835 }, { "epoch": 1.572852535913222, "grad_norm": 0.48531072530361874, "learning_rate": 0.00013968640644503253, "loss": 3.2068495750427246, "step": 2683, "token_acc": 0.2770602456143915 }, { "epoch": 1.573438874230431, "grad_norm": 0.4190035322296846, "learning_rate": 0.00013968576464546683, "loss": 3.203878879547119, "step": 2684, "token_acc": 0.27686847764423395 }, { "epoch": 1.57402521254764, "grad_norm": 0.5272757450008215, "learning_rate": 0.00013968512219129727, "loss": 3.218924045562744, "step": 2685, "token_acc": 0.27657591658275066 }, { "epoch": 1.574611550864849, "grad_norm": 0.5439370644627942, "learning_rate": 0.0001396844790825299, "loss": 3.212144136428833, "step": 2686, "token_acc": 0.2770561746358467 }, { "epoch": 1.575197889182058, "grad_norm": 0.4276587152425814, "learning_rate": 0.00013968383531917078, "loss": 3.232213020324707, "step": 2687, "token_acc": 0.27350318302034743 }, { "epoch": 1.575784227499267, "grad_norm": 0.4253823130056882, "learning_rate": 0.00013968319090122588, "loss": 3.1527292728424072, "step": 2688, "token_acc": 0.28541354601028296 }, { "epoch": 1.5763705658164762, "grad_norm": 0.5334675391936817, "learning_rate": 0.00013968254582870132, "loss": 3.2525062561035156, "step": 2689, "token_acc": 0.26966163247535696 }, { "epoch": 1.5769569041336853, "grad_norm": 0.45331283165405445, "learning_rate": 0.00013968190010160315, "loss": 3.2137365341186523, "step": 2690, "token_acc": 0.27467417023725776 }, { "epoch": 1.5775432424508942, "grad_norm": 0.40249759904470883, "learning_rate": 0.0001396812537199374, "loss": 3.222071647644043, "step": 2691, "token_acc": 0.27452846955594457 }, { "epoch": 1.578129580768103, "grad_norm": 0.446160344775638, "learning_rate": 0.00013968060668371018, "loss": 3.1708972454071045, "step": 2692, "token_acc": 0.2817640237499191 }, { "epoch": 1.5787159190853122, "grad_norm": 0.551596544767136, "learning_rate": 0.00013967995899292758, "loss": 3.18241810798645, "step": 2693, "token_acc": 0.2796375059655232 }, { "epoch": 1.5793022574025213, "grad_norm": 0.5779992223814839, "learning_rate": 0.00013967931064759565, "loss": 3.1835827827453613, "step": 2694, "token_acc": 0.2802087429719302 }, { "epoch": 1.5798885957197304, "grad_norm": 0.5832957485550658, "learning_rate": 0.00013967866164772046, "loss": 3.22414493560791, "step": 2695, "token_acc": 0.27510367998320123 }, { "epoch": 1.5804749340369393, "grad_norm": 0.5158200234322589, "learning_rate": 0.00013967801199330816, "loss": 3.184892177581787, "step": 2696, "token_acc": 0.2784660582735479 }, { "epoch": 1.5810612723541484, "grad_norm": 0.482295869367091, "learning_rate": 0.00013967736168436483, "loss": 3.223694324493408, "step": 2697, "token_acc": 0.27423646322420064 }, { "epoch": 1.5816476106713573, "grad_norm": 0.5342339275734188, "learning_rate": 0.0001396767107208966, "loss": 3.2016854286193848, "step": 2698, "token_acc": 0.2786818905251036 }, { "epoch": 1.5822339489885664, "grad_norm": 0.4936678068087708, "learning_rate": 0.0001396760591029095, "loss": 3.1807003021240234, "step": 2699, "token_acc": 0.2796874307191128 }, { "epoch": 1.5828202873057755, "grad_norm": 0.5479229909323583, "learning_rate": 0.00013967540683040977, "loss": 3.2324390411376953, "step": 2700, "token_acc": 0.2740212313781052 }, { "epoch": 1.5834066256229846, "grad_norm": 0.5662327287115109, "learning_rate": 0.00013967475390340344, "loss": 3.219440460205078, "step": 2701, "token_acc": 0.2752865225979476 }, { "epoch": 1.5839929639401935, "grad_norm": 0.5067514797158786, "learning_rate": 0.0001396741003218967, "loss": 3.204756259918213, "step": 2702, "token_acc": 0.2786231329296044 }, { "epoch": 1.5845793022574024, "grad_norm": 0.4636219209374442, "learning_rate": 0.00013967344608589572, "loss": 3.2148818969726562, "step": 2703, "token_acc": 0.27683304279768045 }, { "epoch": 1.5851656405746115, "grad_norm": 0.4306301730326108, "learning_rate": 0.00013967279119540655, "loss": 3.1739554405212402, "step": 2704, "token_acc": 0.2810820494446435 }, { "epoch": 1.5857519788918206, "grad_norm": 0.5202596264646511, "learning_rate": 0.0001396721356504354, "loss": 3.199911594390869, "step": 2705, "token_acc": 0.2771548372181004 }, { "epoch": 1.5863383172090297, "grad_norm": 0.46489748586407503, "learning_rate": 0.00013967147945098844, "loss": 3.1799440383911133, "step": 2706, "token_acc": 0.27870855101894165 }, { "epoch": 1.5869246555262386, "grad_norm": 0.43485151992400345, "learning_rate": 0.0001396708225970718, "loss": 3.2201790809631348, "step": 2707, "token_acc": 0.27671206055266157 }, { "epoch": 1.5875109938434475, "grad_norm": 0.4861233043403749, "learning_rate": 0.00013967016508869166, "loss": 3.150667905807495, "step": 2708, "token_acc": 0.2831345717977504 }, { "epoch": 1.5880973321606566, "grad_norm": 0.4539914368961883, "learning_rate": 0.00013966950692585422, "loss": 3.1869277954101562, "step": 2709, "token_acc": 0.2790298133023703 }, { "epoch": 1.5886836704778657, "grad_norm": 0.48651449268368896, "learning_rate": 0.00013966884810856563, "loss": 3.193302869796753, "step": 2710, "token_acc": 0.280412287901826 }, { "epoch": 1.5892700087950749, "grad_norm": 0.45521869240156493, "learning_rate": 0.00013966818863683208, "loss": 3.2114064693450928, "step": 2711, "token_acc": 0.276478556914192 }, { "epoch": 1.5898563471122837, "grad_norm": 0.45024023826380916, "learning_rate": 0.00013966752851065977, "loss": 3.203510046005249, "step": 2712, "token_acc": 0.2784533698723987 }, { "epoch": 1.5904426854294929, "grad_norm": 0.49262720463122334, "learning_rate": 0.00013966686773005495, "loss": 3.147939920425415, "step": 2713, "token_acc": 0.28395912450487976 }, { "epoch": 1.5910290237467017, "grad_norm": 0.495082891432347, "learning_rate": 0.00013966620629502375, "loss": 3.203338861465454, "step": 2714, "token_acc": 0.2771842173364247 }, { "epoch": 1.5916153620639109, "grad_norm": 0.47087702278270255, "learning_rate": 0.00013966554420557242, "loss": 3.256025552749634, "step": 2715, "token_acc": 0.27033430051127116 }, { "epoch": 1.59220170038112, "grad_norm": 0.5172837062159663, "learning_rate": 0.00013966488146170718, "loss": 3.243058204650879, "step": 2716, "token_acc": 0.27293518034500786 }, { "epoch": 1.592788038698329, "grad_norm": 0.5399900968566896, "learning_rate": 0.00013966421806343426, "loss": 3.2418084144592285, "step": 2717, "token_acc": 0.27424793621598453 }, { "epoch": 1.593374377015538, "grad_norm": 0.5258309131959893, "learning_rate": 0.0001396635540107599, "loss": 3.1864821910858154, "step": 2718, "token_acc": 0.28033251371904055 }, { "epoch": 1.5939607153327469, "grad_norm": 0.5997194349395937, "learning_rate": 0.0001396628893036903, "loss": 3.1837596893310547, "step": 2719, "token_acc": 0.27872559911125216 }, { "epoch": 1.594547053649956, "grad_norm": 0.675793619010458, "learning_rate": 0.00013966222394223173, "loss": 3.2070446014404297, "step": 2720, "token_acc": 0.2770892645655716 }, { "epoch": 1.595133391967165, "grad_norm": 0.522843521991948, "learning_rate": 0.00013966155792639046, "loss": 3.1932358741760254, "step": 2721, "token_acc": 0.27688507513485083 }, { "epoch": 1.5957197302843742, "grad_norm": 0.4270349720290733, "learning_rate": 0.0001396608912561727, "loss": 3.196841239929199, "step": 2722, "token_acc": 0.27971137628872517 }, { "epoch": 1.596306068601583, "grad_norm": 0.5833833956154665, "learning_rate": 0.00013966022393158473, "loss": 3.2072596549987793, "step": 2723, "token_acc": 0.2758335107404888 }, { "epoch": 1.5968924069187922, "grad_norm": 0.682242413805912, "learning_rate": 0.00013965955595263285, "loss": 3.222935438156128, "step": 2724, "token_acc": 0.2738472950076335 }, { "epoch": 1.597478745236001, "grad_norm": 0.4612932820499706, "learning_rate": 0.0001396588873193233, "loss": 3.172611951828003, "step": 2725, "token_acc": 0.28070929806188155 }, { "epoch": 1.5980650835532102, "grad_norm": 0.520011141771683, "learning_rate": 0.00013965821803166233, "loss": 3.2100110054016113, "step": 2726, "token_acc": 0.27539065550979175 }, { "epoch": 1.5986514218704193, "grad_norm": 0.5972673251339681, "learning_rate": 0.0001396575480896563, "loss": 3.2253332138061523, "step": 2727, "token_acc": 0.2758635004981734 }, { "epoch": 1.5992377601876284, "grad_norm": 0.6352499263432669, "learning_rate": 0.00013965687749331149, "loss": 3.191011905670166, "step": 2728, "token_acc": 0.2788634131472889 }, { "epoch": 1.5998240985048373, "grad_norm": 0.7112319344339046, "learning_rate": 0.00013965620624263416, "loss": 3.3048880100250244, "step": 2729, "token_acc": 0.2640569734487061 }, { "epoch": 1.6004104368220462, "grad_norm": 0.4849191580174677, "learning_rate": 0.00013965553433763065, "loss": 3.164024591445923, "step": 2730, "token_acc": 0.2824449357242283 }, { "epoch": 1.6009967751392553, "grad_norm": 0.7342063589068669, "learning_rate": 0.00013965486177830723, "loss": 3.197573661804199, "step": 2731, "token_acc": 0.2776699758720548 }, { "epoch": 1.6015831134564644, "grad_norm": 0.7611103716885684, "learning_rate": 0.00013965418856467027, "loss": 3.235909938812256, "step": 2732, "token_acc": 0.274509753348482 }, { "epoch": 1.6021694517736735, "grad_norm": 0.5904509818956125, "learning_rate": 0.00013965351469672605, "loss": 3.190512180328369, "step": 2733, "token_acc": 0.2791747194076582 }, { "epoch": 1.6027557900908824, "grad_norm": 0.5907846175429431, "learning_rate": 0.00013965284017448094, "loss": 3.2232768535614014, "step": 2734, "token_acc": 0.27475108556798056 }, { "epoch": 1.6033421284080913, "grad_norm": 0.4636028391864659, "learning_rate": 0.00013965216499794124, "loss": 3.239224910736084, "step": 2735, "token_acc": 0.2703204493239914 }, { "epoch": 1.6039284667253004, "grad_norm": 0.5382538201530409, "learning_rate": 0.00013965148916711328, "loss": 3.229917049407959, "step": 2736, "token_acc": 0.2745889590592335 }, { "epoch": 1.6045148050425095, "grad_norm": 0.5707164978923229, "learning_rate": 0.00013965081268200346, "loss": 3.2606852054595947, "step": 2737, "token_acc": 0.2694431726039591 }, { "epoch": 1.6051011433597187, "grad_norm": 0.5338354633061114, "learning_rate": 0.00013965013554261812, "loss": 3.186835765838623, "step": 2738, "token_acc": 0.2792937442137283 }, { "epoch": 1.6056874816769275, "grad_norm": 0.5919801764184374, "learning_rate": 0.00013964945774896362, "loss": 3.17567777633667, "step": 2739, "token_acc": 0.2811807808186103 }, { "epoch": 1.6062738199941367, "grad_norm": 0.6276235174683067, "learning_rate": 0.00013964877930104628, "loss": 3.1805691719055176, "step": 2740, "token_acc": 0.27817791094754146 }, { "epoch": 1.6068601583113455, "grad_norm": 0.5645733494323174, "learning_rate": 0.00013964810019887256, "loss": 3.190262794494629, "step": 2741, "token_acc": 0.28169470180804174 }, { "epoch": 1.6074464966285547, "grad_norm": 0.5558738467254616, "learning_rate": 0.00013964742044244877, "loss": 3.171491861343384, "step": 2742, "token_acc": 0.28051672066620953 }, { "epoch": 1.6080328349457638, "grad_norm": 0.5455773732472631, "learning_rate": 0.0001396467400317813, "loss": 3.182415723800659, "step": 2743, "token_acc": 0.2800098489740233 }, { "epoch": 1.6086191732629729, "grad_norm": 0.4797316762069997, "learning_rate": 0.00013964605896687657, "loss": 3.1327149868011475, "step": 2744, "token_acc": 0.28742276102744013 }, { "epoch": 1.6092055115801818, "grad_norm": 0.6799038648856757, "learning_rate": 0.00013964537724774098, "loss": 3.1731319427490234, "step": 2745, "token_acc": 0.28145694499912555 }, { "epoch": 1.6097918498973907, "grad_norm": 0.532791765186325, "learning_rate": 0.0001396446948743809, "loss": 3.2059335708618164, "step": 2746, "token_acc": 0.2765414092126548 }, { "epoch": 1.6103781882145998, "grad_norm": 0.4482968429900987, "learning_rate": 0.00013964401184680275, "loss": 3.2309787273406982, "step": 2747, "token_acc": 0.2733296241970563 }, { "epoch": 1.6109645265318089, "grad_norm": 0.41854285620645476, "learning_rate": 0.00013964332816501296, "loss": 3.2101831436157227, "step": 2748, "token_acc": 0.27553718695107327 }, { "epoch": 1.611550864849018, "grad_norm": 0.4597137933538926, "learning_rate": 0.00013964264382901795, "loss": 3.2345402240753174, "step": 2749, "token_acc": 0.27235935487762974 }, { "epoch": 1.6121372031662269, "grad_norm": 0.40995225982007355, "learning_rate": 0.00013964195883882418, "loss": 3.2008018493652344, "step": 2750, "token_acc": 0.27811980313867724 }, { "epoch": 1.612723541483436, "grad_norm": 0.46317095467920927, "learning_rate": 0.00013964127319443802, "loss": 3.16324520111084, "step": 2751, "token_acc": 0.28194677153394954 }, { "epoch": 1.6133098798006449, "grad_norm": 0.4601293041107578, "learning_rate": 0.00013964058689586593, "loss": 3.1708736419677734, "step": 2752, "token_acc": 0.28267571144412657 }, { "epoch": 1.613896218117854, "grad_norm": 0.3530017130378245, "learning_rate": 0.00013963989994311438, "loss": 3.181173801422119, "step": 2753, "token_acc": 0.2804247194987021 }, { "epoch": 1.614482556435063, "grad_norm": 0.437318710448947, "learning_rate": 0.00013963921233618983, "loss": 3.175046920776367, "step": 2754, "token_acc": 0.2804499210781128 }, { "epoch": 1.6150688947522722, "grad_norm": 0.4850054206775922, "learning_rate": 0.00013963852407509867, "loss": 3.214479923248291, "step": 2755, "token_acc": 0.2767477937307782 }, { "epoch": 1.6156552330694811, "grad_norm": 0.42455507234917195, "learning_rate": 0.00013963783515984747, "loss": 3.2068638801574707, "step": 2756, "token_acc": 0.27822030329627956 }, { "epoch": 1.61624157138669, "grad_norm": 0.37455412404072197, "learning_rate": 0.0001396371455904426, "loss": 3.2219433784484863, "step": 2757, "token_acc": 0.2760048091627206 }, { "epoch": 1.6168279097038991, "grad_norm": 0.3535944796059334, "learning_rate": 0.00013963645536689063, "loss": 3.196207284927368, "step": 2758, "token_acc": 0.27640335572573316 }, { "epoch": 1.6174142480211082, "grad_norm": 0.5448609349252622, "learning_rate": 0.00013963576448919798, "loss": 3.2348995208740234, "step": 2759, "token_acc": 0.270530565313946 }, { "epoch": 1.6180005863383173, "grad_norm": 0.6827737165246361, "learning_rate": 0.00013963507295737114, "loss": 3.218559741973877, "step": 2760, "token_acc": 0.2762573284579086 }, { "epoch": 1.6185869246555262, "grad_norm": 0.6984321318031556, "learning_rate": 0.00013963438077141665, "loss": 3.219879150390625, "step": 2761, "token_acc": 0.27504082316243433 }, { "epoch": 1.6191732629727351, "grad_norm": 0.6278640522164307, "learning_rate": 0.00013963368793134097, "loss": 3.1340649127960205, "step": 2762, "token_acc": 0.2865340444582863 }, { "epoch": 1.6197596012899442, "grad_norm": 0.8259758537444716, "learning_rate": 0.00013963299443715065, "loss": 3.167959213256836, "step": 2763, "token_acc": 0.2819945812415332 }, { "epoch": 1.6203459396071533, "grad_norm": 0.8187990397862871, "learning_rate": 0.00013963230028885215, "loss": 3.2088723182678223, "step": 2764, "token_acc": 0.2753867070261396 }, { "epoch": 1.6209322779243625, "grad_norm": 0.5543251553641797, "learning_rate": 0.00013963160548645202, "loss": 3.1469736099243164, "step": 2765, "token_acc": 0.28422297736525326 }, { "epoch": 1.6215186162415713, "grad_norm": 0.573506057465699, "learning_rate": 0.0001396309100299568, "loss": 3.178621768951416, "step": 2766, "token_acc": 0.2806689445290533 }, { "epoch": 1.6221049545587805, "grad_norm": 0.6534757843808497, "learning_rate": 0.00013963021391937298, "loss": 3.1959521770477295, "step": 2767, "token_acc": 0.2804754450664615 }, { "epoch": 1.6226912928759893, "grad_norm": 0.4852435553240719, "learning_rate": 0.00013962951715470716, "loss": 3.1643478870391846, "step": 2768, "token_acc": 0.28307289744366415 }, { "epoch": 1.6232776311931985, "grad_norm": 0.505429628143384, "learning_rate": 0.00013962881973596586, "loss": 3.221673011779785, "step": 2769, "token_acc": 0.27435543831729553 }, { "epoch": 1.6238639695104076, "grad_norm": 0.490235153647356, "learning_rate": 0.0001396281216631556, "loss": 3.1926498413085938, "step": 2770, "token_acc": 0.2793360322581512 }, { "epoch": 1.6244503078276167, "grad_norm": 0.5032803750847454, "learning_rate": 0.00013962742293628297, "loss": 3.1885128021240234, "step": 2771, "token_acc": 0.279042662763593 }, { "epoch": 1.6250366461448256, "grad_norm": 0.494799300505114, "learning_rate": 0.00013962672355535453, "loss": 3.1932482719421387, "step": 2772, "token_acc": 0.2792116365208841 }, { "epoch": 1.6256229844620345, "grad_norm": 0.5800864261851814, "learning_rate": 0.00013962602352037684, "loss": 3.2201380729675293, "step": 2773, "token_acc": 0.2740198767751184 }, { "epoch": 1.6262093227792436, "grad_norm": 0.6734912719486764, "learning_rate": 0.00013962532283135647, "loss": 3.264524459838867, "step": 2774, "token_acc": 0.26961632819042064 }, { "epoch": 1.6267956610964527, "grad_norm": 0.5918088352228305, "learning_rate": 0.00013962462148830006, "loss": 3.212893009185791, "step": 2775, "token_acc": 0.27533666374236276 }, { "epoch": 1.6273819994136618, "grad_norm": 0.4830231820932037, "learning_rate": 0.00013962391949121409, "loss": 3.2309601306915283, "step": 2776, "token_acc": 0.2750350481570782 }, { "epoch": 1.6279683377308707, "grad_norm": 0.5080155525980855, "learning_rate": 0.00013962321684010524, "loss": 3.171574831008911, "step": 2777, "token_acc": 0.2811130440169109 }, { "epoch": 1.6285546760480798, "grad_norm": 0.5375617014935056, "learning_rate": 0.0001396225135349801, "loss": 3.1722097396850586, "step": 2778, "token_acc": 0.2818057170807348 }, { "epoch": 1.6291410143652887, "grad_norm": 0.4939016112059976, "learning_rate": 0.00013962180957584526, "loss": 3.18725323677063, "step": 2779, "token_acc": 0.2779995923721329 }, { "epoch": 1.6297273526824978, "grad_norm": 0.4674234132395943, "learning_rate": 0.00013962110496270735, "loss": 3.2355170249938965, "step": 2780, "token_acc": 0.27360591200698287 }, { "epoch": 1.630313690999707, "grad_norm": 0.4171044963550156, "learning_rate": 0.00013962039969557294, "loss": 3.182727813720703, "step": 2781, "token_acc": 0.2794037531427519 }, { "epoch": 1.630900029316916, "grad_norm": 0.467334039402825, "learning_rate": 0.0001396196937744487, "loss": 3.1953125, "step": 2782, "token_acc": 0.2793755454783246 }, { "epoch": 1.631486367634125, "grad_norm": 0.49378748855939913, "learning_rate": 0.00013961898719934125, "loss": 3.1549072265625, "step": 2783, "token_acc": 0.284544796260944 }, { "epoch": 1.6320727059513338, "grad_norm": 0.5011754345291495, "learning_rate": 0.00013961827997025723, "loss": 3.1880993843078613, "step": 2784, "token_acc": 0.2799037819197672 }, { "epoch": 1.632659044268543, "grad_norm": 0.3785572311228852, "learning_rate": 0.0001396175720872033, "loss": 3.19978928565979, "step": 2785, "token_acc": 0.2790436165967965 }, { "epoch": 1.633245382585752, "grad_norm": 0.444949052672144, "learning_rate": 0.00013961686355018604, "loss": 3.1619184017181396, "step": 2786, "token_acc": 0.28072792893542936 }, { "epoch": 1.6338317209029611, "grad_norm": 0.5981178007039663, "learning_rate": 0.0001396161543592122, "loss": 3.180338144302368, "step": 2787, "token_acc": 0.2810062286142695 }, { "epoch": 1.63441805922017, "grad_norm": 0.565508354408448, "learning_rate": 0.0001396154445142884, "loss": 3.2334165573120117, "step": 2788, "token_acc": 0.2735622078086173 }, { "epoch": 1.635004397537379, "grad_norm": 0.35395689179785456, "learning_rate": 0.0001396147340154213, "loss": 3.231954574584961, "step": 2789, "token_acc": 0.2735847583352854 }, { "epoch": 1.635590735854588, "grad_norm": 0.3909312353810073, "learning_rate": 0.00013961402286261757, "loss": 3.2122340202331543, "step": 2790, "token_acc": 0.2757272923176382 }, { "epoch": 1.6361770741717971, "grad_norm": 0.44224363044343873, "learning_rate": 0.0001396133110558839, "loss": 3.2273662090301514, "step": 2791, "token_acc": 0.27367477305838844 }, { "epoch": 1.6367634124890063, "grad_norm": 0.44102929664807083, "learning_rate": 0.000139612598595227, "loss": 3.1619436740875244, "step": 2792, "token_acc": 0.28070656502743374 }, { "epoch": 1.6373497508062151, "grad_norm": 0.44576533621555076, "learning_rate": 0.0001396118854806535, "loss": 3.276240825653076, "step": 2793, "token_acc": 0.26777507753370716 }, { "epoch": 1.6379360891234243, "grad_norm": 0.554241648591905, "learning_rate": 0.0001396111717121702, "loss": 3.2208337783813477, "step": 2794, "token_acc": 0.2754424352127777 }, { "epoch": 1.6385224274406331, "grad_norm": 0.5942441086993988, "learning_rate": 0.0001396104572897837, "loss": 3.173607110977173, "step": 2795, "token_acc": 0.28088986200216265 }, { "epoch": 1.6391087657578423, "grad_norm": 0.5742152802886762, "learning_rate": 0.00013960974221350077, "loss": 3.2082912921905518, "step": 2796, "token_acc": 0.2778953488056202 }, { "epoch": 1.6396951040750514, "grad_norm": 0.41530914580527023, "learning_rate": 0.0001396090264833281, "loss": 3.246802806854248, "step": 2797, "token_acc": 0.2713307925643618 }, { "epoch": 1.6402814423922605, "grad_norm": 0.4250795269189028, "learning_rate": 0.00013960831009927243, "loss": 3.2449562549591064, "step": 2798, "token_acc": 0.27141191951282617 }, { "epoch": 1.6408677807094694, "grad_norm": 0.4908870937666687, "learning_rate": 0.0001396075930613405, "loss": 3.2376413345336914, "step": 2799, "token_acc": 0.2719256857120746 }, { "epoch": 1.6414541190266783, "grad_norm": 0.4515410434006724, "learning_rate": 0.000139606875369539, "loss": 3.2563443183898926, "step": 2800, "token_acc": 0.2697288735675021 }, { "epoch": 1.6420404573438874, "grad_norm": 0.4513458647871023, "learning_rate": 0.00013960615702387472, "loss": 3.190453052520752, "step": 2801, "token_acc": 0.27799955306354623 }, { "epoch": 1.6426267956610965, "grad_norm": 0.48235937748981295, "learning_rate": 0.0001396054380243544, "loss": 3.134134292602539, "step": 2802, "token_acc": 0.28566873366792483 }, { "epoch": 1.6432131339783056, "grad_norm": 0.3907157073921604, "learning_rate": 0.00013960471837098478, "loss": 3.1899261474609375, "step": 2803, "token_acc": 0.27903039085203063 }, { "epoch": 1.6437994722955145, "grad_norm": 0.49388729068348364, "learning_rate": 0.0001396039980637726, "loss": 3.210207462310791, "step": 2804, "token_acc": 0.27620284617122204 }, { "epoch": 1.6443858106127234, "grad_norm": 0.5658747770635152, "learning_rate": 0.0001396032771027247, "loss": 3.200040340423584, "step": 2805, "token_acc": 0.27748167803571294 }, { "epoch": 1.6449721489299325, "grad_norm": 0.6253117280581331, "learning_rate": 0.00013960255548784776, "loss": 3.2142438888549805, "step": 2806, "token_acc": 0.27707643763281603 }, { "epoch": 1.6455584872471416, "grad_norm": 0.5587258123217902, "learning_rate": 0.00013960183321914862, "loss": 3.1908645629882812, "step": 2807, "token_acc": 0.2785951883654367 }, { "epoch": 1.6461448255643507, "grad_norm": 0.5929474032280909, "learning_rate": 0.00013960111029663402, "loss": 3.201730966567993, "step": 2808, "token_acc": 0.2761325750147088 }, { "epoch": 1.6467311638815598, "grad_norm": 0.5180526507361575, "learning_rate": 0.0001396003867203108, "loss": 3.177999496459961, "step": 2809, "token_acc": 0.2808789462202508 }, { "epoch": 1.6473175021987687, "grad_norm": 0.5157969139759756, "learning_rate": 0.00013959966249018575, "loss": 3.1972265243530273, "step": 2810, "token_acc": 0.2783909713184379 }, { "epoch": 1.6479038405159776, "grad_norm": 0.6545660161880537, "learning_rate": 0.00013959893760626563, "loss": 3.125580310821533, "step": 2811, "token_acc": 0.28896460141379726 }, { "epoch": 1.6484901788331867, "grad_norm": 0.7082665301387975, "learning_rate": 0.0001395982120685573, "loss": 3.1641459465026855, "step": 2812, "token_acc": 0.28295544526272753 }, { "epoch": 1.6490765171503958, "grad_norm": 0.5126528682643482, "learning_rate": 0.00013959748587706754, "loss": 3.224123001098633, "step": 2813, "token_acc": 0.2772183822406136 }, { "epoch": 1.649662855467605, "grad_norm": 0.41304218403096193, "learning_rate": 0.0001395967590318032, "loss": 3.1737208366394043, "step": 2814, "token_acc": 0.27829346072240785 }, { "epoch": 1.6502491937848138, "grad_norm": 0.4673682467824826, "learning_rate": 0.00013959603153277105, "loss": 3.2188334465026855, "step": 2815, "token_acc": 0.2758300101588595 }, { "epoch": 1.6508355321020227, "grad_norm": 0.42444108445130196, "learning_rate": 0.000139595303379978, "loss": 3.1897037029266357, "step": 2816, "token_acc": 0.2789581259911065 }, { "epoch": 1.6514218704192318, "grad_norm": 0.45261859045936914, "learning_rate": 0.00013959457457343085, "loss": 3.1412386894226074, "step": 2817, "token_acc": 0.2850506843412062 }, { "epoch": 1.652008208736441, "grad_norm": 0.41976741896377545, "learning_rate": 0.00013959384511313643, "loss": 3.198582649230957, "step": 2818, "token_acc": 0.27758918716287984 }, { "epoch": 1.65259454705365, "grad_norm": 0.4586116146240429, "learning_rate": 0.00013959311499910163, "loss": 3.136387348175049, "step": 2819, "token_acc": 0.28622609214131145 }, { "epoch": 1.653180885370859, "grad_norm": 0.532615142147196, "learning_rate": 0.00013959238423133332, "loss": 3.2432801723480225, "step": 2820, "token_acc": 0.2705881419076246 }, { "epoch": 1.653767223688068, "grad_norm": 0.41890660933425833, "learning_rate": 0.0001395916528098383, "loss": 3.201712131500244, "step": 2821, "token_acc": 0.2751431868984326 }, { "epoch": 1.654353562005277, "grad_norm": 0.42273798633213167, "learning_rate": 0.00013959092073462348, "loss": 3.155172824859619, "step": 2822, "token_acc": 0.2840702908862005 }, { "epoch": 1.654939900322486, "grad_norm": 0.4290900407598739, "learning_rate": 0.00013959018800569577, "loss": 3.1677842140197754, "step": 2823, "token_acc": 0.28307488934051894 }, { "epoch": 1.6555262386396952, "grad_norm": 0.45353305655906834, "learning_rate": 0.00013958945462306198, "loss": 3.190258026123047, "step": 2824, "token_acc": 0.2787606199461607 }, { "epoch": 1.6561125769569043, "grad_norm": 0.4220465906693771, "learning_rate": 0.00013958872058672905, "loss": 3.206526756286621, "step": 2825, "token_acc": 0.27541651064547945 }, { "epoch": 1.6566989152741132, "grad_norm": 0.5002023750301355, "learning_rate": 0.00013958798589670387, "loss": 3.1819491386413574, "step": 2826, "token_acc": 0.2785203823556115 }, { "epoch": 1.657285253591322, "grad_norm": 0.6574782883823983, "learning_rate": 0.00013958725055299333, "loss": 3.194183349609375, "step": 2827, "token_acc": 0.27834771781582895 }, { "epoch": 1.6578715919085312, "grad_norm": 0.6000243826460515, "learning_rate": 0.00013958651455560434, "loss": 3.181319236755371, "step": 2828, "token_acc": 0.2798694733684817 }, { "epoch": 1.6584579302257403, "grad_norm": 0.5457597968862198, "learning_rate": 0.0001395857779045438, "loss": 3.1828832626342773, "step": 2829, "token_acc": 0.280551688963033 }, { "epoch": 1.6590442685429494, "grad_norm": 0.4815398414469595, "learning_rate": 0.00013958504059981866, "loss": 3.2162904739379883, "step": 2830, "token_acc": 0.27401734731336364 }, { "epoch": 1.6596306068601583, "grad_norm": 0.45103237904296917, "learning_rate": 0.0001395843026414358, "loss": 3.1280031204223633, "step": 2831, "token_acc": 0.28766479467877487 }, { "epoch": 1.6602169451773672, "grad_norm": 0.5525690725721473, "learning_rate": 0.00013958356402940224, "loss": 3.175048828125, "step": 2832, "token_acc": 0.2814473398618541 }, { "epoch": 1.6608032834945763, "grad_norm": 0.4197294575570815, "learning_rate": 0.0001395828247637248, "loss": 3.1838550567626953, "step": 2833, "token_acc": 0.2806887666308205 }, { "epoch": 1.6613896218117854, "grad_norm": 0.5041845872963586, "learning_rate": 0.00013958208484441054, "loss": 3.1758861541748047, "step": 2834, "token_acc": 0.2798607440351421 }, { "epoch": 1.6619759601289945, "grad_norm": 0.5176367818538399, "learning_rate": 0.00013958134427146632, "loss": 3.2046308517456055, "step": 2835, "token_acc": 0.2771461434947238 }, { "epoch": 1.6625622984462036, "grad_norm": 0.5413695710973725, "learning_rate": 0.00013958060304489916, "loss": 3.1764774322509766, "step": 2836, "token_acc": 0.2796540653868833 }, { "epoch": 1.6631486367634125, "grad_norm": 0.436111820619208, "learning_rate": 0.00013957986116471595, "loss": 3.1729514598846436, "step": 2837, "token_acc": 0.2802992815894671 }, { "epoch": 1.6637349750806214, "grad_norm": 0.4158011340181606, "learning_rate": 0.00013957911863092374, "loss": 3.2421836853027344, "step": 2838, "token_acc": 0.27218788890333573 }, { "epoch": 1.6643213133978305, "grad_norm": 0.4770905298981569, "learning_rate": 0.00013957837544352947, "loss": 3.204979658126831, "step": 2839, "token_acc": 0.2788817638932935 }, { "epoch": 1.6649076517150396, "grad_norm": 0.5109351220372688, "learning_rate": 0.00013957763160254012, "loss": 3.1848835945129395, "step": 2840, "token_acc": 0.27939617008133427 }, { "epoch": 1.6654939900322487, "grad_norm": 0.4953433308211593, "learning_rate": 0.00013957688710796267, "loss": 3.150716781616211, "step": 2841, "token_acc": 0.2833807326075835 }, { "epoch": 1.6660803283494576, "grad_norm": 0.48260382194700224, "learning_rate": 0.0001395761419598041, "loss": 3.1916308403015137, "step": 2842, "token_acc": 0.2792819662784928 }, { "epoch": 1.6666666666666665, "grad_norm": 0.5321098351866895, "learning_rate": 0.00013957539615807148, "loss": 3.201857566833496, "step": 2843, "token_acc": 0.2777333688604672 }, { "epoch": 1.6672530049838756, "grad_norm": 0.6769952248120007, "learning_rate": 0.00013957464970277173, "loss": 3.209993362426758, "step": 2844, "token_acc": 0.2771785037853946 }, { "epoch": 1.6678393433010847, "grad_norm": 0.6247904783730667, "learning_rate": 0.00013957390259391192, "loss": 3.2478137016296387, "step": 2845, "token_acc": 0.2704606028945226 }, { "epoch": 1.6684256816182939, "grad_norm": 0.4865061364277161, "learning_rate": 0.00013957315483149904, "loss": 3.1949496269226074, "step": 2846, "token_acc": 0.27928944103289166 }, { "epoch": 1.6690120199355027, "grad_norm": 0.5200610537075193, "learning_rate": 0.00013957240641554014, "loss": 3.2192654609680176, "step": 2847, "token_acc": 0.2751930834810365 }, { "epoch": 1.6695983582527119, "grad_norm": 0.5952264104454312, "learning_rate": 0.0001395716573460422, "loss": 3.1681227684020996, "step": 2848, "token_acc": 0.28217689127771 }, { "epoch": 1.6701846965699207, "grad_norm": 0.5103471191897048, "learning_rate": 0.0001395709076230123, "loss": 3.1720833778381348, "step": 2849, "token_acc": 0.2811926652786081 }, { "epoch": 1.6707710348871299, "grad_norm": 0.43647430855270536, "learning_rate": 0.00013957015724645747, "loss": 3.162659168243408, "step": 2850, "token_acc": 0.28165469295553824 }, { "epoch": 1.671357373204339, "grad_norm": 0.5900512499680479, "learning_rate": 0.00013956940621638475, "loss": 3.199403762817383, "step": 2851, "token_acc": 0.27671144490470106 }, { "epoch": 1.671943711521548, "grad_norm": 0.4557668446225046, "learning_rate": 0.0001395686545328012, "loss": 3.25905704498291, "step": 2852, "token_acc": 0.26825369280684874 }, { "epoch": 1.672530049838757, "grad_norm": 0.45188940155679713, "learning_rate": 0.00013956790219571392, "loss": 3.210366725921631, "step": 2853, "token_acc": 0.2767666036122412 }, { "epoch": 1.6731163881559659, "grad_norm": 0.6357963831783037, "learning_rate": 0.00013956714920512991, "loss": 3.197892665863037, "step": 2854, "token_acc": 0.27773870230349923 }, { "epoch": 1.673702726473175, "grad_norm": 0.5131130881000916, "learning_rate": 0.0001395663955610563, "loss": 3.172527551651001, "step": 2855, "token_acc": 0.28195824829466515 }, { "epoch": 1.674289064790384, "grad_norm": 0.5389225955579078, "learning_rate": 0.00013956564126350011, "loss": 3.216134548187256, "step": 2856, "token_acc": 0.27647101513551 }, { "epoch": 1.6748754031075932, "grad_norm": 0.6669937565185201, "learning_rate": 0.0001395648863124685, "loss": 3.2324583530426025, "step": 2857, "token_acc": 0.2731597230490009 }, { "epoch": 1.675461741424802, "grad_norm": 0.5443823672929545, "learning_rate": 0.00013956413070796852, "loss": 3.1986217498779297, "step": 2858, "token_acc": 0.27671375199395515 }, { "epoch": 1.676048079742011, "grad_norm": 0.5074711439197201, "learning_rate": 0.00013956337445000726, "loss": 3.181631326675415, "step": 2859, "token_acc": 0.280343556190192 }, { "epoch": 1.67663441805922, "grad_norm": 0.6022047213146416, "learning_rate": 0.00013956261753859183, "loss": 3.174705982208252, "step": 2860, "token_acc": 0.279127960785658 }, { "epoch": 1.6772207563764292, "grad_norm": 0.44771633588894383, "learning_rate": 0.00013956185997372934, "loss": 3.1722846031188965, "step": 2861, "token_acc": 0.2809243925577745 }, { "epoch": 1.6778070946936383, "grad_norm": 0.5434841368538486, "learning_rate": 0.00013956110175542693, "loss": 3.2112722396850586, "step": 2862, "token_acc": 0.2757631284007533 }, { "epoch": 1.6783934330108474, "grad_norm": 0.5576693480976403, "learning_rate": 0.00013956034288369168, "loss": 3.179370403289795, "step": 2863, "token_acc": 0.2806193801019351 }, { "epoch": 1.6789797713280563, "grad_norm": 0.4765049603529197, "learning_rate": 0.00013955958335853076, "loss": 3.220949411392212, "step": 2864, "token_acc": 0.27347386901364834 }, { "epoch": 1.6795661096452652, "grad_norm": 0.539187467647457, "learning_rate": 0.00013955882317995128, "loss": 3.200206756591797, "step": 2865, "token_acc": 0.2786758172657356 }, { "epoch": 1.6801524479624743, "grad_norm": 0.43785284937773744, "learning_rate": 0.0001395580623479604, "loss": 3.176138401031494, "step": 2866, "token_acc": 0.28006461242980313 }, { "epoch": 1.6807387862796834, "grad_norm": 0.5549124981230663, "learning_rate": 0.00013955730086256525, "loss": 3.2419896125793457, "step": 2867, "token_acc": 0.2722278194493589 }, { "epoch": 1.6813251245968925, "grad_norm": 0.45518441574360713, "learning_rate": 0.000139556538723773, "loss": 3.207124948501587, "step": 2868, "token_acc": 0.2775053152288597 }, { "epoch": 1.6819114629141014, "grad_norm": 0.46256694489936956, "learning_rate": 0.0001395557759315908, "loss": 3.153456211090088, "step": 2869, "token_acc": 0.2822733824929877 }, { "epoch": 1.6824978012313103, "grad_norm": 0.46829319016249255, "learning_rate": 0.00013955501248602575, "loss": 3.240262031555176, "step": 2870, "token_acc": 0.27202005562301695 }, { "epoch": 1.6830841395485194, "grad_norm": 0.5865728864018644, "learning_rate": 0.00013955424838708514, "loss": 3.18632435798645, "step": 2871, "token_acc": 0.2772832721398916 }, { "epoch": 1.6836704778657285, "grad_norm": 0.6486525746288135, "learning_rate": 0.00013955348363477608, "loss": 3.1936917304992676, "step": 2872, "token_acc": 0.27675760479294115 }, { "epoch": 1.6842568161829377, "grad_norm": 0.49420738072095316, "learning_rate": 0.00013955271822910576, "loss": 3.1764581203460693, "step": 2873, "token_acc": 0.27913451106425663 }, { "epoch": 1.6848431545001465, "grad_norm": 0.5068494526181408, "learning_rate": 0.00013955195217008138, "loss": 3.197007179260254, "step": 2874, "token_acc": 0.2773357746815887 }, { "epoch": 1.6854294928173557, "grad_norm": 0.5318251249586854, "learning_rate": 0.00013955118545771014, "loss": 3.192561626434326, "step": 2875, "token_acc": 0.27919389694586927 }, { "epoch": 1.6860158311345645, "grad_norm": 0.5794675902339713, "learning_rate": 0.00013955041809199923, "loss": 3.2102742195129395, "step": 2876, "token_acc": 0.27673261222594464 }, { "epoch": 1.6866021694517737, "grad_norm": 0.5757665589101576, "learning_rate": 0.00013954965007295588, "loss": 3.2481653690338135, "step": 2877, "token_acc": 0.2705823846522585 }, { "epoch": 1.6871885077689828, "grad_norm": 0.6058025866406351, "learning_rate": 0.00013954888140058725, "loss": 3.1760916709899902, "step": 2878, "token_acc": 0.279160861156777 }, { "epoch": 1.6877748460861919, "grad_norm": 0.6610942810111985, "learning_rate": 0.00013954811207490063, "loss": 3.1604933738708496, "step": 2879, "token_acc": 0.28304454758020936 }, { "epoch": 1.6883611844034008, "grad_norm": 0.5414882859518526, "learning_rate": 0.00013954734209590318, "loss": 3.190805673599243, "step": 2880, "token_acc": 0.2784567913602757 }, { "epoch": 1.6889475227206097, "grad_norm": 0.4637886839462668, "learning_rate": 0.00013954657146360218, "loss": 3.1659021377563477, "step": 2881, "token_acc": 0.2838228980934497 }, { "epoch": 1.6895338610378188, "grad_norm": 0.6234768544578989, "learning_rate": 0.0001395458001780049, "loss": 3.219486951828003, "step": 2882, "token_acc": 0.2742258761715868 }, { "epoch": 1.6901201993550279, "grad_norm": 0.5641465734382756, "learning_rate": 0.0001395450282391185, "loss": 3.1865673065185547, "step": 2883, "token_acc": 0.27912131690101283 }, { "epoch": 1.690706537672237, "grad_norm": 0.4670298810627386, "learning_rate": 0.00013954425564695027, "loss": 3.1794309616088867, "step": 2884, "token_acc": 0.27910618973040074 }, { "epoch": 1.6912928759894459, "grad_norm": 0.47803505756969444, "learning_rate": 0.00013954348240150747, "loss": 3.2148330211639404, "step": 2885, "token_acc": 0.2742665479185221 }, { "epoch": 1.6918792143066548, "grad_norm": 0.44426121021753384, "learning_rate": 0.00013954270850279735, "loss": 3.1834311485290527, "step": 2886, "token_acc": 0.27809286779779996 }, { "epoch": 1.692465552623864, "grad_norm": 0.521505522671949, "learning_rate": 0.00013954193395082724, "loss": 3.164466619491577, "step": 2887, "token_acc": 0.28132161803183997 }, { "epoch": 1.693051890941073, "grad_norm": 0.4922144177860282, "learning_rate": 0.00013954115874560433, "loss": 3.1448230743408203, "step": 2888, "token_acc": 0.28569908297838653 }, { "epoch": 1.6936382292582821, "grad_norm": 0.41341266771283247, "learning_rate": 0.00013954038288713596, "loss": 3.1173534393310547, "step": 2889, "token_acc": 0.28761181175998185 }, { "epoch": 1.6942245675754912, "grad_norm": 0.3961812581171599, "learning_rate": 0.0001395396063754294, "loss": 3.176060438156128, "step": 2890, "token_acc": 0.27926382420166534 }, { "epoch": 1.6948109058927001, "grad_norm": 0.3699812721394128, "learning_rate": 0.00013953882921049194, "loss": 3.2018561363220215, "step": 2891, "token_acc": 0.27722891074145556 }, { "epoch": 1.695397244209909, "grad_norm": 0.45204004423481264, "learning_rate": 0.00013953805139233088, "loss": 3.2166409492492676, "step": 2892, "token_acc": 0.27446675505769785 }, { "epoch": 1.6959835825271181, "grad_norm": 0.5329460584295618, "learning_rate": 0.00013953727292095354, "loss": 3.2047715187072754, "step": 2893, "token_acc": 0.2760543403468903 }, { "epoch": 1.6965699208443272, "grad_norm": 0.4508953580350445, "learning_rate": 0.0001395364937963672, "loss": 3.195629119873047, "step": 2894, "token_acc": 0.276438663748726 }, { "epoch": 1.6971562591615363, "grad_norm": 0.44147930439888555, "learning_rate": 0.00013953571401857925, "loss": 3.17089581489563, "step": 2895, "token_acc": 0.2804166494397817 }, { "epoch": 1.6977425974787452, "grad_norm": 0.4524174428338693, "learning_rate": 0.00013953493358759693, "loss": 3.175448417663574, "step": 2896, "token_acc": 0.27918275915340646 }, { "epoch": 1.6983289357959541, "grad_norm": 0.39442655294684786, "learning_rate": 0.0001395341525034276, "loss": 3.1915037631988525, "step": 2897, "token_acc": 0.27754377626196486 }, { "epoch": 1.6989152741131632, "grad_norm": 0.5189595783498936, "learning_rate": 0.00013953337076607863, "loss": 3.1603307723999023, "step": 2898, "token_acc": 0.28175767589006967 }, { "epoch": 1.6995016124303723, "grad_norm": 0.42544850144807234, "learning_rate": 0.00013953258837555733, "loss": 3.2102208137512207, "step": 2899, "token_acc": 0.27531493541155116 }, { "epoch": 1.7000879507475815, "grad_norm": 0.4737847539077121, "learning_rate": 0.00013953180533187107, "loss": 3.2122111320495605, "step": 2900, "token_acc": 0.27554507323479355 }, { "epoch": 1.7006742890647903, "grad_norm": 0.4175068425662153, "learning_rate": 0.0001395310216350272, "loss": 3.193995475769043, "step": 2901, "token_acc": 0.2781991379242496 }, { "epoch": 1.7012606273819995, "grad_norm": 0.4786782901048263, "learning_rate": 0.00013953023728503303, "loss": 3.2242555618286133, "step": 2902, "token_acc": 0.2742550568371782 }, { "epoch": 1.7018469656992083, "grad_norm": 0.3466876758419877, "learning_rate": 0.00013952945228189602, "loss": 3.188281297683716, "step": 2903, "token_acc": 0.27830980561919944 }, { "epoch": 1.7024333040164175, "grad_norm": 0.4158824169137935, "learning_rate": 0.00013952866662562347, "loss": 3.164700984954834, "step": 2904, "token_acc": 0.2823380959266026 }, { "epoch": 1.7030196423336266, "grad_norm": 0.4693047461777087, "learning_rate": 0.0001395278803162228, "loss": 3.196770191192627, "step": 2905, "token_acc": 0.27674774090904364 }, { "epoch": 1.7036059806508357, "grad_norm": 0.4141556079474966, "learning_rate": 0.0001395270933537014, "loss": 3.162008762359619, "step": 2906, "token_acc": 0.2816121656330264 }, { "epoch": 1.7041923189680446, "grad_norm": 0.3867629567941042, "learning_rate": 0.00013952630573806662, "loss": 3.1594057083129883, "step": 2907, "token_acc": 0.2841587279258721 }, { "epoch": 1.7047786572852535, "grad_norm": 0.44393715671487, "learning_rate": 0.0001395255174693259, "loss": 3.1479883193969727, "step": 2908, "token_acc": 0.28350913572932424 }, { "epoch": 1.7053649956024626, "grad_norm": 0.42292607668602766, "learning_rate": 0.0001395247285474866, "loss": 3.217233180999756, "step": 2909, "token_acc": 0.274435907099328 }, { "epoch": 1.7059513339196717, "grad_norm": 0.4332760783092111, "learning_rate": 0.00013952393897255621, "loss": 3.211113452911377, "step": 2910, "token_acc": 0.275207556250752 }, { "epoch": 1.7065376722368808, "grad_norm": 0.4424804360715772, "learning_rate": 0.00013952314874454206, "loss": 3.2433886528015137, "step": 2911, "token_acc": 0.27105401355141984 }, { "epoch": 1.7071240105540897, "grad_norm": 0.41509452549170445, "learning_rate": 0.00013952235786345162, "loss": 3.1641430854797363, "step": 2912, "token_acc": 0.281877070067027 }, { "epoch": 1.7077103488712986, "grad_norm": 0.43893946826783337, "learning_rate": 0.00013952156632929234, "loss": 3.1805288791656494, "step": 2913, "token_acc": 0.2804994035528903 }, { "epoch": 1.7082966871885077, "grad_norm": 0.436286982701825, "learning_rate": 0.0001395207741420716, "loss": 3.205930233001709, "step": 2914, "token_acc": 0.27706742033431747 }, { "epoch": 1.7088830255057168, "grad_norm": 0.47046190282830164, "learning_rate": 0.00013951998130179688, "loss": 3.17008376121521, "step": 2915, "token_acc": 0.2816604637560894 }, { "epoch": 1.709469363822926, "grad_norm": 0.5599994404828619, "learning_rate": 0.0001395191878084756, "loss": 3.1617746353149414, "step": 2916, "token_acc": 0.2810939172014868 }, { "epoch": 1.7100557021401348, "grad_norm": 0.4988045832552724, "learning_rate": 0.00013951839366211524, "loss": 3.169635772705078, "step": 2917, "token_acc": 0.28143373659005566 }, { "epoch": 1.710642040457344, "grad_norm": 0.4080732724867815, "learning_rate": 0.00013951759886272325, "loss": 3.180755138397217, "step": 2918, "token_acc": 0.279463860236553 }, { "epoch": 1.7112283787745528, "grad_norm": 0.4625266067108884, "learning_rate": 0.00013951680341030707, "loss": 3.2208359241485596, "step": 2919, "token_acc": 0.27521771170076004 }, { "epoch": 1.711814717091762, "grad_norm": 0.5395260563757204, "learning_rate": 0.00013951600730487422, "loss": 3.1578030586242676, "step": 2920, "token_acc": 0.28264173147769134 }, { "epoch": 1.712401055408971, "grad_norm": 0.4591262318914862, "learning_rate": 0.00013951521054643214, "loss": 3.1954503059387207, "step": 2921, "token_acc": 0.2763648047815842 }, { "epoch": 1.7129873937261801, "grad_norm": 0.43640060244321155, "learning_rate": 0.00013951441313498836, "loss": 3.209993839263916, "step": 2922, "token_acc": 0.2748840943821389 }, { "epoch": 1.713573732043389, "grad_norm": 0.4616115772308012, "learning_rate": 0.0001395136150705503, "loss": 3.176500082015991, "step": 2923, "token_acc": 0.2788897791905844 }, { "epoch": 1.714160070360598, "grad_norm": 0.494265904624796, "learning_rate": 0.00013951281635312554, "loss": 3.2003817558288574, "step": 2924, "token_acc": 0.27823480956406754 }, { "epoch": 1.714746408677807, "grad_norm": 0.5635356302275609, "learning_rate": 0.0001395120169827215, "loss": 3.1935057640075684, "step": 2925, "token_acc": 0.28065587915744833 }, { "epoch": 1.7153327469950161, "grad_norm": 0.5127042386437264, "learning_rate": 0.00013951121695934574, "loss": 3.1515421867370605, "step": 2926, "token_acc": 0.2841111085085507 }, { "epoch": 1.7159190853122253, "grad_norm": 0.4841648993396093, "learning_rate": 0.00013951041628300579, "loss": 3.154813528060913, "step": 2927, "token_acc": 0.2831344999361349 }, { "epoch": 1.7165054236294341, "grad_norm": 0.48802315656140666, "learning_rate": 0.0001395096149537091, "loss": 3.231116771697998, "step": 2928, "token_acc": 0.27359472862183754 }, { "epoch": 1.7170917619466433, "grad_norm": 0.4583969628442934, "learning_rate": 0.00013950881297146328, "loss": 3.1783218383789062, "step": 2929, "token_acc": 0.2786178434564167 }, { "epoch": 1.7176781002638521, "grad_norm": 0.4457269334996295, "learning_rate": 0.0001395080103362758, "loss": 3.1708686351776123, "step": 2930, "token_acc": 0.2817298153088552 }, { "epoch": 1.7182644385810613, "grad_norm": 0.5076840065368774, "learning_rate": 0.00013950720704815426, "loss": 3.167386531829834, "step": 2931, "token_acc": 0.28267361662574625 }, { "epoch": 1.7188507768982704, "grad_norm": 0.38586640155236157, "learning_rate": 0.00013950640310710617, "loss": 3.2086544036865234, "step": 2932, "token_acc": 0.2757667722636479 }, { "epoch": 1.7194371152154795, "grad_norm": 0.47240231386336135, "learning_rate": 0.00013950559851313906, "loss": 3.215143918991089, "step": 2933, "token_acc": 0.2740819670375566 }, { "epoch": 1.7200234535326884, "grad_norm": 0.5098640838817804, "learning_rate": 0.00013950479326626052, "loss": 3.2364985942840576, "step": 2934, "token_acc": 0.272879769156781 }, { "epoch": 1.7206097918498973, "grad_norm": 0.5119325405472774, "learning_rate": 0.0001395039873664781, "loss": 3.208951473236084, "step": 2935, "token_acc": 0.277185153777741 }, { "epoch": 1.7211961301671064, "grad_norm": 0.4316573538361131, "learning_rate": 0.00013950318081379937, "loss": 3.143176317214966, "step": 2936, "token_acc": 0.2840859237904237 }, { "epoch": 1.7217824684843155, "grad_norm": 0.5975299782852675, "learning_rate": 0.00013950237360823192, "loss": 3.141960620880127, "step": 2937, "token_acc": 0.2846559847756267 }, { "epoch": 1.7223688068015246, "grad_norm": 0.5502406246931524, "learning_rate": 0.00013950156574978336, "loss": 3.179710865020752, "step": 2938, "token_acc": 0.2816832027757104 }, { "epoch": 1.7229551451187335, "grad_norm": 0.36996452104275485, "learning_rate": 0.0001395007572384612, "loss": 3.1909027099609375, "step": 2939, "token_acc": 0.27840222008619886 }, { "epoch": 1.7235414834359424, "grad_norm": 0.5222221648602318, "learning_rate": 0.0001394999480742731, "loss": 3.1930480003356934, "step": 2940, "token_acc": 0.27837943364432205 }, { "epoch": 1.7241278217531515, "grad_norm": 0.5038578229363613, "learning_rate": 0.00013949913825722664, "loss": 3.1991958618164062, "step": 2941, "token_acc": 0.275468227729711 }, { "epoch": 1.7247141600703606, "grad_norm": 0.3647906137176386, "learning_rate": 0.0001394983277873294, "loss": 3.1558191776275635, "step": 2942, "token_acc": 0.2833918374179902 }, { "epoch": 1.7253004983875697, "grad_norm": 0.45509694732618405, "learning_rate": 0.00013949751666458905, "loss": 3.2349202632904053, "step": 2943, "token_acc": 0.27253721855126095 }, { "epoch": 1.7258868367047786, "grad_norm": 0.4320660772498575, "learning_rate": 0.00013949670488901317, "loss": 3.1631388664245605, "step": 2944, "token_acc": 0.28173722578132554 }, { "epoch": 1.7264731750219877, "grad_norm": 0.4350348818562048, "learning_rate": 0.0001394958924606094, "loss": 3.154768228530884, "step": 2945, "token_acc": 0.2834255377863047 }, { "epoch": 1.7270595133391966, "grad_norm": 0.42446626514713987, "learning_rate": 0.00013949507937938537, "loss": 3.151427984237671, "step": 2946, "token_acc": 0.2828361886540859 }, { "epoch": 1.7276458516564057, "grad_norm": 0.5093761388172953, "learning_rate": 0.0001394942656453487, "loss": 3.2252631187438965, "step": 2947, "token_acc": 0.27546640077667867 }, { "epoch": 1.7282321899736148, "grad_norm": 0.5503227731438836, "learning_rate": 0.00013949345125850707, "loss": 3.2185187339782715, "step": 2948, "token_acc": 0.27485536942632044 }, { "epoch": 1.728818528290824, "grad_norm": 0.4997795171607372, "learning_rate": 0.0001394926362188681, "loss": 3.205934762954712, "step": 2949, "token_acc": 0.27448801966004505 }, { "epoch": 1.7294048666080328, "grad_norm": 0.550953038707095, "learning_rate": 0.00013949182052643946, "loss": 3.1781582832336426, "step": 2950, "token_acc": 0.28031440154329285 }, { "epoch": 1.7299912049252417, "grad_norm": 0.6086567028349118, "learning_rate": 0.0001394910041812288, "loss": 3.195383071899414, "step": 2951, "token_acc": 0.2771617887842816 }, { "epoch": 1.7305775432424508, "grad_norm": 0.5316814126561198, "learning_rate": 0.0001394901871832438, "loss": 3.1902315616607666, "step": 2952, "token_acc": 0.2773252560188636 }, { "epoch": 1.73116388155966, "grad_norm": 0.5605691783151437, "learning_rate": 0.0001394893695324921, "loss": 3.2069578170776367, "step": 2953, "token_acc": 0.27646928936812276 }, { "epoch": 1.731750219876869, "grad_norm": 0.5356918339824384, "learning_rate": 0.00013948855122898146, "loss": 3.163416624069214, "step": 2954, "token_acc": 0.2821670249815554 }, { "epoch": 1.732336558194078, "grad_norm": 0.5237445660874269, "learning_rate": 0.00013948773227271947, "loss": 3.1913068294525146, "step": 2955, "token_acc": 0.2784569291100432 }, { "epoch": 1.732922896511287, "grad_norm": 0.6240798691395383, "learning_rate": 0.00013948691266371392, "loss": 3.184932231903076, "step": 2956, "token_acc": 0.27978044917183287 }, { "epoch": 1.733509234828496, "grad_norm": 0.4967175673780042, "learning_rate": 0.00013948609240197244, "loss": 3.1733694076538086, "step": 2957, "token_acc": 0.2814316899158769 }, { "epoch": 1.734095573145705, "grad_norm": 0.5216363535558579, "learning_rate": 0.00013948527148750276, "loss": 3.158154010772705, "step": 2958, "token_acc": 0.2809403226648567 }, { "epoch": 1.7346819114629142, "grad_norm": 0.44785391395681656, "learning_rate": 0.00013948444992031256, "loss": 3.224536418914795, "step": 2959, "token_acc": 0.27393745072829534 }, { "epoch": 1.7352682497801233, "grad_norm": 0.3939378560534969, "learning_rate": 0.00013948362770040961, "loss": 3.218186378479004, "step": 2960, "token_acc": 0.2743979484664147 }, { "epoch": 1.7358545880973322, "grad_norm": 0.4097630621572904, "learning_rate": 0.00013948280482780162, "loss": 3.1982107162475586, "step": 2961, "token_acc": 0.27736682731488355 }, { "epoch": 1.736440926414541, "grad_norm": 0.4257859017118626, "learning_rate": 0.0001394819813024963, "loss": 3.1899404525756836, "step": 2962, "token_acc": 0.27826653893235404 }, { "epoch": 1.7370272647317502, "grad_norm": 0.4624072721485425, "learning_rate": 0.00013948115712450137, "loss": 3.1601107120513916, "step": 2963, "token_acc": 0.2818528379681282 }, { "epoch": 1.7376136030489593, "grad_norm": 0.4405406604915479, "learning_rate": 0.0001394803322938246, "loss": 3.167111396789551, "step": 2964, "token_acc": 0.28262688596605917 }, { "epoch": 1.7381999413661684, "grad_norm": 0.5062994772396637, "learning_rate": 0.00013947950681047377, "loss": 3.2587504386901855, "step": 2965, "token_acc": 0.26847137745736654 }, { "epoch": 1.7387862796833773, "grad_norm": 0.5400153325101194, "learning_rate": 0.00013947868067445656, "loss": 3.2397029399871826, "step": 2966, "token_acc": 0.2705121842855324 }, { "epoch": 1.7393726180005862, "grad_norm": 0.5733149632288351, "learning_rate": 0.0001394778538857808, "loss": 3.2459867000579834, "step": 2967, "token_acc": 0.270364892688701 }, { "epoch": 1.7399589563177953, "grad_norm": 0.5708080986055255, "learning_rate": 0.00013947702644445418, "loss": 3.160269021987915, "step": 2968, "token_acc": 0.2834344190023245 }, { "epoch": 1.7405452946350044, "grad_norm": 0.553060852098264, "learning_rate": 0.00013947619835048456, "loss": 3.182892322540283, "step": 2969, "token_acc": 0.2796223575289047 }, { "epoch": 1.7411316329522135, "grad_norm": 0.48805625600322816, "learning_rate": 0.00013947536960387966, "loss": 3.192620038986206, "step": 2970, "token_acc": 0.27661618739129074 }, { "epoch": 1.7417179712694224, "grad_norm": 0.5672247990786181, "learning_rate": 0.0001394745402046473, "loss": 3.1784310340881348, "step": 2971, "token_acc": 0.276687059600326 }, { "epoch": 1.7423043095866315, "grad_norm": 0.4188954713964969, "learning_rate": 0.00013947371015279522, "loss": 3.1774685382843018, "step": 2972, "token_acc": 0.2790063807998319 }, { "epoch": 1.7428906479038404, "grad_norm": 0.5455829113753253, "learning_rate": 0.00013947287944833127, "loss": 3.228214979171753, "step": 2973, "token_acc": 0.2729039841690077 }, { "epoch": 1.7434769862210495, "grad_norm": 0.415223071559554, "learning_rate": 0.00013947204809126323, "loss": 3.116982936859131, "step": 2974, "token_acc": 0.2868892478535701 }, { "epoch": 1.7440633245382586, "grad_norm": 0.5000350945195606, "learning_rate": 0.0001394712160815989, "loss": 3.116703510284424, "step": 2975, "token_acc": 0.29025964941370325 }, { "epoch": 1.7446496628554677, "grad_norm": 0.5189122809087943, "learning_rate": 0.00013947038341934612, "loss": 3.1880321502685547, "step": 2976, "token_acc": 0.2803163158855919 }, { "epoch": 1.7452360011726766, "grad_norm": 0.47862244713686014, "learning_rate": 0.00013946955010451273, "loss": 3.1766796112060547, "step": 2977, "token_acc": 0.2812102597946518 }, { "epoch": 1.7458223394898855, "grad_norm": 0.4903419555635945, "learning_rate": 0.00013946871613710647, "loss": 3.1763789653778076, "step": 2978, "token_acc": 0.2796772453359855 }, { "epoch": 1.7464086778070946, "grad_norm": 0.4768659895339211, "learning_rate": 0.00013946788151713527, "loss": 3.192786931991577, "step": 2979, "token_acc": 0.27897821179104754 }, { "epoch": 1.7469950161243037, "grad_norm": 0.48316155906323704, "learning_rate": 0.00013946704624460694, "loss": 3.2073707580566406, "step": 2980, "token_acc": 0.27564174334337593 }, { "epoch": 1.7475813544415129, "grad_norm": 0.4171233387417619, "learning_rate": 0.0001394662103195293, "loss": 3.199120283126831, "step": 2981, "token_acc": 0.2773482712762483 }, { "epoch": 1.7481676927587217, "grad_norm": 0.45221568146353547, "learning_rate": 0.00013946537374191022, "loss": 3.1399085521698, "step": 2982, "token_acc": 0.28386914107744976 }, { "epoch": 1.7487540310759309, "grad_norm": 0.4769872415687329, "learning_rate": 0.00013946453651175758, "loss": 3.165022850036621, "step": 2983, "token_acc": 0.28225883909800115 }, { "epoch": 1.7493403693931397, "grad_norm": 0.5051013868212123, "learning_rate": 0.0001394636986290792, "loss": 3.1957502365112305, "step": 2984, "token_acc": 0.275668699332427 }, { "epoch": 1.7499267077103489, "grad_norm": 0.40459847663417486, "learning_rate": 0.00013946286009388297, "loss": 3.159456253051758, "step": 2985, "token_acc": 0.2827267916069983 }, { "epoch": 1.750513046027558, "grad_norm": 0.4676264988888676, "learning_rate": 0.0001394620209061768, "loss": 3.1672000885009766, "step": 2986, "token_acc": 0.2810569637682441 }, { "epoch": 1.751099384344767, "grad_norm": 0.5396543774404716, "learning_rate": 0.00013946118106596852, "loss": 3.126330614089966, "step": 2987, "token_acc": 0.2854535028697386 }, { "epoch": 1.751685722661976, "grad_norm": 0.5863893412891895, "learning_rate": 0.00013946034057326606, "loss": 3.2014126777648926, "step": 2988, "token_acc": 0.27612037606209866 }, { "epoch": 1.7522720609791849, "grad_norm": 0.4906860736010148, "learning_rate": 0.0001394594994280773, "loss": 3.1664700508117676, "step": 2989, "token_acc": 0.28078000180675466 }, { "epoch": 1.752858399296394, "grad_norm": 0.49839301063087105, "learning_rate": 0.00013945865763041014, "loss": 3.1140999794006348, "step": 2990, "token_acc": 0.29097877821586215 }, { "epoch": 1.753444737613603, "grad_norm": 0.5304102678540147, "learning_rate": 0.00013945781518027246, "loss": 3.157644748687744, "step": 2991, "token_acc": 0.2822222625363151 }, { "epoch": 1.7540310759308122, "grad_norm": 0.5632956151026114, "learning_rate": 0.00013945697207767222, "loss": 3.15358829498291, "step": 2992, "token_acc": 0.28113072146935547 }, { "epoch": 1.754617414248021, "grad_norm": 0.4310130893586471, "learning_rate": 0.00013945612832261733, "loss": 3.1892380714416504, "step": 2993, "token_acc": 0.27931632189888167 }, { "epoch": 1.75520375256523, "grad_norm": 0.4666505466780538, "learning_rate": 0.0001394552839151157, "loss": 3.202209949493408, "step": 2994, "token_acc": 0.2760693672087078 }, { "epoch": 1.755790090882439, "grad_norm": 0.5529548083585759, "learning_rate": 0.00013945443885517527, "loss": 3.1285104751586914, "step": 2995, "token_acc": 0.2852018265987976 }, { "epoch": 1.7563764291996482, "grad_norm": 0.41934765247948497, "learning_rate": 0.000139453593142804, "loss": 3.1727442741394043, "step": 2996, "token_acc": 0.2803262238514074 }, { "epoch": 1.7569627675168573, "grad_norm": 0.5349107559300248, "learning_rate": 0.0001394527467780098, "loss": 3.1704483032226562, "step": 2997, "token_acc": 0.28015986414625826 }, { "epoch": 1.7575491058340662, "grad_norm": 0.5099112084966309, "learning_rate": 0.0001394518997608006, "loss": 3.235971450805664, "step": 2998, "token_acc": 0.270647931303669 }, { "epoch": 1.7581354441512753, "grad_norm": 0.511299699510549, "learning_rate": 0.00013945105209118444, "loss": 3.153714418411255, "step": 2999, "token_acc": 0.28118182620647914 }, { "epoch": 1.7587217824684842, "grad_norm": 0.4967359037942677, "learning_rate": 0.0001394502037691692, "loss": 3.1984848976135254, "step": 3000, "token_acc": 0.2775382521570002 }, { "epoch": 1.7593081207856933, "grad_norm": 0.4227366080248958, "learning_rate": 0.0001394493547947629, "loss": 3.1918160915374756, "step": 3001, "token_acc": 0.2781750766791329 }, { "epoch": 1.7598944591029024, "grad_norm": 0.5150412828720263, "learning_rate": 0.00013944850516797348, "loss": 3.182196855545044, "step": 3002, "token_acc": 0.28004044987105264 }, { "epoch": 1.7604807974201115, "grad_norm": 0.37625441542999466, "learning_rate": 0.00013944765488880893, "loss": 3.169992446899414, "step": 3003, "token_acc": 0.2814530872512892 }, { "epoch": 1.7610671357373204, "grad_norm": 0.434386105441463, "learning_rate": 0.00013944680395727726, "loss": 3.172844171524048, "step": 3004, "token_acc": 0.27954541152803053 }, { "epoch": 1.7616534740545293, "grad_norm": 0.3943852902861319, "learning_rate": 0.00013944595237338646, "loss": 3.1710798740386963, "step": 3005, "token_acc": 0.27874392416185956 }, { "epoch": 1.7622398123717384, "grad_norm": 0.47467158255216385, "learning_rate": 0.00013944510013714448, "loss": 3.1470108032226562, "step": 3006, "token_acc": 0.2832109614535113 }, { "epoch": 1.7628261506889475, "grad_norm": 0.39725120203159514, "learning_rate": 0.00013944424724855937, "loss": 3.1844558715820312, "step": 3007, "token_acc": 0.2771128710457774 }, { "epoch": 1.7634124890061567, "grad_norm": 0.37136013478850843, "learning_rate": 0.00013944339370763916, "loss": 3.1456289291381836, "step": 3008, "token_acc": 0.2834135860746811 }, { "epoch": 1.7639988273233655, "grad_norm": 0.43207921243122227, "learning_rate": 0.00013944253951439183, "loss": 3.1810903549194336, "step": 3009, "token_acc": 0.27998436482084693 }, { "epoch": 1.7645851656405747, "grad_norm": 0.4221732135480788, "learning_rate": 0.00013944168466882543, "loss": 3.1915488243103027, "step": 3010, "token_acc": 0.2781500864334936 }, { "epoch": 1.7651715039577835, "grad_norm": 0.47347331784116814, "learning_rate": 0.00013944082917094795, "loss": 3.1884632110595703, "step": 3011, "token_acc": 0.2793217467344373 }, { "epoch": 1.7657578422749927, "grad_norm": 0.43010201307217233, "learning_rate": 0.00013943997302076747, "loss": 3.1599695682525635, "step": 3012, "token_acc": 0.28175054704595187 }, { "epoch": 1.7663441805922018, "grad_norm": 0.3591985550750633, "learning_rate": 0.000139439116218292, "loss": 3.1863489151000977, "step": 3013, "token_acc": 0.27946587552701 }, { "epoch": 1.7669305189094109, "grad_norm": 0.5044686289104184, "learning_rate": 0.00013943825876352962, "loss": 3.190765857696533, "step": 3014, "token_acc": 0.277968006691411 }, { "epoch": 1.7675168572266198, "grad_norm": 0.5064167124304537, "learning_rate": 0.00013943740065648836, "loss": 3.166590690612793, "step": 3015, "token_acc": 0.28248198596430835 }, { "epoch": 1.7681031955438287, "grad_norm": 0.5949833317644018, "learning_rate": 0.0001394365418971763, "loss": 3.1810693740844727, "step": 3016, "token_acc": 0.28107884139788425 }, { "epoch": 1.7686895338610378, "grad_norm": 0.6401682602803276, "learning_rate": 0.0001394356824856015, "loss": 3.190455436706543, "step": 3017, "token_acc": 0.2764243300501564 }, { "epoch": 1.7692758721782469, "grad_norm": 0.511690273723341, "learning_rate": 0.000139434822421772, "loss": 3.1694912910461426, "step": 3018, "token_acc": 0.28136341278598304 }, { "epoch": 1.769862210495456, "grad_norm": 0.5526399574080202, "learning_rate": 0.0001394339617056959, "loss": 3.1650853157043457, "step": 3019, "token_acc": 0.2824990288611954 }, { "epoch": 1.770448548812665, "grad_norm": 0.605855518965393, "learning_rate": 0.00013943310033738134, "loss": 3.2166709899902344, "step": 3020, "token_acc": 0.274029546297441 }, { "epoch": 1.7710348871298738, "grad_norm": 0.48482368847720975, "learning_rate": 0.00013943223831683633, "loss": 3.206500768661499, "step": 3021, "token_acc": 0.27586553086394966 }, { "epoch": 1.771621225447083, "grad_norm": 0.4961074485617399, "learning_rate": 0.00013943137564406902, "loss": 3.1531145572662354, "step": 3022, "token_acc": 0.282132487433008 }, { "epoch": 1.772207563764292, "grad_norm": 0.48361406338130825, "learning_rate": 0.00013943051231908747, "loss": 3.1888012886047363, "step": 3023, "token_acc": 0.2773923001329244 }, { "epoch": 1.7727939020815011, "grad_norm": 0.4330257959736322, "learning_rate": 0.00013942964834189986, "loss": 3.1647696495056152, "step": 3024, "token_acc": 0.28169150673744386 }, { "epoch": 1.77338024039871, "grad_norm": 0.50388100863475, "learning_rate": 0.00013942878371251424, "loss": 3.159562110900879, "step": 3025, "token_acc": 0.28265711632308416 }, { "epoch": 1.7739665787159191, "grad_norm": 0.4441763983328152, "learning_rate": 0.00013942791843093874, "loss": 3.2118473052978516, "step": 3026, "token_acc": 0.2745061063664018 }, { "epoch": 1.774552917033128, "grad_norm": 0.458876206146385, "learning_rate": 0.0001394270524971815, "loss": 3.1720352172851562, "step": 3027, "token_acc": 0.2802787467395745 }, { "epoch": 1.7751392553503371, "grad_norm": 0.4600232339135988, "learning_rate": 0.00013942618591125067, "loss": 3.1900811195373535, "step": 3028, "token_acc": 0.2795505570715754 }, { "epoch": 1.7757255936675462, "grad_norm": 0.3889859574534732, "learning_rate": 0.00013942531867315437, "loss": 3.123332977294922, "step": 3029, "token_acc": 0.2884469449169174 }, { "epoch": 1.7763119319847553, "grad_norm": 0.48405913544379675, "learning_rate": 0.00013942445078290078, "loss": 3.1768553256988525, "step": 3030, "token_acc": 0.2789639352247728 }, { "epoch": 1.7768982703019642, "grad_norm": 0.4722057542885997, "learning_rate": 0.00013942358224049799, "loss": 3.175858974456787, "step": 3031, "token_acc": 0.2792553675459416 }, { "epoch": 1.7774846086191731, "grad_norm": 0.4350024296163273, "learning_rate": 0.0001394227130459542, "loss": 3.190156936645508, "step": 3032, "token_acc": 0.2774042337667707 }, { "epoch": 1.7780709469363822, "grad_norm": 0.5046064488540578, "learning_rate": 0.00013942184319927758, "loss": 3.142198324203491, "step": 3033, "token_acc": 0.2827607620142467 }, { "epoch": 1.7786572852535913, "grad_norm": 0.5800926879351861, "learning_rate": 0.00013942097270047628, "loss": 3.166781425476074, "step": 3034, "token_acc": 0.2803894737123989 }, { "epoch": 1.7792436235708005, "grad_norm": 0.5709123615696609, "learning_rate": 0.0001394201015495585, "loss": 3.206200122833252, "step": 3035, "token_acc": 0.2765094669325494 }, { "epoch": 1.7798299618880093, "grad_norm": 0.3972953256325521, "learning_rate": 0.0001394192297465324, "loss": 3.1656837463378906, "step": 3036, "token_acc": 0.2807137919426006 }, { "epoch": 1.7804163002052185, "grad_norm": 0.39585383171947364, "learning_rate": 0.0001394183572914062, "loss": 3.1558756828308105, "step": 3037, "token_acc": 0.28343611574075284 }, { "epoch": 1.7810026385224274, "grad_norm": 0.39699858045426384, "learning_rate": 0.00013941748418418805, "loss": 3.1890172958374023, "step": 3038, "token_acc": 0.2774416237359775 }, { "epoch": 1.7815889768396365, "grad_norm": 0.42707524167193733, "learning_rate": 0.00013941661042488618, "loss": 3.1542139053344727, "step": 3039, "token_acc": 0.2819716446668392 }, { "epoch": 1.7821753151568456, "grad_norm": 0.39691244402946485, "learning_rate": 0.00013941573601350879, "loss": 3.1789612770080566, "step": 3040, "token_acc": 0.2800704547927733 }, { "epoch": 1.7827616534740547, "grad_norm": 0.38438277205573457, "learning_rate": 0.00013941486095006412, "loss": 3.180464744567871, "step": 3041, "token_acc": 0.27963271697214465 }, { "epoch": 1.7833479917912636, "grad_norm": 0.4089080870739832, "learning_rate": 0.00013941398523456037, "loss": 3.1594676971435547, "step": 3042, "token_acc": 0.28323771278162574 }, { "epoch": 1.7839343301084725, "grad_norm": 0.3971799476222808, "learning_rate": 0.00013941310886700576, "loss": 3.214721202850342, "step": 3043, "token_acc": 0.2776881145788799 }, { "epoch": 1.7845206684256816, "grad_norm": 0.4160673802639119, "learning_rate": 0.00013941223184740849, "loss": 3.1675150394439697, "step": 3044, "token_acc": 0.2799491331879996 }, { "epoch": 1.7851070067428907, "grad_norm": 0.4317510544713243, "learning_rate": 0.0001394113541757769, "loss": 3.1531519889831543, "step": 3045, "token_acc": 0.28322325035704343 }, { "epoch": 1.7856933450600998, "grad_norm": 0.47623892992211325, "learning_rate": 0.00013941047585211912, "loss": 3.185746192932129, "step": 3046, "token_acc": 0.27996067324421753 }, { "epoch": 1.7862796833773087, "grad_norm": 0.47806636759503185, "learning_rate": 0.00013940959687644349, "loss": 3.2195448875427246, "step": 3047, "token_acc": 0.27533950478468266 }, { "epoch": 1.7868660216945176, "grad_norm": 0.37498587410266465, "learning_rate": 0.00013940871724875818, "loss": 3.2072198390960693, "step": 3048, "token_acc": 0.27563439388392835 }, { "epoch": 1.7874523600117267, "grad_norm": 0.5279993112248405, "learning_rate": 0.00013940783696907153, "loss": 3.138835906982422, "step": 3049, "token_acc": 0.2844757545889143 }, { "epoch": 1.7880386983289358, "grad_norm": 0.5394221774295284, "learning_rate": 0.0001394069560373918, "loss": 3.173534393310547, "step": 3050, "token_acc": 0.2802749729676081 }, { "epoch": 1.788625036646145, "grad_norm": 0.4729912055712406, "learning_rate": 0.00013940607445372721, "loss": 3.1679487228393555, "step": 3051, "token_acc": 0.2818582286089384 }, { "epoch": 1.7892113749633538, "grad_norm": 0.43670485127496467, "learning_rate": 0.0001394051922180861, "loss": 3.1542611122131348, "step": 3052, "token_acc": 0.28249610188527 }, { "epoch": 1.789797713280563, "grad_norm": 0.41545786021961273, "learning_rate": 0.00013940430933047672, "loss": 3.1426453590393066, "step": 3053, "token_acc": 0.28462744720250904 }, { "epoch": 1.7903840515977718, "grad_norm": 0.4025877851962264, "learning_rate": 0.00013940342579090738, "loss": 3.2097442150115967, "step": 3054, "token_acc": 0.27398770371611714 }, { "epoch": 1.790970389914981, "grad_norm": 0.33880724094291215, "learning_rate": 0.00013940254159938638, "loss": 3.130758285522461, "step": 3055, "token_acc": 0.28541543971667116 }, { "epoch": 1.79155672823219, "grad_norm": 0.3971113752701744, "learning_rate": 0.00013940165675592201, "loss": 3.192953586578369, "step": 3056, "token_acc": 0.27773112548170964 }, { "epoch": 1.7921430665493991, "grad_norm": 0.420127374757093, "learning_rate": 0.00013940077126052262, "loss": 3.1329848766326904, "step": 3057, "token_acc": 0.2867229576384336 }, { "epoch": 1.792729404866608, "grad_norm": 0.3710304145496616, "learning_rate": 0.00013939988511319648, "loss": 3.1280245780944824, "step": 3058, "token_acc": 0.2880393914660744 }, { "epoch": 1.793315743183817, "grad_norm": 0.4699728139457792, "learning_rate": 0.00013939899831395195, "loss": 3.1629183292388916, "step": 3059, "token_acc": 0.28276094480846903 }, { "epoch": 1.793902081501026, "grad_norm": 0.5094621372538382, "learning_rate": 0.00013939811086279735, "loss": 3.2229623794555664, "step": 3060, "token_acc": 0.2734696375450341 }, { "epoch": 1.7944884198182351, "grad_norm": 0.4539228788186506, "learning_rate": 0.000139397222759741, "loss": 3.1880416870117188, "step": 3061, "token_acc": 0.27749388967380656 }, { "epoch": 1.7950747581354443, "grad_norm": 0.5387703876310291, "learning_rate": 0.00013939633400479126, "loss": 3.174793243408203, "step": 3062, "token_acc": 0.2801326237255446 }, { "epoch": 1.7956610964526531, "grad_norm": 0.5736186198601464, "learning_rate": 0.0001393954445979565, "loss": 3.2441062927246094, "step": 3063, "token_acc": 0.26925142081931536 }, { "epoch": 1.7962474347698623, "grad_norm": 0.5551946202069394, "learning_rate": 0.000139394554539245, "loss": 3.2213916778564453, "step": 3064, "token_acc": 0.27582292265965314 }, { "epoch": 1.7968337730870712, "grad_norm": 0.49036338976591426, "learning_rate": 0.00013939366382866519, "loss": 3.1287078857421875, "step": 3065, "token_acc": 0.2872081311235235 }, { "epoch": 1.7974201114042803, "grad_norm": 0.46533985001211564, "learning_rate": 0.00013939277246622543, "loss": 3.19309139251709, "step": 3066, "token_acc": 0.27740108145223585 }, { "epoch": 1.7980064497214894, "grad_norm": 0.5381523928879589, "learning_rate": 0.00013939188045193406, "loss": 3.1795382499694824, "step": 3067, "token_acc": 0.2788989192359249 }, { "epoch": 1.7985927880386985, "grad_norm": 0.5659396468033879, "learning_rate": 0.0001393909877857995, "loss": 3.215360641479492, "step": 3068, "token_acc": 0.27581955342741665 }, { "epoch": 1.7991791263559074, "grad_norm": 0.39846348849663715, "learning_rate": 0.00013939009446783013, "loss": 3.2298998832702637, "step": 3069, "token_acc": 0.2732455091052682 }, { "epoch": 1.7997654646731163, "grad_norm": 0.6023182591545795, "learning_rate": 0.00013938920049803432, "loss": 3.1581497192382812, "step": 3070, "token_acc": 0.2816277031571653 }, { "epoch": 1.8003518029903254, "grad_norm": 0.5973667899463941, "learning_rate": 0.00013938830587642044, "loss": 3.2382943630218506, "step": 3071, "token_acc": 0.27295622356008137 }, { "epoch": 1.8009381413075345, "grad_norm": 0.40716133912826363, "learning_rate": 0.00013938741060299693, "loss": 3.1783199310302734, "step": 3072, "token_acc": 0.2781033493078453 }, { "epoch": 1.8015244796247436, "grad_norm": 0.5260938963136131, "learning_rate": 0.00013938651467777224, "loss": 3.1620545387268066, "step": 3073, "token_acc": 0.28243666097336956 }, { "epoch": 1.8021108179419525, "grad_norm": 0.49606053059243854, "learning_rate": 0.00013938561810075472, "loss": 3.1735994815826416, "step": 3074, "token_acc": 0.2802943378046917 }, { "epoch": 1.8026971562591614, "grad_norm": 0.4231313636004261, "learning_rate": 0.00013938472087195283, "loss": 3.1567726135253906, "step": 3075, "token_acc": 0.28127438886231465 }, { "epoch": 1.8032834945763705, "grad_norm": 0.5405233238987444, "learning_rate": 0.00013938382299137495, "loss": 3.158040761947632, "step": 3076, "token_acc": 0.2812863280584251 }, { "epoch": 1.8038698328935796, "grad_norm": 0.5304781267798307, "learning_rate": 0.00013938292445902958, "loss": 3.213930606842041, "step": 3077, "token_acc": 0.2746973863267896 }, { "epoch": 1.8044561712107887, "grad_norm": 0.44431819676251194, "learning_rate": 0.00013938202527492513, "loss": 3.1612045764923096, "step": 3078, "token_acc": 0.2809203090880267 }, { "epoch": 1.8050425095279976, "grad_norm": 0.4717217570765823, "learning_rate": 0.00013938112543907005, "loss": 3.175443172454834, "step": 3079, "token_acc": 0.2806741897838353 }, { "epoch": 1.8056288478452067, "grad_norm": 0.46229025408759195, "learning_rate": 0.00013938022495147274, "loss": 3.2002222537994385, "step": 3080, "token_acc": 0.27617259006430717 }, { "epoch": 1.8062151861624156, "grad_norm": 0.5083890099407025, "learning_rate": 0.00013937932381214175, "loss": 3.1847095489501953, "step": 3081, "token_acc": 0.2784684602430661 }, { "epoch": 1.8068015244796247, "grad_norm": 0.5533686622046305, "learning_rate": 0.0001393784220210855, "loss": 3.1746768951416016, "step": 3082, "token_acc": 0.27897653152164087 }, { "epoch": 1.8073878627968338, "grad_norm": 0.4142104940355026, "learning_rate": 0.00013937751957831247, "loss": 3.173844814300537, "step": 3083, "token_acc": 0.2810900214749703 }, { "epoch": 1.807974201114043, "grad_norm": 0.4978544453029577, "learning_rate": 0.00013937661648383114, "loss": 3.1158652305603027, "step": 3084, "token_acc": 0.2877712113934888 }, { "epoch": 1.8085605394312518, "grad_norm": 0.404520868960592, "learning_rate": 0.00013937571273764995, "loss": 3.187324047088623, "step": 3085, "token_acc": 0.2792948247078464 }, { "epoch": 1.8091468777484607, "grad_norm": 0.522672732730257, "learning_rate": 0.00013937480833977744, "loss": 3.159419298171997, "step": 3086, "token_acc": 0.2824156212272322 }, { "epoch": 1.8097332160656698, "grad_norm": 0.415376939666003, "learning_rate": 0.00013937390329022206, "loss": 3.1354565620422363, "step": 3087, "token_acc": 0.28479857895070737 }, { "epoch": 1.810319554382879, "grad_norm": 0.4642213383858837, "learning_rate": 0.0001393729975889924, "loss": 3.205690383911133, "step": 3088, "token_acc": 0.2772781288465066 }, { "epoch": 1.810905892700088, "grad_norm": 0.49813799232559103, "learning_rate": 0.00013937209123609688, "loss": 3.117408514022827, "step": 3089, "token_acc": 0.28825134026694493 }, { "epoch": 1.811492231017297, "grad_norm": 0.47138060972549517, "learning_rate": 0.00013937118423154403, "loss": 3.1561031341552734, "step": 3090, "token_acc": 0.2823397551123988 }, { "epoch": 1.812078569334506, "grad_norm": 0.44806060526564306, "learning_rate": 0.0001393702765753424, "loss": 3.219968795776367, "step": 3091, "token_acc": 0.27336022516504754 }, { "epoch": 1.812664907651715, "grad_norm": 0.35604252760771127, "learning_rate": 0.00013936936826750048, "loss": 3.159332036972046, "step": 3092, "token_acc": 0.2835389278910419 }, { "epoch": 1.813251245968924, "grad_norm": 0.43141545658571695, "learning_rate": 0.00013936845930802685, "loss": 3.113966464996338, "step": 3093, "token_acc": 0.2870039080453933 }, { "epoch": 1.8138375842861332, "grad_norm": 0.3702649756938618, "learning_rate": 0.00013936754969693, "loss": 3.1587395668029785, "step": 3094, "token_acc": 0.28325766620373 }, { "epoch": 1.8144239226033423, "grad_norm": 0.3448607896054455, "learning_rate": 0.0001393666394342185, "loss": 3.1702499389648438, "step": 3095, "token_acc": 0.2804072404243649 }, { "epoch": 1.8150102609205512, "grad_norm": 0.3900493020572125, "learning_rate": 0.0001393657285199009, "loss": 3.1721315383911133, "step": 3096, "token_acc": 0.28116943302154135 }, { "epoch": 1.81559659923776, "grad_norm": 0.3957939271157844, "learning_rate": 0.00013936481695398572, "loss": 3.139617919921875, "step": 3097, "token_acc": 0.2866223370152589 }, { "epoch": 1.8161829375549692, "grad_norm": 0.4577551980560736, "learning_rate": 0.00013936390473648157, "loss": 3.1384427547454834, "step": 3098, "token_acc": 0.28515393626683166 }, { "epoch": 1.8167692758721783, "grad_norm": 0.4177773953964753, "learning_rate": 0.00013936299186739702, "loss": 3.2024097442626953, "step": 3099, "token_acc": 0.2757726054828157 }, { "epoch": 1.8173556141893874, "grad_norm": 0.5427176212870789, "learning_rate": 0.00013936207834674063, "loss": 3.1971421241760254, "step": 3100, "token_acc": 0.2774537474175481 }, { "epoch": 1.8179419525065963, "grad_norm": 0.5671069807234658, "learning_rate": 0.000139361164174521, "loss": 3.2120771408081055, "step": 3101, "token_acc": 0.274823438187648 }, { "epoch": 1.8185282908238052, "grad_norm": 0.47576236019797347, "learning_rate": 0.00013936024935074667, "loss": 3.1772897243499756, "step": 3102, "token_acc": 0.2794671620745376 }, { "epoch": 1.8191146291410143, "grad_norm": 0.38272089477308774, "learning_rate": 0.00013935933387542625, "loss": 3.1722116470336914, "step": 3103, "token_acc": 0.28010020602613195 }, { "epoch": 1.8197009674582234, "grad_norm": 0.38335341776311815, "learning_rate": 0.00013935841774856837, "loss": 3.166057586669922, "step": 3104, "token_acc": 0.28029580340741556 }, { "epoch": 1.8202873057754325, "grad_norm": 0.3951982151991867, "learning_rate": 0.0001393575009701816, "loss": 3.131580352783203, "step": 3105, "token_acc": 0.2866240309235814 }, { "epoch": 1.8208736440926414, "grad_norm": 0.37854925932490785, "learning_rate": 0.0001393565835402746, "loss": 3.1695775985717773, "step": 3106, "token_acc": 0.2811941346726824 }, { "epoch": 1.8214599824098505, "grad_norm": 0.380870748421684, "learning_rate": 0.00013935566545885593, "loss": 3.1792449951171875, "step": 3107, "token_acc": 0.2798451775068595 }, { "epoch": 1.8220463207270594, "grad_norm": 0.37751737685559617, "learning_rate": 0.00013935474672593424, "loss": 3.1232857704162598, "step": 3108, "token_acc": 0.2874499431770081 }, { "epoch": 1.8226326590442685, "grad_norm": 0.4675124369873627, "learning_rate": 0.00013935382734151818, "loss": 3.152430534362793, "step": 3109, "token_acc": 0.2824377167182058 }, { "epoch": 1.8232189973614776, "grad_norm": 0.47256996763411707, "learning_rate": 0.00013935290730561636, "loss": 3.1945910453796387, "step": 3110, "token_acc": 0.2794085098959242 }, { "epoch": 1.8238053356786867, "grad_norm": 0.5349976757661753, "learning_rate": 0.00013935198661823743, "loss": 3.1444740295410156, "step": 3111, "token_acc": 0.28274354771317456 }, { "epoch": 1.8243916739958956, "grad_norm": 0.5885466420989289, "learning_rate": 0.00013935106527939004, "loss": 3.1792304515838623, "step": 3112, "token_acc": 0.2787217847628339 }, { "epoch": 1.8249780123131045, "grad_norm": 0.394028174009091, "learning_rate": 0.00013935014328908283, "loss": 3.175494432449341, "step": 3113, "token_acc": 0.28276836837478725 }, { "epoch": 1.8255643506303136, "grad_norm": 0.42584787829155984, "learning_rate": 0.00013934922064732448, "loss": 3.1496691703796387, "step": 3114, "token_acc": 0.28202434044452057 }, { "epoch": 1.8261506889475227, "grad_norm": 0.5159808963665635, "learning_rate": 0.00013934829735412366, "loss": 3.166433334350586, "step": 3115, "token_acc": 0.28145037902030823 }, { "epoch": 1.8267370272647319, "grad_norm": 0.5305346169379062, "learning_rate": 0.00013934737340948905, "loss": 3.1666316986083984, "step": 3116, "token_acc": 0.28240817254059414 }, { "epoch": 1.8273233655819408, "grad_norm": 0.44382065124134346, "learning_rate": 0.00013934644881342928, "loss": 3.1748905181884766, "step": 3117, "token_acc": 0.2804614817851157 }, { "epoch": 1.8279097038991499, "grad_norm": 0.3736691576577399, "learning_rate": 0.00013934552356595307, "loss": 3.132499933242798, "step": 3118, "token_acc": 0.2841951227187735 }, { "epoch": 1.8284960422163588, "grad_norm": 0.41432946873712084, "learning_rate": 0.00013934459766706914, "loss": 3.117541790008545, "step": 3119, "token_acc": 0.286692869174621 }, { "epoch": 1.8290823805335679, "grad_norm": 0.3962454139031622, "learning_rate": 0.0001393436711167861, "loss": 3.1921987533569336, "step": 3120, "token_acc": 0.27692729307339203 }, { "epoch": 1.829668718850777, "grad_norm": 0.49240817896377026, "learning_rate": 0.00013934274391511276, "loss": 3.1450676918029785, "step": 3121, "token_acc": 0.2848091244477988 }, { "epoch": 1.830255057167986, "grad_norm": 0.4981208730177976, "learning_rate": 0.0001393418160620578, "loss": 3.2035679817199707, "step": 3122, "token_acc": 0.2743361426813617 }, { "epoch": 1.830841395485195, "grad_norm": 0.4730297181081027, "learning_rate": 0.00013934088755762988, "loss": 3.1686644554138184, "step": 3123, "token_acc": 0.28061125882125565 }, { "epoch": 1.8314277338024039, "grad_norm": 0.49353159956789816, "learning_rate": 0.00013933995840183778, "loss": 3.096209764480591, "step": 3124, "token_acc": 0.28976383232791636 }, { "epoch": 1.832014072119613, "grad_norm": 0.5098706284039898, "learning_rate": 0.0001393390285946902, "loss": 3.1716156005859375, "step": 3125, "token_acc": 0.2803987204855593 }, { "epoch": 1.832600410436822, "grad_norm": 0.3983377277263639, "learning_rate": 0.0001393380981361959, "loss": 3.181307792663574, "step": 3126, "token_acc": 0.27931707624250246 }, { "epoch": 1.8331867487540312, "grad_norm": 0.4685289541200483, "learning_rate": 0.00013933716702636354, "loss": 3.1936874389648438, "step": 3127, "token_acc": 0.27790426311713406 }, { "epoch": 1.83377308707124, "grad_norm": 0.4850728489300537, "learning_rate": 0.00013933623526520198, "loss": 3.216696262359619, "step": 3128, "token_acc": 0.27244670394131887 }, { "epoch": 1.834359425388449, "grad_norm": 0.4442784435578466, "learning_rate": 0.0001393353028527199, "loss": 3.152035713195801, "step": 3129, "token_acc": 0.28279672085886315 }, { "epoch": 1.834945763705658, "grad_norm": 0.3398012479935769, "learning_rate": 0.00013933436978892611, "loss": 3.134960651397705, "step": 3130, "token_acc": 0.2829116580117453 }, { "epoch": 1.8355321020228672, "grad_norm": 0.4807958262620541, "learning_rate": 0.00013933343607382934, "loss": 3.1734981536865234, "step": 3131, "token_acc": 0.2793506831734474 }, { "epoch": 1.8361184403400763, "grad_norm": 0.479538214670079, "learning_rate": 0.00013933250170743836, "loss": 3.1691830158233643, "step": 3132, "token_acc": 0.28349071587397845 }, { "epoch": 1.8367047786572852, "grad_norm": 0.41577059028038776, "learning_rate": 0.00013933156668976193, "loss": 3.190570831298828, "step": 3133, "token_acc": 0.2777780676126878 }, { "epoch": 1.8372911169744943, "grad_norm": 0.42132792114377215, "learning_rate": 0.00013933063102080888, "loss": 3.1849000453948975, "step": 3134, "token_acc": 0.2796674071205853 }, { "epoch": 1.8378774552917032, "grad_norm": 0.4553283949667844, "learning_rate": 0.00013932969470058796, "loss": 3.182206153869629, "step": 3135, "token_acc": 0.2787902522103891 }, { "epoch": 1.8384637936089123, "grad_norm": 0.5656155033583683, "learning_rate": 0.00013932875772910798, "loss": 3.164655923843384, "step": 3136, "token_acc": 0.28017347693825534 }, { "epoch": 1.8390501319261214, "grad_norm": 0.5555682484932005, "learning_rate": 0.00013932782010637776, "loss": 3.1872076988220215, "step": 3137, "token_acc": 0.2785982675461254 }, { "epoch": 1.8396364702433305, "grad_norm": 0.41618370745012034, "learning_rate": 0.0001393268818324061, "loss": 3.156522274017334, "step": 3138, "token_acc": 0.2824550642700944 }, { "epoch": 1.8402228085605394, "grad_norm": 0.5558849173494799, "learning_rate": 0.00013932594290720177, "loss": 3.1615028381347656, "step": 3139, "token_acc": 0.2817987218868425 }, { "epoch": 1.8408091468777483, "grad_norm": 0.4390032545505644, "learning_rate": 0.00013932500333077363, "loss": 3.1430325508117676, "step": 3140, "token_acc": 0.2852734937745245 }, { "epoch": 1.8413954851949574, "grad_norm": 0.4319138272206243, "learning_rate": 0.00013932406310313052, "loss": 3.185291290283203, "step": 3141, "token_acc": 0.27892822863013883 }, { "epoch": 1.8419818235121665, "grad_norm": 0.4538933071360432, "learning_rate": 0.00013932312222428127, "loss": 3.1958789825439453, "step": 3142, "token_acc": 0.27818737141865674 }, { "epoch": 1.8425681618293757, "grad_norm": 0.3800487750062563, "learning_rate": 0.00013932218069423466, "loss": 3.1312551498413086, "step": 3143, "token_acc": 0.2856742920095911 }, { "epoch": 1.8431545001465846, "grad_norm": 0.43832071318606297, "learning_rate": 0.00013932123851299958, "loss": 3.1768927574157715, "step": 3144, "token_acc": 0.27985662361315905 }, { "epoch": 1.8437408384637937, "grad_norm": 0.41746990910120824, "learning_rate": 0.0001393202956805849, "loss": 3.174783229827881, "step": 3145, "token_acc": 0.27985528934086384 }, { "epoch": 1.8443271767810026, "grad_norm": 0.4379011159990005, "learning_rate": 0.00013931935219699943, "loss": 3.1576087474823, "step": 3146, "token_acc": 0.2816806261582582 }, { "epoch": 1.8449135150982117, "grad_norm": 0.4210637540099297, "learning_rate": 0.00013931840806225208, "loss": 3.1915342807769775, "step": 3147, "token_acc": 0.2771453010600491 }, { "epoch": 1.8454998534154208, "grad_norm": 0.49838357316306653, "learning_rate": 0.00013931746327635166, "loss": 3.147874355316162, "step": 3148, "token_acc": 0.28525543381202006 }, { "epoch": 1.8460861917326299, "grad_norm": 0.4679593221950554, "learning_rate": 0.0001393165178393071, "loss": 3.163898229598999, "step": 3149, "token_acc": 0.2820557675041997 }, { "epoch": 1.8466725300498388, "grad_norm": 0.47297095236215914, "learning_rate": 0.00013931557175112728, "loss": 3.1384949684143066, "step": 3150, "token_acc": 0.2856092373299035 }, { "epoch": 1.8472588683670477, "grad_norm": 0.5010868354231831, "learning_rate": 0.00013931462501182103, "loss": 3.1679749488830566, "step": 3151, "token_acc": 0.2815936718739666 }, { "epoch": 1.8478452066842568, "grad_norm": 0.4474457647663326, "learning_rate": 0.0001393136776213973, "loss": 3.1668801307678223, "step": 3152, "token_acc": 0.2814158541305466 }, { "epoch": 1.848431545001466, "grad_norm": 0.4426601992627646, "learning_rate": 0.00013931272957986497, "loss": 3.1903252601623535, "step": 3153, "token_acc": 0.27782354228054873 }, { "epoch": 1.849017883318675, "grad_norm": 0.4661470586098385, "learning_rate": 0.00013931178088723292, "loss": 3.1708498001098633, "step": 3154, "token_acc": 0.27983128653205636 }, { "epoch": 1.849604221635884, "grad_norm": 0.3996256301843792, "learning_rate": 0.00013931083154351012, "loss": 3.1814990043640137, "step": 3155, "token_acc": 0.278026604327858 }, { "epoch": 1.8501905599530928, "grad_norm": 0.37533383920605207, "learning_rate": 0.00013930988154870543, "loss": 3.16428804397583, "step": 3156, "token_acc": 0.27941226065368363 }, { "epoch": 1.850776898270302, "grad_norm": 0.4246557418440358, "learning_rate": 0.00013930893090282782, "loss": 3.176447868347168, "step": 3157, "token_acc": 0.28066625680614893 }, { "epoch": 1.851363236587511, "grad_norm": 0.408151642288997, "learning_rate": 0.00013930797960588618, "loss": 3.1404223442077637, "step": 3158, "token_acc": 0.28213637167146094 }, { "epoch": 1.8519495749047201, "grad_norm": 0.4028073887243625, "learning_rate": 0.0001393070276578895, "loss": 3.1863162517547607, "step": 3159, "token_acc": 0.2778931168401917 }, { "epoch": 1.852535913221929, "grad_norm": 0.3869435824996025, "learning_rate": 0.00013930607505884665, "loss": 3.1493799686431885, "step": 3160, "token_acc": 0.28486303708517363 }, { "epoch": 1.8531222515391381, "grad_norm": 0.3994008424027253, "learning_rate": 0.00013930512180876663, "loss": 3.195265293121338, "step": 3161, "token_acc": 0.27772275372483096 }, { "epoch": 1.853708589856347, "grad_norm": 0.438679374910476, "learning_rate": 0.00013930416790765838, "loss": 3.1393446922302246, "step": 3162, "token_acc": 0.28480518238245045 }, { "epoch": 1.8542949281735561, "grad_norm": 0.35642283953295867, "learning_rate": 0.00013930321335553085, "loss": 3.1942851543426514, "step": 3163, "token_acc": 0.2790757272729675 }, { "epoch": 1.8548812664907652, "grad_norm": 0.36933449704949317, "learning_rate": 0.00013930225815239305, "loss": 3.217050075531006, "step": 3164, "token_acc": 0.2745982572471888 }, { "epoch": 1.8554676048079743, "grad_norm": 0.38915299533280945, "learning_rate": 0.0001393013022982539, "loss": 3.1804919242858887, "step": 3165, "token_acc": 0.278897950119047 }, { "epoch": 1.8560539431251832, "grad_norm": 0.4343528363117653, "learning_rate": 0.0001393003457931224, "loss": 3.136134624481201, "step": 3166, "token_acc": 0.2850956709681521 }, { "epoch": 1.8566402814423921, "grad_norm": 0.5016250329219616, "learning_rate": 0.00013929938863700754, "loss": 3.1551413536071777, "step": 3167, "token_acc": 0.2822694998556 }, { "epoch": 1.8572266197596012, "grad_norm": 0.4248031394944281, "learning_rate": 0.00013929843082991828, "loss": 3.1437995433807373, "step": 3168, "token_acc": 0.2851472519429076 }, { "epoch": 1.8578129580768104, "grad_norm": 0.4040668038723804, "learning_rate": 0.00013929747237186366, "loss": 3.1509008407592773, "step": 3169, "token_acc": 0.2834553724402382 }, { "epoch": 1.8583992963940195, "grad_norm": 0.31637779847916714, "learning_rate": 0.00013929651326285267, "loss": 3.1650071144104004, "step": 3170, "token_acc": 0.2813639007625848 }, { "epoch": 1.8589856347112284, "grad_norm": 0.44539377667623115, "learning_rate": 0.00013929555350289432, "loss": 3.1280226707458496, "step": 3171, "token_acc": 0.28717283980252367 }, { "epoch": 1.8595719730284375, "grad_norm": 0.4582479016867659, "learning_rate": 0.00013929459309199762, "loss": 3.1732263565063477, "step": 3172, "token_acc": 0.2801640531269985 }, { "epoch": 1.8601583113456464, "grad_norm": 0.45264600986543596, "learning_rate": 0.0001392936320301716, "loss": 3.16690993309021, "step": 3173, "token_acc": 0.2807256500217292 }, { "epoch": 1.8607446496628555, "grad_norm": 0.46004897631861874, "learning_rate": 0.00013929267031742527, "loss": 3.169872760772705, "step": 3174, "token_acc": 0.28023176388987786 }, { "epoch": 1.8613309879800646, "grad_norm": 0.38790402064626045, "learning_rate": 0.00013929170795376768, "loss": 3.1654186248779297, "step": 3175, "token_acc": 0.280828693661595 }, { "epoch": 1.8619173262972737, "grad_norm": 0.47520985549150313, "learning_rate": 0.00013929074493920787, "loss": 3.1156277656555176, "step": 3176, "token_acc": 0.2891200693541396 }, { "epoch": 1.8625036646144826, "grad_norm": 0.5006846008239417, "learning_rate": 0.00013928978127375488, "loss": 3.197873592376709, "step": 3177, "token_acc": 0.2771237267979939 }, { "epoch": 1.8630900029316915, "grad_norm": 0.43510501661829165, "learning_rate": 0.00013928881695741773, "loss": 3.1668832302093506, "step": 3178, "token_acc": 0.2804988527522332 }, { "epoch": 1.8636763412489006, "grad_norm": 0.42840004165706247, "learning_rate": 0.00013928785199020556, "loss": 3.2006068229675293, "step": 3179, "token_acc": 0.2771065186972507 }, { "epoch": 1.8642626795661097, "grad_norm": 0.5227774874469465, "learning_rate": 0.00013928688637212736, "loss": 3.147080659866333, "step": 3180, "token_acc": 0.2832838859282804 }, { "epoch": 1.8648490178833188, "grad_norm": 0.6243791859283473, "learning_rate": 0.00013928592010319224, "loss": 3.1829938888549805, "step": 3181, "token_acc": 0.2796941193741873 }, { "epoch": 1.8654353562005277, "grad_norm": 0.5694853019269515, "learning_rate": 0.00013928495318340925, "loss": 3.188652276992798, "step": 3182, "token_acc": 0.2783027453359624 }, { "epoch": 1.8660216945177366, "grad_norm": 0.47143333055759234, "learning_rate": 0.0001392839856127875, "loss": 3.1679487228393555, "step": 3183, "token_acc": 0.27880433957201656 }, { "epoch": 1.8666080328349457, "grad_norm": 0.4723448652573872, "learning_rate": 0.0001392830173913361, "loss": 3.146944999694824, "step": 3184, "token_acc": 0.28359464466812195 }, { "epoch": 1.8671943711521548, "grad_norm": 0.4800870738967834, "learning_rate": 0.00013928204851906408, "loss": 3.157043933868408, "step": 3185, "token_acc": 0.28231543517269936 }, { "epoch": 1.867780709469364, "grad_norm": 0.5061764019688572, "learning_rate": 0.00013928107899598054, "loss": 3.1980795860290527, "step": 3186, "token_acc": 0.27643952278045597 }, { "epoch": 1.8683670477865728, "grad_norm": 0.5514986383159833, "learning_rate": 0.00013928010882209468, "loss": 3.191307783126831, "step": 3187, "token_acc": 0.2783021634842221 }, { "epoch": 1.868953386103782, "grad_norm": 0.43693590323965487, "learning_rate": 0.00013927913799741552, "loss": 3.135305881500244, "step": 3188, "token_acc": 0.28483880611104995 }, { "epoch": 1.8695397244209908, "grad_norm": 0.4443579387867809, "learning_rate": 0.0001392781665219522, "loss": 3.1411781311035156, "step": 3189, "token_acc": 0.28354050072090575 }, { "epoch": 1.8701260627382, "grad_norm": 0.42033430316866605, "learning_rate": 0.00013927719439571387, "loss": 3.1558656692504883, "step": 3190, "token_acc": 0.2805567791384019 }, { "epoch": 1.870712401055409, "grad_norm": 0.4561818756032974, "learning_rate": 0.00013927622161870966, "loss": 3.164008140563965, "step": 3191, "token_acc": 0.2834006810683134 }, { "epoch": 1.8712987393726181, "grad_norm": 0.40538453492410115, "learning_rate": 0.0001392752481909487, "loss": 3.1403615474700928, "step": 3192, "token_acc": 0.2836600090423127 }, { "epoch": 1.871885077689827, "grad_norm": 0.37927015827380234, "learning_rate": 0.00013927427411244013, "loss": 3.1708359718322754, "step": 3193, "token_acc": 0.27973665836450984 }, { "epoch": 1.872471416007036, "grad_norm": 0.396055522820391, "learning_rate": 0.0001392732993831931, "loss": 3.149012565612793, "step": 3194, "token_acc": 0.2835968616057532 }, { "epoch": 1.873057754324245, "grad_norm": 0.3699032754830193, "learning_rate": 0.00013927232400321677, "loss": 3.138062000274658, "step": 3195, "token_acc": 0.28395361655551465 }, { "epoch": 1.8736440926414542, "grad_norm": 0.4216996926870833, "learning_rate": 0.0001392713479725203, "loss": 3.140707015991211, "step": 3196, "token_acc": 0.2831224327204627 }, { "epoch": 1.8742304309586633, "grad_norm": 0.47806893854204235, "learning_rate": 0.00013927037129111289, "loss": 3.167876720428467, "step": 3197, "token_acc": 0.28166361777682125 }, { "epoch": 1.8748167692758722, "grad_norm": 0.40773732353368186, "learning_rate": 0.00013926939395900363, "loss": 3.168194532394409, "step": 3198, "token_acc": 0.2792231536664702 }, { "epoch": 1.875403107593081, "grad_norm": 0.49929190999179485, "learning_rate": 0.0001392684159762018, "loss": 3.137099266052246, "step": 3199, "token_acc": 0.28466421209601855 }, { "epoch": 1.8759894459102902, "grad_norm": 0.3939360740493163, "learning_rate": 0.0001392674373427165, "loss": 3.1563568115234375, "step": 3200, "token_acc": 0.2837894725967784 }, { "epoch": 1.8765757842274993, "grad_norm": 0.44235563129322336, "learning_rate": 0.000139266458058557, "loss": 3.1277499198913574, "step": 3201, "token_acc": 0.28550824086901355 }, { "epoch": 1.8771621225447084, "grad_norm": 0.3699179531293641, "learning_rate": 0.00013926547812373246, "loss": 3.0988478660583496, "step": 3202, "token_acc": 0.28933124035833174 }, { "epoch": 1.8777484608619175, "grad_norm": 0.38433421388597616, "learning_rate": 0.00013926449753825208, "loss": 3.174110174179077, "step": 3203, "token_acc": 0.27928009149545613 }, { "epoch": 1.8783347991791264, "grad_norm": 0.35849157503337414, "learning_rate": 0.0001392635163021251, "loss": 3.1657798290252686, "step": 3204, "token_acc": 0.28283916928462777 }, { "epoch": 1.8789211374963353, "grad_norm": 0.45925139587220076, "learning_rate": 0.0001392625344153607, "loss": 3.170564651489258, "step": 3205, "token_acc": 0.2785159679870972 }, { "epoch": 1.8795074758135444, "grad_norm": 0.3627784208088287, "learning_rate": 0.0001392615518779681, "loss": 3.1906752586364746, "step": 3206, "token_acc": 0.2794275919799099 }, { "epoch": 1.8800938141307535, "grad_norm": 0.38103563519565514, "learning_rate": 0.00013926056868995658, "loss": 3.133211135864258, "step": 3207, "token_acc": 0.2842498812896897 }, { "epoch": 1.8806801524479626, "grad_norm": 0.36515337250559926, "learning_rate": 0.00013925958485133536, "loss": 3.157428503036499, "step": 3208, "token_acc": 0.2828138787344006 }, { "epoch": 1.8812664907651715, "grad_norm": 0.3807589227799556, "learning_rate": 0.00013925860036211366, "loss": 3.154573917388916, "step": 3209, "token_acc": 0.28180255499719653 }, { "epoch": 1.8818528290823804, "grad_norm": 0.4306466813557704, "learning_rate": 0.0001392576152223007, "loss": 3.1789722442626953, "step": 3210, "token_acc": 0.2789900445842593 }, { "epoch": 1.8824391673995895, "grad_norm": 0.39154732834637207, "learning_rate": 0.0001392566294319058, "loss": 3.1569294929504395, "step": 3211, "token_acc": 0.28095053160368794 }, { "epoch": 1.8830255057167986, "grad_norm": 0.421677878031165, "learning_rate": 0.0001392556429909382, "loss": 3.17936372756958, "step": 3212, "token_acc": 0.278695482908155 }, { "epoch": 1.8836118440340077, "grad_norm": 0.4372732286099418, "learning_rate": 0.00013925465589940714, "loss": 3.176467180252075, "step": 3213, "token_acc": 0.27942310618182964 }, { "epoch": 1.8841981823512166, "grad_norm": 0.4140991247351585, "learning_rate": 0.00013925366815732194, "loss": 3.1532692909240723, "step": 3214, "token_acc": 0.28295799433803465 }, { "epoch": 1.8847845206684257, "grad_norm": 0.4508223053126904, "learning_rate": 0.0001392526797646918, "loss": 3.2087998390197754, "step": 3215, "token_acc": 0.2774270002036773 }, { "epoch": 1.8853708589856346, "grad_norm": 0.5012089860045833, "learning_rate": 0.00013925169072152608, "loss": 3.1582112312316895, "step": 3216, "token_acc": 0.27963699829031996 }, { "epoch": 1.8859571973028437, "grad_norm": 0.46565720167085073, "learning_rate": 0.00013925070102783406, "loss": 3.1830849647521973, "step": 3217, "token_acc": 0.2777029480618817 }, { "epoch": 1.8865435356200528, "grad_norm": 0.4211693954561409, "learning_rate": 0.000139249710683625, "loss": 3.1555519104003906, "step": 3218, "token_acc": 0.28383141480056123 }, { "epoch": 1.887129873937262, "grad_norm": 0.436791452156444, "learning_rate": 0.0001392487196889082, "loss": 3.1979591846466064, "step": 3219, "token_acc": 0.27799840939333026 }, { "epoch": 1.8877162122544708, "grad_norm": 0.47964134596519054, "learning_rate": 0.00013924772804369302, "loss": 3.169191598892212, "step": 3220, "token_acc": 0.2798651810937302 }, { "epoch": 1.8883025505716797, "grad_norm": 0.5242219429844494, "learning_rate": 0.00013924673574798875, "loss": 3.178762912750244, "step": 3221, "token_acc": 0.28010421790048784 }, { "epoch": 1.8888888888888888, "grad_norm": 0.5259833030520964, "learning_rate": 0.0001392457428018047, "loss": 3.1936635971069336, "step": 3222, "token_acc": 0.277296928048222 }, { "epoch": 1.889475227206098, "grad_norm": 0.5260827468692263, "learning_rate": 0.00013924474920515021, "loss": 3.190840244293213, "step": 3223, "token_acc": 0.2771848775567145 }, { "epoch": 1.890061565523307, "grad_norm": 0.47998355469632054, "learning_rate": 0.00013924375495803464, "loss": 3.1654210090637207, "step": 3224, "token_acc": 0.2803497821683053 }, { "epoch": 1.890647903840516, "grad_norm": 0.42930586885380156, "learning_rate": 0.00013924276006046726, "loss": 3.163123607635498, "step": 3225, "token_acc": 0.28264162684062616 }, { "epoch": 1.8912342421577248, "grad_norm": 0.5900401610410939, "learning_rate": 0.00013924176451245745, "loss": 3.209183692932129, "step": 3226, "token_acc": 0.27388793164972525 }, { "epoch": 1.891820580474934, "grad_norm": 0.3710060984014983, "learning_rate": 0.0001392407683140146, "loss": 3.2026174068450928, "step": 3227, "token_acc": 0.2759523853002789 }, { "epoch": 1.892406918792143, "grad_norm": 0.45531589579279325, "learning_rate": 0.00013923977146514802, "loss": 3.1405768394470215, "step": 3228, "token_acc": 0.28363631721391 }, { "epoch": 1.8929932571093522, "grad_norm": 0.3929252099195596, "learning_rate": 0.00013923877396586706, "loss": 3.132526397705078, "step": 3229, "token_acc": 0.28557895270549744 }, { "epoch": 1.8935795954265613, "grad_norm": 0.3819646646849763, "learning_rate": 0.00013923777581618114, "loss": 3.0966849327087402, "step": 3230, "token_acc": 0.28866570635936833 }, { "epoch": 1.8941659337437702, "grad_norm": 0.41937028930101194, "learning_rate": 0.00013923677701609962, "loss": 3.1625704765319824, "step": 3231, "token_acc": 0.2815315972120965 }, { "epoch": 1.894752272060979, "grad_norm": 0.48558169730096007, "learning_rate": 0.00013923577756563187, "loss": 3.2017264366149902, "step": 3232, "token_acc": 0.27718883013205114 }, { "epoch": 1.8953386103781882, "grad_norm": 0.4044825468728186, "learning_rate": 0.0001392347774647873, "loss": 3.195657730102539, "step": 3233, "token_acc": 0.27687345480231856 }, { "epoch": 1.8959249486953973, "grad_norm": 0.3636799516545554, "learning_rate": 0.00013923377671357527, "loss": 3.1659018993377686, "step": 3234, "token_acc": 0.2814515510589405 }, { "epoch": 1.8965112870126064, "grad_norm": 0.43506610084262193, "learning_rate": 0.00013923277531200525, "loss": 3.1271121501922607, "step": 3235, "token_acc": 0.287426762744966 }, { "epoch": 1.8970976253298153, "grad_norm": 0.39835074128630904, "learning_rate": 0.00013923177326008655, "loss": 3.164862632751465, "step": 3236, "token_acc": 0.27948463059870615 }, { "epoch": 1.8976839636470242, "grad_norm": 0.398647678967304, "learning_rate": 0.00013923077055782862, "loss": 3.1710309982299805, "step": 3237, "token_acc": 0.27940895912209096 }, { "epoch": 1.8982703019642333, "grad_norm": 0.43493695556800605, "learning_rate": 0.00013922976720524092, "loss": 3.1558518409729004, "step": 3238, "token_acc": 0.2812582835195153 }, { "epoch": 1.8988566402814424, "grad_norm": 0.47465953459841326, "learning_rate": 0.00013922876320233285, "loss": 3.1219582557678223, "step": 3239, "token_acc": 0.28650379629151385 }, { "epoch": 1.8994429785986515, "grad_norm": 0.39889173750064055, "learning_rate": 0.00013922775854911384, "loss": 3.159717559814453, "step": 3240, "token_acc": 0.28235672306922555 }, { "epoch": 1.9000293169158604, "grad_norm": 0.3882967828517272, "learning_rate": 0.00013922675324559328, "loss": 3.1364872455596924, "step": 3241, "token_acc": 0.28430565659032864 }, { "epoch": 1.9006156552330695, "grad_norm": 0.386053519364716, "learning_rate": 0.0001392257472917807, "loss": 3.1468558311462402, "step": 3242, "token_acc": 0.28261187384537684 }, { "epoch": 1.9012019935502784, "grad_norm": 0.3535354835029746, "learning_rate": 0.0001392247406876855, "loss": 3.1676108837127686, "step": 3243, "token_acc": 0.28007552837906935 }, { "epoch": 1.9017883318674875, "grad_norm": 0.3647392294318364, "learning_rate": 0.00013922373343331715, "loss": 3.1686136722564697, "step": 3244, "token_acc": 0.2785155376704673 }, { "epoch": 1.9023746701846966, "grad_norm": 0.3295374863320513, "learning_rate": 0.00013922272552868508, "loss": 3.190880298614502, "step": 3245, "token_acc": 0.2781064935468662 }, { "epoch": 1.9029610085019057, "grad_norm": 0.3716464974255558, "learning_rate": 0.0001392217169737988, "loss": 3.132190465927124, "step": 3246, "token_acc": 0.28722933698293673 }, { "epoch": 1.9035473468191146, "grad_norm": 0.3723810947186311, "learning_rate": 0.00013922070776866774, "loss": 3.123586654663086, "step": 3247, "token_acc": 0.28549912584545756 }, { "epoch": 1.9041336851363235, "grad_norm": 0.3314649450858669, "learning_rate": 0.00013921969791330145, "loss": 3.1549324989318848, "step": 3248, "token_acc": 0.2806800601690515 }, { "epoch": 1.9047200234535326, "grad_norm": 0.35860992804761826, "learning_rate": 0.00013921868740770935, "loss": 3.1849069595336914, "step": 3249, "token_acc": 0.27700294177448453 }, { "epoch": 1.9053063617707418, "grad_norm": 0.4300130726346776, "learning_rate": 0.00013921767625190096, "loss": 3.1899471282958984, "step": 3250, "token_acc": 0.27732966468521814 }, { "epoch": 1.9058927000879509, "grad_norm": 0.4630545263141743, "learning_rate": 0.00013921666444588577, "loss": 3.1187996864318848, "step": 3251, "token_acc": 0.2866724923207287 }, { "epoch": 1.9064790384051598, "grad_norm": 0.40185097618652516, "learning_rate": 0.00013921565198967328, "loss": 3.203143835067749, "step": 3252, "token_acc": 0.27640630589064746 }, { "epoch": 1.9070653767223686, "grad_norm": 0.5057966622113312, "learning_rate": 0.00013921463888327303, "loss": 3.180072069168091, "step": 3253, "token_acc": 0.2780814131432819 }, { "epoch": 1.9076517150395778, "grad_norm": 0.6114914422264391, "learning_rate": 0.00013921362512669448, "loss": 3.1662051677703857, "step": 3254, "token_acc": 0.2801447278453474 }, { "epoch": 1.9082380533567869, "grad_norm": 0.4777811847193061, "learning_rate": 0.0001392126107199472, "loss": 3.1516036987304688, "step": 3255, "token_acc": 0.2816334235197886 }, { "epoch": 1.908824391673996, "grad_norm": 0.39579552402232804, "learning_rate": 0.00013921159566304074, "loss": 3.1827802658081055, "step": 3256, "token_acc": 0.27978707658476765 }, { "epoch": 1.909410729991205, "grad_norm": 0.41516264424378074, "learning_rate": 0.00013921057995598457, "loss": 3.157862424850464, "step": 3257, "token_acc": 0.2819306851676474 }, { "epoch": 1.909997068308414, "grad_norm": 0.42452023110846204, "learning_rate": 0.00013920956359878827, "loss": 3.1134355068206787, "step": 3258, "token_acc": 0.2886879107671642 }, { "epoch": 1.9105834066256229, "grad_norm": 0.43351651653653855, "learning_rate": 0.00013920854659146137, "loss": 3.146402359008789, "step": 3259, "token_acc": 0.2827022319617334 }, { "epoch": 1.911169744942832, "grad_norm": 0.5159748991774191, "learning_rate": 0.00013920752893401347, "loss": 3.1085493564605713, "step": 3260, "token_acc": 0.28829555092812637 }, { "epoch": 1.911756083260041, "grad_norm": 0.47485556005404345, "learning_rate": 0.00013920651062645408, "loss": 3.1511948108673096, "step": 3261, "token_acc": 0.2816732024717888 }, { "epoch": 1.9123424215772502, "grad_norm": 0.5364071028164041, "learning_rate": 0.00013920549166879278, "loss": 3.191603660583496, "step": 3262, "token_acc": 0.27772889615460705 }, { "epoch": 1.912928759894459, "grad_norm": 0.47463936261131123, "learning_rate": 0.0001392044720610391, "loss": 3.135042190551758, "step": 3263, "token_acc": 0.28402370029558494 }, { "epoch": 1.913515098211668, "grad_norm": 0.49892655898849975, "learning_rate": 0.00013920345180320272, "loss": 3.132401466369629, "step": 3264, "token_acc": 0.2864922204616688 }, { "epoch": 1.914101436528877, "grad_norm": 0.48148554776460956, "learning_rate": 0.00013920243089529313, "loss": 3.1199803352355957, "step": 3265, "token_acc": 0.28821712727451937 }, { "epoch": 1.9146877748460862, "grad_norm": 0.48642291445733177, "learning_rate": 0.00013920140933731996, "loss": 3.1713573932647705, "step": 3266, "token_acc": 0.27960152593781595 }, { "epoch": 1.9152741131632953, "grad_norm": 0.424112545953526, "learning_rate": 0.0001392003871292928, "loss": 3.1691460609436035, "step": 3267, "token_acc": 0.2814102202562718 }, { "epoch": 1.9158604514805042, "grad_norm": 0.5505198714378431, "learning_rate": 0.00013919936427122127, "loss": 3.163496971130371, "step": 3268, "token_acc": 0.2814821116209333 }, { "epoch": 1.9164467897977133, "grad_norm": 0.5247317034728544, "learning_rate": 0.00013919834076311493, "loss": 3.1559205055236816, "step": 3269, "token_acc": 0.28346764437661687 }, { "epoch": 1.9170331281149222, "grad_norm": 0.40305588343407733, "learning_rate": 0.00013919731660498342, "loss": 3.1450202465057373, "step": 3270, "token_acc": 0.2829689843960355 }, { "epoch": 1.9176194664321313, "grad_norm": 0.43542750350141607, "learning_rate": 0.00013919629179683638, "loss": 3.1738524436950684, "step": 3271, "token_acc": 0.2793582447791656 }, { "epoch": 1.9182058047493404, "grad_norm": 0.41845032690860806, "learning_rate": 0.00013919526633868342, "loss": 3.1482574939727783, "step": 3272, "token_acc": 0.2841056976561606 }, { "epoch": 1.9187921430665495, "grad_norm": 0.42860755232729386, "learning_rate": 0.00013919424023053418, "loss": 3.126512050628662, "step": 3273, "token_acc": 0.28690584313278983 }, { "epoch": 1.9193784813837584, "grad_norm": 0.4244661969338137, "learning_rate": 0.00013919321347239828, "loss": 3.119539499282837, "step": 3274, "token_acc": 0.28811144996880667 }, { "epoch": 1.9199648197009673, "grad_norm": 0.5076369354379808, "learning_rate": 0.0001391921860642854, "loss": 3.123659610748291, "step": 3275, "token_acc": 0.2858278598975952 }, { "epoch": 1.9205511580181764, "grad_norm": 0.41525805077185407, "learning_rate": 0.00013919115800620517, "loss": 3.1809468269348145, "step": 3276, "token_acc": 0.28006693214688755 }, { "epoch": 1.9211374963353856, "grad_norm": 0.4149317804297892, "learning_rate": 0.00013919012929816723, "loss": 3.1191177368164062, "step": 3277, "token_acc": 0.28689471859470705 }, { "epoch": 1.9217238346525947, "grad_norm": 0.5536141576094403, "learning_rate": 0.00013918909994018125, "loss": 3.1598801612854004, "step": 3278, "token_acc": 0.2796153381155799 }, { "epoch": 1.9223101729698036, "grad_norm": 0.4419464666248388, "learning_rate": 0.00013918806993225695, "loss": 3.167031764984131, "step": 3279, "token_acc": 0.2798491915896873 }, { "epoch": 1.9228965112870124, "grad_norm": 0.5302034122116437, "learning_rate": 0.0001391870392744039, "loss": 3.1663784980773926, "step": 3280, "token_acc": 0.2823344383973931 }, { "epoch": 1.9234828496042216, "grad_norm": 0.48053295401526613, "learning_rate": 0.0001391860079666319, "loss": 3.1544718742370605, "step": 3281, "token_acc": 0.2801588885059765 }, { "epoch": 1.9240691879214307, "grad_norm": 0.4241278880950176, "learning_rate": 0.0001391849760089506, "loss": 3.1778786182403564, "step": 3282, "token_acc": 0.2807347838950001 }, { "epoch": 1.9246555262386398, "grad_norm": 0.33828674699365086, "learning_rate": 0.00013918394340136964, "loss": 3.138490676879883, "step": 3283, "token_acc": 0.2830146094326697 }, { "epoch": 1.9252418645558487, "grad_norm": 0.39755368308773864, "learning_rate": 0.00013918291014389876, "loss": 3.175848960876465, "step": 3284, "token_acc": 0.2785844516999787 }, { "epoch": 1.9258282028730578, "grad_norm": 0.38604145871464735, "learning_rate": 0.00013918187623654767, "loss": 3.171117067337036, "step": 3285, "token_acc": 0.2802097013457254 }, { "epoch": 1.9264145411902667, "grad_norm": 0.4116837400507202, "learning_rate": 0.0001391808416793261, "loss": 3.1548666954040527, "step": 3286, "token_acc": 0.28296070929997125 }, { "epoch": 1.9270008795074758, "grad_norm": 0.47925215408818306, "learning_rate": 0.00013917980647224369, "loss": 3.126538038253784, "step": 3287, "token_acc": 0.2863094284941763 }, { "epoch": 1.927587217824685, "grad_norm": 0.40561069819793283, "learning_rate": 0.00013917877061531025, "loss": 3.130110502243042, "step": 3288, "token_acc": 0.28508181734137295 }, { "epoch": 1.928173556141894, "grad_norm": 0.40045760768739375, "learning_rate": 0.0001391777341085355, "loss": 3.0992798805236816, "step": 3289, "token_acc": 0.2909193402388192 }, { "epoch": 1.928759894459103, "grad_norm": 0.46216424801239697, "learning_rate": 0.00013917669695192914, "loss": 3.197916030883789, "step": 3290, "token_acc": 0.27465419039869815 }, { "epoch": 1.9293462327763118, "grad_norm": 0.49159086503092136, "learning_rate": 0.0001391756591455009, "loss": 3.1789021492004395, "step": 3291, "token_acc": 0.27898984248338593 }, { "epoch": 1.929932571093521, "grad_norm": 0.416132083820143, "learning_rate": 0.0001391746206892606, "loss": 3.1742429733276367, "step": 3292, "token_acc": 0.2802672264094932 }, { "epoch": 1.93051890941073, "grad_norm": 0.40880889684883, "learning_rate": 0.00013917358158321795, "loss": 3.1671812534332275, "step": 3293, "token_acc": 0.28001754540839974 }, { "epoch": 1.9311052477279391, "grad_norm": 0.42395819801369944, "learning_rate": 0.0001391725418273827, "loss": 3.1937198638916016, "step": 3294, "token_acc": 0.2752581537832047 }, { "epoch": 1.931691586045148, "grad_norm": 0.4761573407644075, "learning_rate": 0.00013917150142176462, "loss": 3.1711363792419434, "step": 3295, "token_acc": 0.2802139722604824 }, { "epoch": 1.9322779243623571, "grad_norm": 0.5038722462137357, "learning_rate": 0.0001391704603663735, "loss": 3.1723551750183105, "step": 3296, "token_acc": 0.2794002681811828 }, { "epoch": 1.932864262679566, "grad_norm": 0.4791020318949933, "learning_rate": 0.0001391694186612191, "loss": 3.1585421562194824, "step": 3297, "token_acc": 0.28360109560955293 }, { "epoch": 1.9334506009967751, "grad_norm": 0.4443220857718283, "learning_rate": 0.00013916837630631126, "loss": 3.156704902648926, "step": 3298, "token_acc": 0.2818789814817265 }, { "epoch": 1.9340369393139842, "grad_norm": 0.39094139863593524, "learning_rate": 0.0001391673333016597, "loss": 3.1366348266601562, "step": 3299, "token_acc": 0.28436502208808995 }, { "epoch": 1.9346232776311933, "grad_norm": 0.4443070477594585, "learning_rate": 0.00013916628964727427, "loss": 3.148031711578369, "step": 3300, "token_acc": 0.2835102051368225 }, { "epoch": 1.9352096159484022, "grad_norm": 0.35307826443732476, "learning_rate": 0.00013916524534316472, "loss": 3.178837776184082, "step": 3301, "token_acc": 0.28024760687899924 }, { "epoch": 1.9357959542656111, "grad_norm": 0.3964903356877723, "learning_rate": 0.00013916420038934094, "loss": 3.147000312805176, "step": 3302, "token_acc": 0.2851044529228142 }, { "epoch": 1.9363822925828202, "grad_norm": 0.3631784184651412, "learning_rate": 0.00013916315478581265, "loss": 3.1657633781433105, "step": 3303, "token_acc": 0.2800417862010086 }, { "epoch": 1.9369686309000294, "grad_norm": 0.43325326832762157, "learning_rate": 0.00013916210853258973, "loss": 3.1562752723693848, "step": 3304, "token_acc": 0.28346237119233086 }, { "epoch": 1.9375549692172385, "grad_norm": 0.4253623584273094, "learning_rate": 0.000139161061629682, "loss": 3.15427827835083, "step": 3305, "token_acc": 0.2819169161396049 }, { "epoch": 1.9381413075344474, "grad_norm": 0.3793038725521701, "learning_rate": 0.00013916001407709928, "loss": 3.151179075241089, "step": 3306, "token_acc": 0.28333660898079094 }, { "epoch": 1.9387276458516562, "grad_norm": 0.3051401964319418, "learning_rate": 0.00013915896587485147, "loss": 3.152759552001953, "step": 3307, "token_acc": 0.28244004624835156 }, { "epoch": 1.9393139841688654, "grad_norm": 0.3482694610961628, "learning_rate": 0.00013915791702294832, "loss": 3.1778111457824707, "step": 3308, "token_acc": 0.2780676358419388 }, { "epoch": 1.9399003224860745, "grad_norm": 0.3646645050090139, "learning_rate": 0.00013915686752139975, "loss": 3.1917200088500977, "step": 3309, "token_acc": 0.2775195205132174 }, { "epoch": 1.9404866608032836, "grad_norm": 0.3494030587340589, "learning_rate": 0.00013915581737021558, "loss": 3.1626620292663574, "step": 3310, "token_acc": 0.28083597491239704 }, { "epoch": 1.9410729991204925, "grad_norm": 0.36655816396878566, "learning_rate": 0.00013915476656940572, "loss": 3.1225814819335938, "step": 3311, "token_acc": 0.286216775425752 }, { "epoch": 1.9416593374377016, "grad_norm": 0.39445667455175293, "learning_rate": 0.00013915371511898, "loss": 3.1573116779327393, "step": 3312, "token_acc": 0.2815383588168525 }, { "epoch": 1.9422456757549105, "grad_norm": 0.4746391513523968, "learning_rate": 0.00013915266301894834, "loss": 3.2001843452453613, "step": 3313, "token_acc": 0.27618760720892077 }, { "epoch": 1.9428320140721196, "grad_norm": 0.38343661437801124, "learning_rate": 0.00013915161026932055, "loss": 3.1935229301452637, "step": 3314, "token_acc": 0.2758296615133841 }, { "epoch": 1.9434183523893287, "grad_norm": 0.4441171186075295, "learning_rate": 0.00013915055687010658, "loss": 3.1794397830963135, "step": 3315, "token_acc": 0.2792302577375575 }, { "epoch": 1.9440046907065378, "grad_norm": 0.5073690761317307, "learning_rate": 0.00013914950282131633, "loss": 3.1501359939575195, "step": 3316, "token_acc": 0.2815280370741534 }, { "epoch": 1.9445910290237467, "grad_norm": 0.4383736735160132, "learning_rate": 0.00013914844812295966, "loss": 3.1700868606567383, "step": 3317, "token_acc": 0.2775822979008603 }, { "epoch": 1.9451773673409556, "grad_norm": 0.37026449453039234, "learning_rate": 0.0001391473927750465, "loss": 3.1751694679260254, "step": 3318, "token_acc": 0.27938548444573497 }, { "epoch": 1.9457637056581647, "grad_norm": 0.4752379004371895, "learning_rate": 0.0001391463367775868, "loss": 3.151136875152588, "step": 3319, "token_acc": 0.2839719688530707 }, { "epoch": 1.9463500439753738, "grad_norm": 0.3752967177179502, "learning_rate": 0.00013914528013059038, "loss": 3.1590545177459717, "step": 3320, "token_acc": 0.2810434689878564 }, { "epoch": 1.946936382292583, "grad_norm": 0.3418851457918463, "learning_rate": 0.00013914422283406726, "loss": 3.09909725189209, "step": 3321, "token_acc": 0.2896089031755309 }, { "epoch": 1.9475227206097918, "grad_norm": 0.3777334735992987, "learning_rate": 0.00013914316488802735, "loss": 3.158996105194092, "step": 3322, "token_acc": 0.2797934545688525 }, { "epoch": 1.948109058927001, "grad_norm": 0.35720934881011457, "learning_rate": 0.00013914210629248057, "loss": 3.1173105239868164, "step": 3323, "token_acc": 0.2873663315617596 }, { "epoch": 1.9486953972442098, "grad_norm": 0.3415138545348308, "learning_rate": 0.00013914104704743684, "loss": 3.2053141593933105, "step": 3324, "token_acc": 0.2748350126143588 }, { "epoch": 1.949281735561419, "grad_norm": 0.37447937251340824, "learning_rate": 0.0001391399871529062, "loss": 3.108191967010498, "step": 3325, "token_acc": 0.2892518934174117 }, { "epoch": 1.949868073878628, "grad_norm": 0.46367929391170054, "learning_rate": 0.0001391389266088985, "loss": 3.1765410900115967, "step": 3326, "token_acc": 0.27848255112045006 }, { "epoch": 1.9504544121958372, "grad_norm": 0.5380620646381263, "learning_rate": 0.00013913786541542376, "loss": 3.1276051998138428, "step": 3327, "token_acc": 0.2853071229706956 }, { "epoch": 1.951040750513046, "grad_norm": 0.42845334335984475, "learning_rate": 0.00013913680357249196, "loss": 3.164273262023926, "step": 3328, "token_acc": 0.28181472852512157 }, { "epoch": 1.951627088830255, "grad_norm": 0.37886830214080564, "learning_rate": 0.00013913574108011302, "loss": 3.163154125213623, "step": 3329, "token_acc": 0.282292258485365 }, { "epoch": 1.952213427147464, "grad_norm": 0.5329529024726649, "learning_rate": 0.00013913467793829696, "loss": 3.174312114715576, "step": 3330, "token_acc": 0.27895905889005795 }, { "epoch": 1.9527997654646732, "grad_norm": 0.47476736074615217, "learning_rate": 0.0001391336141470538, "loss": 3.166505813598633, "step": 3331, "token_acc": 0.2800059025458612 }, { "epoch": 1.9533861037818823, "grad_norm": 0.5498986721983645, "learning_rate": 0.00013913254970639345, "loss": 3.110722064971924, "step": 3332, "token_acc": 0.28798181706624126 }, { "epoch": 1.9539724420990912, "grad_norm": 0.5331648008460982, "learning_rate": 0.00013913148461632598, "loss": 3.1444947719573975, "step": 3333, "token_acc": 0.2831374362261653 }, { "epoch": 1.9545587804163, "grad_norm": 0.4269616937981869, "learning_rate": 0.00013913041887686137, "loss": 3.156773090362549, "step": 3334, "token_acc": 0.28320404489919576 }, { "epoch": 1.9551451187335092, "grad_norm": 0.5078127698860585, "learning_rate": 0.0001391293524880096, "loss": 3.176126003265381, "step": 3335, "token_acc": 0.27880362899892536 }, { "epoch": 1.9557314570507183, "grad_norm": 0.5181925024168382, "learning_rate": 0.00013912828544978076, "loss": 3.1840755939483643, "step": 3336, "token_acc": 0.27890622000998067 }, { "epoch": 1.9563177953679274, "grad_norm": 0.48023841551979124, "learning_rate": 0.0001391272177621848, "loss": 3.1813955307006836, "step": 3337, "token_acc": 0.2765049721330503 }, { "epoch": 1.9569041336851363, "grad_norm": 0.4516200328695524, "learning_rate": 0.00013912614942523176, "loss": 3.1648964881896973, "step": 3338, "token_acc": 0.28140642297567153 }, { "epoch": 1.9574904720023454, "grad_norm": 0.42577968928190246, "learning_rate": 0.00013912508043893173, "loss": 3.140504837036133, "step": 3339, "token_acc": 0.2831769042032293 }, { "epoch": 1.9580768103195543, "grad_norm": 0.5111732639417289, "learning_rate": 0.0001391240108032947, "loss": 3.179898262023926, "step": 3340, "token_acc": 0.27670737108509963 }, { "epoch": 1.9586631486367634, "grad_norm": 0.4552748307317893, "learning_rate": 0.00013912294051833074, "loss": 3.18254017829895, "step": 3341, "token_acc": 0.27814316506194897 }, { "epoch": 1.9592494869539725, "grad_norm": 0.48351955739373526, "learning_rate": 0.0001391218695840499, "loss": 3.1951117515563965, "step": 3342, "token_acc": 0.2778835193383347 }, { "epoch": 1.9598358252711816, "grad_norm": 0.4760817325750893, "learning_rate": 0.00013912079800046221, "loss": 3.1729722023010254, "step": 3343, "token_acc": 0.2783520362185126 }, { "epoch": 1.9604221635883905, "grad_norm": 0.4950446273086604, "learning_rate": 0.0001391197257675778, "loss": 3.1841742992401123, "step": 3344, "token_acc": 0.2775857214366162 }, { "epoch": 1.9610085019055994, "grad_norm": 0.5373742829542658, "learning_rate": 0.00013911865288540669, "loss": 3.183089256286621, "step": 3345, "token_acc": 0.2785410725928719 }, { "epoch": 1.9615948402228085, "grad_norm": 0.41963336031516096, "learning_rate": 0.000139117579353959, "loss": 3.1570446491241455, "step": 3346, "token_acc": 0.2805888824854613 }, { "epoch": 1.9621811785400176, "grad_norm": 0.41231944365877926, "learning_rate": 0.00013911650517324476, "loss": 3.163341522216797, "step": 3347, "token_acc": 0.2817850939960098 }, { "epoch": 1.9627675168572267, "grad_norm": 0.4446434675061905, "learning_rate": 0.0001391154303432741, "loss": 3.1212615966796875, "step": 3348, "token_acc": 0.2857479668335585 }, { "epoch": 1.9633538551744356, "grad_norm": 0.3920935737036198, "learning_rate": 0.00013911435486405708, "loss": 3.140012502670288, "step": 3349, "token_acc": 0.2854368114370536 }, { "epoch": 1.9639401934916447, "grad_norm": 0.36324685591141376, "learning_rate": 0.00013911327873560386, "loss": 3.2009711265563965, "step": 3350, "token_acc": 0.2753081048238647 }, { "epoch": 1.9645265318088536, "grad_norm": 0.4320949402726731, "learning_rate": 0.00013911220195792452, "loss": 3.1375491619110107, "step": 3351, "token_acc": 0.2835975571166382 }, { "epoch": 1.9651128701260627, "grad_norm": 0.4120621234884834, "learning_rate": 0.00013911112453102916, "loss": 3.1618008613586426, "step": 3352, "token_acc": 0.28184359696652916 }, { "epoch": 1.9656992084432718, "grad_norm": 0.39869757060849625, "learning_rate": 0.00013911004645492792, "loss": 3.1614646911621094, "step": 3353, "token_acc": 0.28210271603896925 }, { "epoch": 1.966285546760481, "grad_norm": 0.388628049288299, "learning_rate": 0.00013910896772963092, "loss": 3.171799421310425, "step": 3354, "token_acc": 0.2792662397119742 }, { "epoch": 1.9668718850776898, "grad_norm": 0.4950069484258709, "learning_rate": 0.00013910788835514828, "loss": 3.1041934490203857, "step": 3355, "token_acc": 0.28896797509535843 }, { "epoch": 1.9674582233948987, "grad_norm": 0.420312074379008, "learning_rate": 0.00013910680833149016, "loss": 3.145193099975586, "step": 3356, "token_acc": 0.2844221293515322 }, { "epoch": 1.9680445617121078, "grad_norm": 0.4379301611286279, "learning_rate": 0.0001391057276586667, "loss": 3.1596078872680664, "step": 3357, "token_acc": 0.28141256983986956 }, { "epoch": 1.968630900029317, "grad_norm": 0.4576193199560471, "learning_rate": 0.00013910464633668808, "loss": 3.137131690979004, "step": 3358, "token_acc": 0.28517970472915366 }, { "epoch": 1.969217238346526, "grad_norm": 0.3831662506452973, "learning_rate": 0.00013910356436556439, "loss": 3.10365629196167, "step": 3359, "token_acc": 0.28884990088094054 }, { "epoch": 1.969803576663735, "grad_norm": 0.42800697376584795, "learning_rate": 0.00013910248174530584, "loss": 3.1386098861694336, "step": 3360, "token_acc": 0.28403796107275875 }, { "epoch": 1.9703899149809438, "grad_norm": 0.36777368578711706, "learning_rate": 0.0001391013984759226, "loss": 3.1901535987854004, "step": 3361, "token_acc": 0.27514457168624845 }, { "epoch": 1.970976253298153, "grad_norm": 0.43693785479480013, "learning_rate": 0.00013910031455742483, "loss": 3.185361385345459, "step": 3362, "token_acc": 0.27827186456017994 }, { "epoch": 1.971562591615362, "grad_norm": 0.4080545794625674, "learning_rate": 0.0001390992299898227, "loss": 3.1773855686187744, "step": 3363, "token_acc": 0.27940934602495837 }, { "epoch": 1.9721489299325712, "grad_norm": 0.33149753041246305, "learning_rate": 0.00013909814477312645, "loss": 3.158292770385742, "step": 3364, "token_acc": 0.28046724535837236 }, { "epoch": 1.97273526824978, "grad_norm": 0.41432338329669927, "learning_rate": 0.0001390970589073462, "loss": 3.122222423553467, "step": 3365, "token_acc": 0.2868338477366255 }, { "epoch": 1.9733216065669892, "grad_norm": 0.38028346339070257, "learning_rate": 0.00013909597239249223, "loss": 3.1400723457336426, "step": 3366, "token_acc": 0.28621956339210747 }, { "epoch": 1.973907944884198, "grad_norm": 0.48225520172457403, "learning_rate": 0.0001390948852285747, "loss": 3.174577474594116, "step": 3367, "token_acc": 0.27805069077773104 }, { "epoch": 1.9744942832014072, "grad_norm": 0.4892394984713239, "learning_rate": 0.0001390937974156038, "loss": 3.2006263732910156, "step": 3368, "token_acc": 0.27479479035481663 }, { "epoch": 1.9750806215186163, "grad_norm": 0.37513538712981825, "learning_rate": 0.0001390927089535898, "loss": 3.1002066135406494, "step": 3369, "token_acc": 0.29237799711937645 }, { "epoch": 1.9756669598358254, "grad_norm": 0.3788080452458428, "learning_rate": 0.00013909161984254292, "loss": 3.163529634475708, "step": 3370, "token_acc": 0.28033904159636513 }, { "epoch": 1.9762532981530343, "grad_norm": 0.36749549261683495, "learning_rate": 0.00013909053008247333, "loss": 3.174147605895996, "step": 3371, "token_acc": 0.27931145253499706 }, { "epoch": 1.9768396364702432, "grad_norm": 0.4563196539727423, "learning_rate": 0.00013908943967339135, "loss": 3.1264514923095703, "step": 3372, "token_acc": 0.28626685162489396 }, { "epoch": 1.9774259747874523, "grad_norm": 0.5180713425226017, "learning_rate": 0.0001390883486153072, "loss": 3.1820428371429443, "step": 3373, "token_acc": 0.27802897432756785 }, { "epoch": 1.9780123131046614, "grad_norm": 0.553205273299101, "learning_rate": 0.00013908725690823105, "loss": 3.1747257709503174, "step": 3374, "token_acc": 0.2788229517025804 }, { "epoch": 1.9785986514218705, "grad_norm": 0.5342559386292113, "learning_rate": 0.00013908616455217328, "loss": 3.1705732345581055, "step": 3375, "token_acc": 0.28006252945319665 }, { "epoch": 1.9791849897390794, "grad_norm": 0.48529099459159397, "learning_rate": 0.00013908507154714405, "loss": 3.1789064407348633, "step": 3376, "token_acc": 0.27894978155582845 }, { "epoch": 1.9797713280562885, "grad_norm": 0.44637011902567103, "learning_rate": 0.00013908397789315366, "loss": 3.1910946369171143, "step": 3377, "token_acc": 0.2766798418972332 }, { "epoch": 1.9803576663734974, "grad_norm": 0.4588139083747449, "learning_rate": 0.00013908288359021243, "loss": 3.1674647331237793, "step": 3378, "token_acc": 0.2797496298989689 }, { "epoch": 1.9809440046907065, "grad_norm": 0.45585553465898915, "learning_rate": 0.00013908178863833055, "loss": 3.167445182800293, "step": 3379, "token_acc": 0.280860807517464 }, { "epoch": 1.9815303430079156, "grad_norm": 0.49982200519603226, "learning_rate": 0.00013908069303751838, "loss": 3.1494970321655273, "step": 3380, "token_acc": 0.28157468734790614 }, { "epoch": 1.9821166813251248, "grad_norm": 0.4795147066178234, "learning_rate": 0.0001390795967877862, "loss": 3.139040946960449, "step": 3381, "token_acc": 0.28312468209232555 }, { "epoch": 1.9827030196423336, "grad_norm": 0.36394608559749825, "learning_rate": 0.00013907849988914426, "loss": 3.158405303955078, "step": 3382, "token_acc": 0.28004248163554296 }, { "epoch": 1.9832893579595425, "grad_norm": 0.41611010968303497, "learning_rate": 0.00013907740234160292, "loss": 3.1257734298706055, "step": 3383, "token_acc": 0.28653803504102904 }, { "epoch": 1.9838756962767516, "grad_norm": 0.3813537595018297, "learning_rate": 0.00013907630414517247, "loss": 3.1708922386169434, "step": 3384, "token_acc": 0.28199272889576266 }, { "epoch": 1.9844620345939608, "grad_norm": 0.46635818782639954, "learning_rate": 0.00013907520529986322, "loss": 3.1240193843841553, "step": 3385, "token_acc": 0.2855685444280805 }, { "epoch": 1.9850483729111699, "grad_norm": 0.40974716316719545, "learning_rate": 0.0001390741058056855, "loss": 3.1687774658203125, "step": 3386, "token_acc": 0.27905286088088577 }, { "epoch": 1.9856347112283788, "grad_norm": 0.46702609892230296, "learning_rate": 0.00013907300566264963, "loss": 3.151608943939209, "step": 3387, "token_acc": 0.28313424998343095 }, { "epoch": 1.9862210495455876, "grad_norm": 0.4676852203644609, "learning_rate": 0.00013907190487076596, "loss": 3.134857416152954, "step": 3388, "token_acc": 0.28581432494315184 }, { "epoch": 1.9868073878627968, "grad_norm": 0.4116647375498723, "learning_rate": 0.0001390708034300448, "loss": 3.158132553100586, "step": 3389, "token_acc": 0.28150687559961624 }, { "epoch": 1.9873937261800059, "grad_norm": 0.42256908864682385, "learning_rate": 0.00013906970134049652, "loss": 3.142395496368408, "step": 3390, "token_acc": 0.28337116764514025 }, { "epoch": 1.987980064497215, "grad_norm": 0.342299822929288, "learning_rate": 0.00013906859860213146, "loss": 3.151010513305664, "step": 3391, "token_acc": 0.281481932108529 }, { "epoch": 1.9885664028144239, "grad_norm": 0.41686605485067674, "learning_rate": 0.00013906749521496, "loss": 3.141700029373169, "step": 3392, "token_acc": 0.2832681625910635 }, { "epoch": 1.989152741131633, "grad_norm": 0.438087000357279, "learning_rate": 0.0001390663911789925, "loss": 3.2011260986328125, "step": 3393, "token_acc": 0.27543164320532093 }, { "epoch": 1.9897390794488419, "grad_norm": 0.43210630934866456, "learning_rate": 0.00013906528649423934, "loss": 3.184021472930908, "step": 3394, "token_acc": 0.2769989649289002 }, { "epoch": 1.990325417766051, "grad_norm": 0.39396970731104547, "learning_rate": 0.00013906418116071083, "loss": 3.1602015495300293, "step": 3395, "token_acc": 0.2811733937175484 }, { "epoch": 1.99091175608326, "grad_norm": 0.3835246207990156, "learning_rate": 0.00013906307517841743, "loss": 3.174670934677124, "step": 3396, "token_acc": 0.2802589132347396 }, { "epoch": 1.9914980944004692, "grad_norm": 0.4502263409154559, "learning_rate": 0.0001390619685473695, "loss": 3.1310057640075684, "step": 3397, "token_acc": 0.28627079275469497 }, { "epoch": 1.992084432717678, "grad_norm": 0.3356345733414897, "learning_rate": 0.00013906086126757745, "loss": 3.1600825786590576, "step": 3398, "token_acc": 0.28327654875361774 }, { "epoch": 1.992670771034887, "grad_norm": 0.39931703854496614, "learning_rate": 0.00013905975333905165, "loss": 3.194636344909668, "step": 3399, "token_acc": 0.2775292786658566 }, { "epoch": 1.993257109352096, "grad_norm": 0.5363229668098536, "learning_rate": 0.00013905864476180252, "loss": 3.194115400314331, "step": 3400, "token_acc": 0.27615912574816476 }, { "epoch": 1.9938434476693052, "grad_norm": 0.36189066126935593, "learning_rate": 0.00013905753553584052, "loss": 3.172762632369995, "step": 3401, "token_acc": 0.27922836166826814 }, { "epoch": 1.9944297859865143, "grad_norm": 0.43477399592264865, "learning_rate": 0.000139056425661176, "loss": 3.1928272247314453, "step": 3402, "token_acc": 0.2779728777784421 }, { "epoch": 1.9950161243037232, "grad_norm": 0.5116720962708814, "learning_rate": 0.0001390553151378194, "loss": 3.179208755493164, "step": 3403, "token_acc": 0.2773243860949061 }, { "epoch": 1.9956024626209323, "grad_norm": 0.4601297982476801, "learning_rate": 0.0001390542039657812, "loss": 3.1578078269958496, "step": 3404, "token_acc": 0.2812150082301985 }, { "epoch": 1.9961888009381412, "grad_norm": 0.36016640229048014, "learning_rate": 0.00013905309214507178, "loss": 3.1756491661071777, "step": 3405, "token_acc": 0.27825384828696176 }, { "epoch": 1.9967751392553503, "grad_norm": 0.45878020867165564, "learning_rate": 0.00013905197967570163, "loss": 3.157804012298584, "step": 3406, "token_acc": 0.27966751614850033 }, { "epoch": 1.9973614775725594, "grad_norm": 0.5320286410410868, "learning_rate": 0.00013905086655768115, "loss": 3.162482738494873, "step": 3407, "token_acc": 0.28064756126094625 }, { "epoch": 1.9979478158897686, "grad_norm": 0.31122430183490246, "learning_rate": 0.00013904975279102087, "loss": 3.1572160720825195, "step": 3408, "token_acc": 0.28231274527433703 }, { "epoch": 1.9985341542069774, "grad_norm": 0.39401512308512576, "learning_rate": 0.0001390486383757312, "loss": 3.128884792327881, "step": 3409, "token_acc": 0.2859725573908362 }, { "epoch": 1.9991204925241863, "grad_norm": 0.3304642291278498, "learning_rate": 0.00013904752331182259, "loss": 3.14455509185791, "step": 3410, "token_acc": 0.28313608990647277 }, { "epoch": 1.9997068308413954, "grad_norm": 0.3900789751129363, "learning_rate": 0.00013904640759930555, "loss": 3.147974729537964, "step": 3411, "token_acc": 0.2815449028076655 }, { "epoch": 2.0, "grad_norm": 0.4310696272541357, "learning_rate": 0.00013904529123819054, "loss": 3.139267921447754, "step": 3412, "token_acc": 0.28635098877074433 }, { "epoch": 2.0, "eval_loss": 3.1359596252441406, "eval_runtime": 22.002, "eval_samples_per_second": 11.635, "eval_steps_per_second": 1.454, "eval_token_acc": 0.28405878778806104, "step": 3412 }, { "epoch": 2.000586338317209, "grad_norm": 0.4669908189040292, "learning_rate": 0.0001390441742284881, "loss": 3.072059154510498, "step": 3413, "token_acc": 0.2912753173855335 }, { "epoch": 2.0011726766344182, "grad_norm": 0.43554543297573217, "learning_rate": 0.00013904305657020863, "loss": 3.103198528289795, "step": 3414, "token_acc": 0.2872645329574976 }, { "epoch": 2.001759014951627, "grad_norm": 0.4301203737295776, "learning_rate": 0.00013904193826336271, "loss": 3.1443958282470703, "step": 3415, "token_acc": 0.2818147374952005 }, { "epoch": 2.002345353268836, "grad_norm": 0.4727902843210217, "learning_rate": 0.00013904081930796083, "loss": 3.1033222675323486, "step": 3416, "token_acc": 0.28631646444498754 }, { "epoch": 2.002931691586045, "grad_norm": 0.3954903682659769, "learning_rate": 0.00013903969970401346, "loss": 3.0854570865631104, "step": 3417, "token_acc": 0.2891944231301252 }, { "epoch": 2.0035180299032542, "grad_norm": 0.416162010598298, "learning_rate": 0.00013903857945153116, "loss": 3.046881914138794, "step": 3418, "token_acc": 0.2950915223326926 }, { "epoch": 2.0041043682204633, "grad_norm": 0.41094544425404733, "learning_rate": 0.0001390374585505244, "loss": 3.1224188804626465, "step": 3419, "token_acc": 0.2849217686809312 }, { "epoch": 2.0046907065376725, "grad_norm": 0.39276595654606583, "learning_rate": 0.00013903633700100378, "loss": 3.0865747928619385, "step": 3420, "token_acc": 0.2888065015357855 }, { "epoch": 2.005277044854881, "grad_norm": 0.35315777119064307, "learning_rate": 0.0001390352148029798, "loss": 3.0870871543884277, "step": 3421, "token_acc": 0.2890270350722388 }, { "epoch": 2.0058633831720902, "grad_norm": 0.42520944556099705, "learning_rate": 0.000139034091956463, "loss": 3.0999088287353516, "step": 3422, "token_acc": 0.289940221493437 }, { "epoch": 2.0064497214892993, "grad_norm": 0.39026284731512934, "learning_rate": 0.00013903296846146392, "loss": 3.0217690467834473, "step": 3423, "token_acc": 0.29762488840273144 }, { "epoch": 2.0070360598065085, "grad_norm": 0.39492583402254616, "learning_rate": 0.00013903184431799314, "loss": 3.0698060989379883, "step": 3424, "token_acc": 0.29152610657498923 }, { "epoch": 2.0076223981237176, "grad_norm": 0.41938728405294856, "learning_rate": 0.00013903071952606118, "loss": 3.063624382019043, "step": 3425, "token_acc": 0.2926996464381478 }, { "epoch": 2.0082087364409262, "grad_norm": 0.47611770461397407, "learning_rate": 0.00013902959408567867, "loss": 3.0697710514068604, "step": 3426, "token_acc": 0.29184361121835023 }, { "epoch": 2.0087950747581353, "grad_norm": 0.3988898565195493, "learning_rate": 0.0001390284679968561, "loss": 3.083996295928955, "step": 3427, "token_acc": 0.2904132445816811 }, { "epoch": 2.0093814130753445, "grad_norm": 0.4296929927232593, "learning_rate": 0.00013902734125960413, "loss": 3.071540355682373, "step": 3428, "token_acc": 0.2923533006679604 }, { "epoch": 2.0099677513925536, "grad_norm": 0.3872715929520596, "learning_rate": 0.0001390262138739333, "loss": 3.067164421081543, "step": 3429, "token_acc": 0.2914328040561688 }, { "epoch": 2.0105540897097627, "grad_norm": 0.41043113711035617, "learning_rate": 0.00013902508583985416, "loss": 3.0388574600219727, "step": 3430, "token_acc": 0.2961614298849497 }, { "epoch": 2.0111404280269713, "grad_norm": 0.38033273177235727, "learning_rate": 0.0001390239571573774, "loss": 3.0351784229278564, "step": 3431, "token_acc": 0.29528218901432934 }, { "epoch": 2.0117267663441805, "grad_norm": 0.395434071298658, "learning_rate": 0.00013902282782651354, "loss": 3.0387587547302246, "step": 3432, "token_acc": 0.29488862093038015 }, { "epoch": 2.0123131046613896, "grad_norm": 0.4574238969049557, "learning_rate": 0.00013902169784727324, "loss": 3.0689291954040527, "step": 3433, "token_acc": 0.2921390647253255 }, { "epoch": 2.0128994429785987, "grad_norm": 0.402623654896216, "learning_rate": 0.00013902056721966708, "loss": 3.045943260192871, "step": 3434, "token_acc": 0.29525997366652035 }, { "epoch": 2.013485781295808, "grad_norm": 0.4496952249744904, "learning_rate": 0.00013901943594370571, "loss": 3.0669713020324707, "step": 3435, "token_acc": 0.2930701516267887 }, { "epoch": 2.014072119613017, "grad_norm": 0.3932798656168556, "learning_rate": 0.00013901830401939975, "loss": 3.065974235534668, "step": 3436, "token_acc": 0.29347344955762594 }, { "epoch": 2.0146584579302256, "grad_norm": 0.47407363665798985, "learning_rate": 0.00013901717144675983, "loss": 3.04148530960083, "step": 3437, "token_acc": 0.2943343873350028 }, { "epoch": 2.0152447962474347, "grad_norm": 0.3859093513079026, "learning_rate": 0.00013901603822579655, "loss": 3.048157215118408, "step": 3438, "token_acc": 0.2938826006439368 }, { "epoch": 2.015831134564644, "grad_norm": 0.40237225387247594, "learning_rate": 0.00013901490435652063, "loss": 3.0712270736694336, "step": 3439, "token_acc": 0.29054114847842605 }, { "epoch": 2.016417472881853, "grad_norm": 0.4909742982888658, "learning_rate": 0.00013901376983894265, "loss": 3.079235315322876, "step": 3440, "token_acc": 0.29215351233370823 }, { "epoch": 2.017003811199062, "grad_norm": 0.31975014475213787, "learning_rate": 0.00013901263467307334, "loss": 3.0642590522766113, "step": 3441, "token_acc": 0.29168100367909794 }, { "epoch": 2.0175901495162707, "grad_norm": 0.4222532375163001, "learning_rate": 0.0001390114988589233, "loss": 3.084491729736328, "step": 3442, "token_acc": 0.291710751796941 }, { "epoch": 2.01817648783348, "grad_norm": 0.38970997897605225, "learning_rate": 0.0001390103623965032, "loss": 3.132301092147827, "step": 3443, "token_acc": 0.282129687689501 }, { "epoch": 2.018762826150689, "grad_norm": 0.3738976978706402, "learning_rate": 0.00013900922528582377, "loss": 3.0894436836242676, "step": 3444, "token_acc": 0.2888680699315493 }, { "epoch": 2.019349164467898, "grad_norm": 0.34166636403749784, "learning_rate": 0.00013900808752689568, "loss": 3.086480140686035, "step": 3445, "token_acc": 0.2889001623893929 }, { "epoch": 2.019935502785107, "grad_norm": 0.3511117298739608, "learning_rate": 0.00013900694911972956, "loss": 3.0834155082702637, "step": 3446, "token_acc": 0.29072972234527655 }, { "epoch": 2.0205218411023163, "grad_norm": 0.3345105659173039, "learning_rate": 0.00013900581006433615, "loss": 3.0512261390686035, "step": 3447, "token_acc": 0.2942930530542352 }, { "epoch": 2.021108179419525, "grad_norm": 0.3406395198209844, "learning_rate": 0.00013900467036072613, "loss": 3.0245869159698486, "step": 3448, "token_acc": 0.29897653551536474 }, { "epoch": 2.021694517736734, "grad_norm": 0.3231953406581536, "learning_rate": 0.00013900353000891022, "loss": 3.0322794914245605, "step": 3449, "token_acc": 0.2982415086005164 }, { "epoch": 2.022280856053943, "grad_norm": 0.34774244541798166, "learning_rate": 0.00013900238900889914, "loss": 3.0630829334259033, "step": 3450, "token_acc": 0.2910205882800493 }, { "epoch": 2.0228671943711523, "grad_norm": 0.3362818856368379, "learning_rate": 0.0001390012473607036, "loss": 3.067298412322998, "step": 3451, "token_acc": 0.29096726950190027 }, { "epoch": 2.0234535326883614, "grad_norm": 0.3691514087950321, "learning_rate": 0.00013900010506433434, "loss": 3.1181302070617676, "step": 3452, "token_acc": 0.2870263879617491 }, { "epoch": 2.02403987100557, "grad_norm": 0.3403475005569846, "learning_rate": 0.00013899896211980203, "loss": 2.9972331523895264, "step": 3453, "token_acc": 0.3017676834460835 }, { "epoch": 2.024626209322779, "grad_norm": 0.326686025552459, "learning_rate": 0.00013899781852711745, "loss": 3.1233699321746826, "step": 3454, "token_acc": 0.2844483761280902 }, { "epoch": 2.0252125476399883, "grad_norm": 0.4150678908932345, "learning_rate": 0.00013899667428629136, "loss": 3.0711934566497803, "step": 3455, "token_acc": 0.29046136281073226 }, { "epoch": 2.0257988859571974, "grad_norm": 0.3629808762699235, "learning_rate": 0.00013899552939733448, "loss": 3.0263452529907227, "step": 3456, "token_acc": 0.2984379645754577 }, { "epoch": 2.0263852242744065, "grad_norm": 0.3756951973299575, "learning_rate": 0.0001389943838602576, "loss": 3.035609006881714, "step": 3457, "token_acc": 0.296783867062837 }, { "epoch": 2.026971562591615, "grad_norm": 0.4289039762091412, "learning_rate": 0.00013899323767507143, "loss": 3.0780153274536133, "step": 3458, "token_acc": 0.28908465734555083 }, { "epoch": 2.0275579009088243, "grad_norm": 0.4474921310454204, "learning_rate": 0.00013899209084178676, "loss": 2.995811939239502, "step": 3459, "token_acc": 0.3021539336882394 }, { "epoch": 2.0281442392260334, "grad_norm": 0.47111657686215347, "learning_rate": 0.00013899094336041436, "loss": 3.089670419692993, "step": 3460, "token_acc": 0.28995273082258644 }, { "epoch": 2.0287305775432425, "grad_norm": 0.5961298681369821, "learning_rate": 0.00013898979523096502, "loss": 3.0654194355010986, "step": 3461, "token_acc": 0.2913794339680621 }, { "epoch": 2.0293169158604516, "grad_norm": 0.573750621079621, "learning_rate": 0.00013898864645344955, "loss": 3.057342290878296, "step": 3462, "token_acc": 0.2934591042765979 }, { "epoch": 2.0299032541776607, "grad_norm": 0.39945754262711736, "learning_rate": 0.00013898749702787866, "loss": 3.0644164085388184, "step": 3463, "token_acc": 0.29409398388787145 }, { "epoch": 2.0304895924948694, "grad_norm": 0.412384414730368, "learning_rate": 0.00013898634695426324, "loss": 3.058180332183838, "step": 3464, "token_acc": 0.2938338411285717 }, { "epoch": 2.0310759308120785, "grad_norm": 0.5066905819476569, "learning_rate": 0.000138985196232614, "loss": 3.0717389583587646, "step": 3465, "token_acc": 0.292684232789501 }, { "epoch": 2.0316622691292876, "grad_norm": 0.48871545088828505, "learning_rate": 0.00013898404486294185, "loss": 3.067204236984253, "step": 3466, "token_acc": 0.2935458879461016 }, { "epoch": 2.0322486074464967, "grad_norm": 0.4449837081070912, "learning_rate": 0.00013898289284525753, "loss": 3.0645785331726074, "step": 3467, "token_acc": 0.29097301854517554 }, { "epoch": 2.032834945763706, "grad_norm": 0.3815733217074833, "learning_rate": 0.0001389817401795719, "loss": 3.0932931900024414, "step": 3468, "token_acc": 0.287144719552194 }, { "epoch": 2.0334212840809145, "grad_norm": 0.44770759116377795, "learning_rate": 0.00013898058686589575, "loss": 3.0803585052490234, "step": 3469, "token_acc": 0.2899080776045779 }, { "epoch": 2.0340076223981236, "grad_norm": 0.3652633585681172, "learning_rate": 0.00013897943290423997, "loss": 3.0651512145996094, "step": 3470, "token_acc": 0.29123762825751387 }, { "epoch": 2.0345939607153327, "grad_norm": 0.3784496445286311, "learning_rate": 0.00013897827829461535, "loss": 3.044992208480835, "step": 3471, "token_acc": 0.29525641827775345 }, { "epoch": 2.035180299032542, "grad_norm": 0.33637936388710926, "learning_rate": 0.00013897712303703275, "loss": 3.0728578567504883, "step": 3472, "token_acc": 0.29096464517756226 }, { "epoch": 2.035766637349751, "grad_norm": 0.3820809499335438, "learning_rate": 0.00013897596713150306, "loss": 3.087179660797119, "step": 3473, "token_acc": 0.28980674026969366 }, { "epoch": 2.03635297566696, "grad_norm": 0.38592561104361417, "learning_rate": 0.00013897481057803708, "loss": 3.0463616847991943, "step": 3474, "token_acc": 0.2947199411825126 }, { "epoch": 2.0369393139841687, "grad_norm": 0.3374260129650237, "learning_rate": 0.0001389736533766457, "loss": 3.030139446258545, "step": 3475, "token_acc": 0.2970991123400071 }, { "epoch": 2.037525652301378, "grad_norm": 0.40412707712917734, "learning_rate": 0.0001389724955273398, "loss": 3.0653152465820312, "step": 3476, "token_acc": 0.2906515742020299 }, { "epoch": 2.038111990618587, "grad_norm": 0.44707635396474155, "learning_rate": 0.00013897133703013023, "loss": 3.0286879539489746, "step": 3477, "token_acc": 0.29569523435967837 }, { "epoch": 2.038698328935796, "grad_norm": 0.45007508827377335, "learning_rate": 0.0001389701778850279, "loss": 3.066561460494995, "step": 3478, "token_acc": 0.294022135670904 }, { "epoch": 2.039284667253005, "grad_norm": 0.3862571054450777, "learning_rate": 0.0001389690180920437, "loss": 3.0781593322753906, "step": 3479, "token_acc": 0.29256291047882005 }, { "epoch": 2.039871005570214, "grad_norm": 0.37927662650382504, "learning_rate": 0.00013896785765118847, "loss": 3.0932347774505615, "step": 3480, "token_acc": 0.2885424793793854 }, { "epoch": 2.040457343887423, "grad_norm": 0.42765202665111174, "learning_rate": 0.0001389666965624732, "loss": 3.059185028076172, "step": 3481, "token_acc": 0.29528892189342615 }, { "epoch": 2.041043682204632, "grad_norm": 0.3711948807703126, "learning_rate": 0.00013896553482590872, "loss": 3.0578880310058594, "step": 3482, "token_acc": 0.29277653026948824 }, { "epoch": 2.041630020521841, "grad_norm": 0.36493182997328655, "learning_rate": 0.00013896437244150596, "loss": 3.035597801208496, "step": 3483, "token_acc": 0.29741378162184046 }, { "epoch": 2.0422163588390503, "grad_norm": 0.38724176365213764, "learning_rate": 0.0001389632094092759, "loss": 3.091658115386963, "step": 3484, "token_acc": 0.2894869533898831 }, { "epoch": 2.042802697156259, "grad_norm": 0.37577560591828546, "learning_rate": 0.0001389620457292294, "loss": 3.089517116546631, "step": 3485, "token_acc": 0.2890385331066877 }, { "epoch": 2.043389035473468, "grad_norm": 0.40496128277998505, "learning_rate": 0.00013896088140137735, "loss": 3.055583953857422, "step": 3486, "token_acc": 0.29190633803571236 }, { "epoch": 2.043975373790677, "grad_norm": 0.38654520782815943, "learning_rate": 0.0001389597164257308, "loss": 3.0676355361938477, "step": 3487, "token_acc": 0.2928620365377452 }, { "epoch": 2.0445617121078863, "grad_norm": 0.4042467598816843, "learning_rate": 0.00013895855080230064, "loss": 3.0710344314575195, "step": 3488, "token_acc": 0.2913159185174591 }, { "epoch": 2.0451480504250954, "grad_norm": 0.40896869383979084, "learning_rate": 0.00013895738453109782, "loss": 3.030439853668213, "step": 3489, "token_acc": 0.29848190963871 }, { "epoch": 2.0457343887423045, "grad_norm": 0.38790729012197117, "learning_rate": 0.00013895621761213329, "loss": 3.0286054611206055, "step": 3490, "token_acc": 0.298629261849774 }, { "epoch": 2.046320727059513, "grad_norm": 0.4821222350377258, "learning_rate": 0.000138955050045418, "loss": 3.0593550205230713, "step": 3491, "token_acc": 0.2938112223168654 }, { "epoch": 2.0469070653767223, "grad_norm": 0.3991683610115054, "learning_rate": 0.00013895388183096294, "loss": 3.077314853668213, "step": 3492, "token_acc": 0.2910727055396934 }, { "epoch": 2.0474934036939314, "grad_norm": 0.38816021283949714, "learning_rate": 0.0001389527129687791, "loss": 3.0493855476379395, "step": 3493, "token_acc": 0.2961974649766511 }, { "epoch": 2.0480797420111405, "grad_norm": 0.42515795112143534, "learning_rate": 0.00013895154345887738, "loss": 3.0880775451660156, "step": 3494, "token_acc": 0.28967441239727537 }, { "epoch": 2.0486660803283496, "grad_norm": 0.42041345230269767, "learning_rate": 0.00013895037330126887, "loss": 3.0557007789611816, "step": 3495, "token_acc": 0.2934895826602533 }, { "epoch": 2.0492524186455583, "grad_norm": 0.4691868311427592, "learning_rate": 0.0001389492024959645, "loss": 3.0429744720458984, "step": 3496, "token_acc": 0.29481594598939076 }, { "epoch": 2.0498387569627674, "grad_norm": 0.3376588575930607, "learning_rate": 0.00013894803104297528, "loss": 3.0879642963409424, "step": 3497, "token_acc": 0.28765841274437315 }, { "epoch": 2.0504250952799765, "grad_norm": 0.4276939921884808, "learning_rate": 0.0001389468589423122, "loss": 3.081231117248535, "step": 3498, "token_acc": 0.29084729635138684 }, { "epoch": 2.0510114335971856, "grad_norm": 0.34471830057852365, "learning_rate": 0.00013894568619398634, "loss": 3.0540566444396973, "step": 3499, "token_acc": 0.2948839623729773 }, { "epoch": 2.0515977719143947, "grad_norm": 0.44890928496200116, "learning_rate": 0.00013894451279800862, "loss": 3.0594706535339355, "step": 3500, "token_acc": 0.2926022335164329 }, { "epoch": 2.052184110231604, "grad_norm": 0.4702387472093667, "learning_rate": 0.0001389433387543901, "loss": 3.055852174758911, "step": 3501, "token_acc": 0.29325221179875166 }, { "epoch": 2.0527704485488125, "grad_norm": 0.42990029925615225, "learning_rate": 0.00013894216406314184, "loss": 3.025768756866455, "step": 3502, "token_acc": 0.29865615843184806 }, { "epoch": 2.0533567868660216, "grad_norm": 0.411579069803052, "learning_rate": 0.00013894098872427484, "loss": 3.0794122219085693, "step": 3503, "token_acc": 0.2900157058680918 }, { "epoch": 2.0539431251832307, "grad_norm": 0.467918842582373, "learning_rate": 0.00013893981273780016, "loss": 3.0619492530822754, "step": 3504, "token_acc": 0.29292926595435015 }, { "epoch": 2.05452946350044, "grad_norm": 0.37465442090327244, "learning_rate": 0.00013893863610372882, "loss": 3.0856080055236816, "step": 3505, "token_acc": 0.2868833446702267 }, { "epoch": 2.055115801817649, "grad_norm": 0.42102381953460777, "learning_rate": 0.00013893745882207192, "loss": 3.043754816055298, "step": 3506, "token_acc": 0.2942270251350819 }, { "epoch": 2.0557021401348576, "grad_norm": 0.44549311869150143, "learning_rate": 0.00013893628089284047, "loss": 3.0618090629577637, "step": 3507, "token_acc": 0.2946323880661832 }, { "epoch": 2.0562884784520667, "grad_norm": 0.35740773778443635, "learning_rate": 0.00013893510231604553, "loss": 3.076803684234619, "step": 3508, "token_acc": 0.29123609625390756 }, { "epoch": 2.056874816769276, "grad_norm": 0.3784400651413219, "learning_rate": 0.0001389339230916982, "loss": 3.0753490924835205, "step": 3509, "token_acc": 0.2883057486822945 }, { "epoch": 2.057461155086485, "grad_norm": 0.3849374552498772, "learning_rate": 0.0001389327432198096, "loss": 3.0726709365844727, "step": 3510, "token_acc": 0.290370829344351 }, { "epoch": 2.058047493403694, "grad_norm": 0.4404796770604643, "learning_rate": 0.00013893156270039072, "loss": 3.0375819206237793, "step": 3511, "token_acc": 0.2958033796066766 }, { "epoch": 2.0586338317209028, "grad_norm": 0.4730945604544182, "learning_rate": 0.00013893038153345273, "loss": 3.049166679382324, "step": 3512, "token_acc": 0.29502331408790894 }, { "epoch": 2.059220170038112, "grad_norm": 0.4133791358890913, "learning_rate": 0.00013892919971900664, "loss": 3.0344483852386475, "step": 3513, "token_acc": 0.2953296739563779 }, { "epoch": 2.059806508355321, "grad_norm": 0.34088059367146384, "learning_rate": 0.00013892801725706364, "loss": 3.0450730323791504, "step": 3514, "token_acc": 0.2947151358972599 }, { "epoch": 2.06039284667253, "grad_norm": 0.38428536695216325, "learning_rate": 0.0001389268341476348, "loss": 3.056018352508545, "step": 3515, "token_acc": 0.29324597148110665 }, { "epoch": 2.060979184989739, "grad_norm": 0.4255846095671945, "learning_rate": 0.0001389256503907312, "loss": 3.088204860687256, "step": 3516, "token_acc": 0.28800695127823955 }, { "epoch": 2.0615655233069483, "grad_norm": 0.37318286696068487, "learning_rate": 0.000138924465986364, "loss": 3.052006721496582, "step": 3517, "token_acc": 0.2948661738991391 }, { "epoch": 2.062151861624157, "grad_norm": 0.34174364000427443, "learning_rate": 0.00013892328093454437, "loss": 3.0661933422088623, "step": 3518, "token_acc": 0.2916311754684838 }, { "epoch": 2.062738199941366, "grad_norm": 0.40610053314771744, "learning_rate": 0.00013892209523528335, "loss": 3.0514962673187256, "step": 3519, "token_acc": 0.29424710948531 }, { "epoch": 2.063324538258575, "grad_norm": 0.4574651245519017, "learning_rate": 0.00013892090888859213, "loss": 3.06376051902771, "step": 3520, "token_acc": 0.2930837772372374 }, { "epoch": 2.0639108765757843, "grad_norm": 0.4051548129017675, "learning_rate": 0.00013891972189448182, "loss": 3.058706045150757, "step": 3521, "token_acc": 0.29427995245536015 }, { "epoch": 2.0644972148929934, "grad_norm": 0.38283659334037207, "learning_rate": 0.00013891853425296362, "loss": 3.0689632892608643, "step": 3522, "token_acc": 0.2925337913132399 }, { "epoch": 2.065083553210202, "grad_norm": 0.33950106588876433, "learning_rate": 0.00013891734596404865, "loss": 3.07800030708313, "step": 3523, "token_acc": 0.29127493783386715 }, { "epoch": 2.065669891527411, "grad_norm": 0.42440471424385023, "learning_rate": 0.0001389161570277481, "loss": 3.028881549835205, "step": 3524, "token_acc": 0.29673678407855625 }, { "epoch": 2.0662562298446203, "grad_norm": 0.3842773131311537, "learning_rate": 0.0001389149674440731, "loss": 3.116633892059326, "step": 3525, "token_acc": 0.28492784461357995 }, { "epoch": 2.0668425681618294, "grad_norm": 0.3430151755366685, "learning_rate": 0.00013891377721303485, "loss": 3.076627492904663, "step": 3526, "token_acc": 0.2900480995451456 }, { "epoch": 2.0674289064790385, "grad_norm": 0.4204628725896369, "learning_rate": 0.00013891258633464453, "loss": 3.044167995452881, "step": 3527, "token_acc": 0.29622921560408216 }, { "epoch": 2.068015244796247, "grad_norm": 0.40267316373095247, "learning_rate": 0.00013891139480891332, "loss": 3.044188976287842, "step": 3528, "token_acc": 0.2951207391352991 }, { "epoch": 2.0686015831134563, "grad_norm": 0.33559651782703215, "learning_rate": 0.0001389102026358524, "loss": 3.0739214420318604, "step": 3529, "token_acc": 0.2914874652100133 }, { "epoch": 2.0691879214306654, "grad_norm": 0.45584418670165666, "learning_rate": 0.000138909009815473, "loss": 3.0521011352539062, "step": 3530, "token_acc": 0.29478772717634333 }, { "epoch": 2.0697742597478745, "grad_norm": 0.46365594539469024, "learning_rate": 0.00013890781634778632, "loss": 3.0737452507019043, "step": 3531, "token_acc": 0.2908484948112482 }, { "epoch": 2.0703605980650837, "grad_norm": 0.37207110847329805, "learning_rate": 0.00013890662223280353, "loss": 3.102999210357666, "step": 3532, "token_acc": 0.2869288937898953 }, { "epoch": 2.0709469363822928, "grad_norm": 0.4525946802934254, "learning_rate": 0.00013890542747053587, "loss": 3.0277554988861084, "step": 3533, "token_acc": 0.2964807436918991 }, { "epoch": 2.0715332746995014, "grad_norm": 0.4092387580535566, "learning_rate": 0.0001389042320609946, "loss": 3.0771327018737793, "step": 3534, "token_acc": 0.29068526704757497 }, { "epoch": 2.0721196130167105, "grad_norm": 0.4273660003873795, "learning_rate": 0.0001389030360041909, "loss": 3.0531277656555176, "step": 3535, "token_acc": 0.29315080261465654 }, { "epoch": 2.0727059513339197, "grad_norm": 0.4092966684668221, "learning_rate": 0.00013890183930013607, "loss": 3.0588302612304688, "step": 3536, "token_acc": 0.2933185386442119 }, { "epoch": 2.0732922896511288, "grad_norm": 0.3346312341655145, "learning_rate": 0.00013890064194884127, "loss": 3.0411734580993652, "step": 3537, "token_acc": 0.29643031464822606 }, { "epoch": 2.073878627968338, "grad_norm": 0.41731637847588315, "learning_rate": 0.00013889944395031778, "loss": 3.0604910850524902, "step": 3538, "token_acc": 0.29336089695763906 }, { "epoch": 2.0744649662855466, "grad_norm": 0.4045197174314917, "learning_rate": 0.00013889824530457685, "loss": 3.050107002258301, "step": 3539, "token_acc": 0.29259017311371377 }, { "epoch": 2.0750513046027557, "grad_norm": 0.452924669036322, "learning_rate": 0.00013889704601162975, "loss": 3.071323871612549, "step": 3540, "token_acc": 0.29121733355903073 }, { "epoch": 2.0756376429199648, "grad_norm": 0.40431479652835034, "learning_rate": 0.00013889584607148776, "loss": 3.0575058460235596, "step": 3541, "token_acc": 0.2928217953493322 }, { "epoch": 2.076223981237174, "grad_norm": 0.4364179452223577, "learning_rate": 0.00013889464548416214, "loss": 3.0586159229278564, "step": 3542, "token_acc": 0.2939687374584521 }, { "epoch": 2.076810319554383, "grad_norm": 0.3698869493118724, "learning_rate": 0.00013889344424966414, "loss": 3.067042350769043, "step": 3543, "token_acc": 0.29363012837152547 }, { "epoch": 2.077396657871592, "grad_norm": 0.36602209007964237, "learning_rate": 0.00013889224236800508, "loss": 3.101879119873047, "step": 3544, "token_acc": 0.28768401161940854 }, { "epoch": 2.077982996188801, "grad_norm": 0.37952766460468174, "learning_rate": 0.00013889103983919621, "loss": 3.030776023864746, "step": 3545, "token_acc": 0.29583906054285053 }, { "epoch": 2.07856933450601, "grad_norm": 0.36148176497114115, "learning_rate": 0.00013888983666324889, "loss": 3.088855266571045, "step": 3546, "token_acc": 0.2876598640157119 }, { "epoch": 2.079155672823219, "grad_norm": 0.4510779278604473, "learning_rate": 0.00013888863284017438, "loss": 3.083803653717041, "step": 3547, "token_acc": 0.289877606550837 }, { "epoch": 2.079742011140428, "grad_norm": 0.3980043338887096, "learning_rate": 0.00013888742836998396, "loss": 3.0812692642211914, "step": 3548, "token_acc": 0.2904634740702101 }, { "epoch": 2.0803283494576372, "grad_norm": 0.3860855959875072, "learning_rate": 0.00013888622325268903, "loss": 3.092256546020508, "step": 3549, "token_acc": 0.2872811571042955 }, { "epoch": 2.080914687774846, "grad_norm": 0.3875731991675352, "learning_rate": 0.0001388850174883008, "loss": 3.0609793663024902, "step": 3550, "token_acc": 0.29324398172260124 }, { "epoch": 2.081501026092055, "grad_norm": 0.39500752505142206, "learning_rate": 0.0001388838110768307, "loss": 3.043837070465088, "step": 3551, "token_acc": 0.29330793306630043 }, { "epoch": 2.082087364409264, "grad_norm": 0.41043470812820104, "learning_rate": 0.00013888260401828998, "loss": 3.0730392932891846, "step": 3552, "token_acc": 0.2917727452014937 }, { "epoch": 2.0826737027264732, "grad_norm": 0.3507368869803403, "learning_rate": 0.00013888139631269004, "loss": 3.0573110580444336, "step": 3553, "token_acc": 0.29385295510762593 }, { "epoch": 2.0832600410436823, "grad_norm": 0.4006798736851808, "learning_rate": 0.0001388801879600422, "loss": 3.04017972946167, "step": 3554, "token_acc": 0.29484467702849504 }, { "epoch": 2.0838463793608915, "grad_norm": 0.452925579882886, "learning_rate": 0.0001388789789603578, "loss": 3.0513229370117188, "step": 3555, "token_acc": 0.29312111376168315 }, { "epoch": 2.0844327176781, "grad_norm": 0.3943787766630467, "learning_rate": 0.00013887776931364822, "loss": 3.056554079055786, "step": 3556, "token_acc": 0.29287224832547554 }, { "epoch": 2.0850190559953092, "grad_norm": 0.3915269367099127, "learning_rate": 0.0001388765590199248, "loss": 3.0723562240600586, "step": 3557, "token_acc": 0.2902428742997579 }, { "epoch": 2.0856053943125183, "grad_norm": 0.37404432548508443, "learning_rate": 0.00013887534807919893, "loss": 3.0584282875061035, "step": 3558, "token_acc": 0.2935394568209129 }, { "epoch": 2.0861917326297275, "grad_norm": 0.30856376439782496, "learning_rate": 0.00013887413649148197, "loss": 3.0505573749542236, "step": 3559, "token_acc": 0.2943578396359768 }, { "epoch": 2.0867780709469366, "grad_norm": 0.36798856932035745, "learning_rate": 0.00013887292425678532, "loss": 3.0731148719787598, "step": 3560, "token_acc": 0.29178892447566895 }, { "epoch": 2.0873644092641452, "grad_norm": 0.35350458537987217, "learning_rate": 0.00013887171137512034, "loss": 3.055309295654297, "step": 3561, "token_acc": 0.293104949690331 }, { "epoch": 2.0879507475813543, "grad_norm": 0.35907991658595895, "learning_rate": 0.00013887049784649843, "loss": 3.0864431858062744, "step": 3562, "token_acc": 0.28949481990965836 }, { "epoch": 2.0885370858985635, "grad_norm": 0.3387628504731505, "learning_rate": 0.000138869283670931, "loss": 3.070488691329956, "step": 3563, "token_acc": 0.28914147570812276 }, { "epoch": 2.0891234242157726, "grad_norm": 0.41085522690362625, "learning_rate": 0.00013886806884842945, "loss": 3.0603437423706055, "step": 3564, "token_acc": 0.2938226088801655 }, { "epoch": 2.0897097625329817, "grad_norm": 0.2784734052620729, "learning_rate": 0.0001388668533790052, "loss": 3.1336379051208496, "step": 3565, "token_acc": 0.28183984697004927 }, { "epoch": 2.0902961008501904, "grad_norm": 0.3657858610358495, "learning_rate": 0.0001388656372626697, "loss": 2.970426559448242, "step": 3566, "token_acc": 0.3057965256164051 }, { "epoch": 2.0908824391673995, "grad_norm": 0.33229922196910266, "learning_rate": 0.00013886442049943428, "loss": 3.077293872833252, "step": 3567, "token_acc": 0.2906531923163472 }, { "epoch": 2.0914687774846086, "grad_norm": 0.39200059285857153, "learning_rate": 0.00013886320308931045, "loss": 3.0790364742279053, "step": 3568, "token_acc": 0.2907560388999268 }, { "epoch": 2.0920551158018177, "grad_norm": 0.37369921209068635, "learning_rate": 0.00013886198503230962, "loss": 3.0396976470947266, "step": 3569, "token_acc": 0.29564506130763907 }, { "epoch": 2.092641454119027, "grad_norm": 0.3805989472191098, "learning_rate": 0.00013886076632844323, "loss": 3.043625831604004, "step": 3570, "token_acc": 0.2953660149441935 }, { "epoch": 2.093227792436236, "grad_norm": 0.3238231588133131, "learning_rate": 0.00013885954697772274, "loss": 3.0459446907043457, "step": 3571, "token_acc": 0.2949544744920946 }, { "epoch": 2.0938141307534446, "grad_norm": 0.4877732099431555, "learning_rate": 0.0001388583269801596, "loss": 3.0441641807556152, "step": 3572, "token_acc": 0.29641167202564084 }, { "epoch": 2.0944004690706537, "grad_norm": 0.5199488882369063, "learning_rate": 0.00013885710633576524, "loss": 3.035614013671875, "step": 3573, "token_acc": 0.29613103306188393 }, { "epoch": 2.094986807387863, "grad_norm": 0.46036855836612933, "learning_rate": 0.00013885588504455117, "loss": 3.057079315185547, "step": 3574, "token_acc": 0.29274486286830265 }, { "epoch": 2.095573145705072, "grad_norm": 0.33080412061328, "learning_rate": 0.00013885466310652883, "loss": 3.0364432334899902, "step": 3575, "token_acc": 0.2961586461319034 }, { "epoch": 2.096159484022281, "grad_norm": 0.45210945309584005, "learning_rate": 0.00013885344052170972, "loss": 3.0727055072784424, "step": 3576, "token_acc": 0.2908903854303351 }, { "epoch": 2.0967458223394897, "grad_norm": 0.3771424159078343, "learning_rate": 0.00013885221729010533, "loss": 3.049459457397461, "step": 3577, "token_acc": 0.2953231878678615 }, { "epoch": 2.097332160656699, "grad_norm": 0.3946934157881624, "learning_rate": 0.0001388509934117271, "loss": 3.0266995429992676, "step": 3578, "token_acc": 0.2987828547495735 }, { "epoch": 2.097918498973908, "grad_norm": 0.33273365398867966, "learning_rate": 0.0001388497688865866, "loss": 3.123490333557129, "step": 3579, "token_acc": 0.2817662395590508 }, { "epoch": 2.098504837291117, "grad_norm": 0.4174170306956088, "learning_rate": 0.0001388485437146953, "loss": 3.0044331550598145, "step": 3580, "token_acc": 0.3007205021199537 }, { "epoch": 2.099091175608326, "grad_norm": 0.3448999335467518, "learning_rate": 0.00013884731789606472, "loss": 3.0394911766052246, "step": 3581, "token_acc": 0.2958471955070383 }, { "epoch": 2.099677513925535, "grad_norm": 0.35329633195899907, "learning_rate": 0.00013884609143070633, "loss": 3.0720462799072266, "step": 3582, "token_acc": 0.2900113201444666 }, { "epoch": 2.100263852242744, "grad_norm": 0.37334592023000984, "learning_rate": 0.0001388448643186317, "loss": 3.078434467315674, "step": 3583, "token_acc": 0.2910762291575339 }, { "epoch": 2.100850190559953, "grad_norm": 0.3627473373863117, "learning_rate": 0.0001388436365598523, "loss": 3.04420804977417, "step": 3584, "token_acc": 0.2952784095695392 }, { "epoch": 2.101436528877162, "grad_norm": 0.3164468872263793, "learning_rate": 0.00013884240815437976, "loss": 3.0036802291870117, "step": 3585, "token_acc": 0.3006704446313341 }, { "epoch": 2.1020228671943713, "grad_norm": 0.3262969368433032, "learning_rate": 0.00013884117910222552, "loss": 3.0708227157592773, "step": 3586, "token_acc": 0.29111566108961867 }, { "epoch": 2.1026092055115804, "grad_norm": 0.3423508577097571, "learning_rate": 0.0001388399494034012, "loss": 3.086907386779785, "step": 3587, "token_acc": 0.2894343090339478 }, { "epoch": 2.103195543828789, "grad_norm": 0.40907763832314026, "learning_rate": 0.00013883871905791828, "loss": 3.0891640186309814, "step": 3588, "token_acc": 0.28894577958187906 }, { "epoch": 2.103781882145998, "grad_norm": 0.32224690229854824, "learning_rate": 0.00013883748806578839, "loss": 3.0678625106811523, "step": 3589, "token_acc": 0.29178688159455496 }, { "epoch": 2.1043682204632073, "grad_norm": 0.35566718818039156, "learning_rate": 0.00013883625642702304, "loss": 3.0766353607177734, "step": 3590, "token_acc": 0.2908577307632113 }, { "epoch": 2.1049545587804164, "grad_norm": 0.3967392343762941, "learning_rate": 0.0001388350241416338, "loss": 3.0980453491210938, "step": 3591, "token_acc": 0.28833701831964625 }, { "epoch": 2.1055408970976255, "grad_norm": 0.3504420694306796, "learning_rate": 0.0001388337912096323, "loss": 3.1171092987060547, "step": 3592, "token_acc": 0.28501722384694816 }, { "epoch": 2.106127235414834, "grad_norm": 0.37889231818891056, "learning_rate": 0.00013883255763103006, "loss": 3.0386791229248047, "step": 3593, "token_acc": 0.2972305138742019 }, { "epoch": 2.1067135737320433, "grad_norm": 0.43377059479628705, "learning_rate": 0.00013883132340583872, "loss": 3.067584991455078, "step": 3594, "token_acc": 0.2896499366925756 }, { "epoch": 2.1072999120492524, "grad_norm": 0.42918857678919764, "learning_rate": 0.00013883008853406986, "loss": 3.099790096282959, "step": 3595, "token_acc": 0.28827267630419096 }, { "epoch": 2.1078862503664615, "grad_norm": 0.32746334629217966, "learning_rate": 0.00013882885301573503, "loss": 3.083308458328247, "step": 3596, "token_acc": 0.29072784104154187 }, { "epoch": 2.1084725886836706, "grad_norm": 0.4151508781766218, "learning_rate": 0.00013882761685084588, "loss": 3.0627522468566895, "step": 3597, "token_acc": 0.292556027769712 }, { "epoch": 2.1090589270008797, "grad_norm": 0.4501716845944949, "learning_rate": 0.00013882638003941404, "loss": 3.057436466217041, "step": 3598, "token_acc": 0.2941305450472346 }, { "epoch": 2.1096452653180884, "grad_norm": 0.3919677735952834, "learning_rate": 0.00013882514258145107, "loss": 3.1192445755004883, "step": 3599, "token_acc": 0.28294378713564294 }, { "epoch": 2.1102316036352975, "grad_norm": 0.37019183032269365, "learning_rate": 0.00013882390447696866, "loss": 3.0782432556152344, "step": 3600, "token_acc": 0.29042322837241563 }, { "epoch": 2.1108179419525066, "grad_norm": 0.4771789753368112, "learning_rate": 0.0001388226657259784, "loss": 3.047024726867676, "step": 3601, "token_acc": 0.294799740764744 }, { "epoch": 2.1114042802697157, "grad_norm": 0.37605110120237084, "learning_rate": 0.00013882142632849192, "loss": 3.0196657180786133, "step": 3602, "token_acc": 0.2976590542579202 }, { "epoch": 2.111990618586925, "grad_norm": 0.41957170706842967, "learning_rate": 0.00013882018628452088, "loss": 3.052840232849121, "step": 3603, "token_acc": 0.2946973446557537 }, { "epoch": 2.1125769569041335, "grad_norm": 0.33889499844363175, "learning_rate": 0.00013881894559407694, "loss": 3.0038328170776367, "step": 3604, "token_acc": 0.30226127079942483 }, { "epoch": 2.1131632952213426, "grad_norm": 0.39923432764794514, "learning_rate": 0.00013881770425717174, "loss": 3.0949344635009766, "step": 3605, "token_acc": 0.29052818669236946 }, { "epoch": 2.1137496335385517, "grad_norm": 0.36667302462685836, "learning_rate": 0.00013881646227381693, "loss": 3.0374503135681152, "step": 3606, "token_acc": 0.29694623471035303 }, { "epoch": 2.114335971855761, "grad_norm": 0.37789056269355675, "learning_rate": 0.00013881521964402422, "loss": 3.0434722900390625, "step": 3607, "token_acc": 0.2940331985108957 }, { "epoch": 2.11492231017297, "grad_norm": 0.38962330669679957, "learning_rate": 0.0001388139763678052, "loss": 3.0545945167541504, "step": 3608, "token_acc": 0.2938835956178853 }, { "epoch": 2.115508648490179, "grad_norm": 0.3773396580271617, "learning_rate": 0.00013881273244517164, "loss": 3.053905487060547, "step": 3609, "token_acc": 0.2932949371318769 }, { "epoch": 2.1160949868073877, "grad_norm": 0.34250857421263803, "learning_rate": 0.00013881148787613516, "loss": 3.0920209884643555, "step": 3610, "token_acc": 0.2891248425362829 }, { "epoch": 2.116681325124597, "grad_norm": 0.3474107552382277, "learning_rate": 0.00013881024266070748, "loss": 3.084031581878662, "step": 3611, "token_acc": 0.28883255354446663 }, { "epoch": 2.117267663441806, "grad_norm": 0.39087542899953936, "learning_rate": 0.00013880899679890031, "loss": 3.050617218017578, "step": 3612, "token_acc": 0.29192745571469875 }, { "epoch": 2.117854001759015, "grad_norm": 0.3555552888370427, "learning_rate": 0.00013880775029072534, "loss": 3.0233449935913086, "step": 3613, "token_acc": 0.29731515296103944 }, { "epoch": 2.118440340076224, "grad_norm": 0.3914464117314933, "learning_rate": 0.00013880650313619425, "loss": 3.0774118900299072, "step": 3614, "token_acc": 0.29087339597697964 }, { "epoch": 2.119026678393433, "grad_norm": 0.32859215092179267, "learning_rate": 0.0001388052553353188, "loss": 3.030160903930664, "step": 3615, "token_acc": 0.29745642403913275 }, { "epoch": 2.119613016710642, "grad_norm": 0.3400075420358624, "learning_rate": 0.00013880400688811068, "loss": 3.0234766006469727, "step": 3616, "token_acc": 0.29710982036347383 }, { "epoch": 2.120199355027851, "grad_norm": 0.39377774699355317, "learning_rate": 0.00013880275779458163, "loss": 3.097604274749756, "step": 3617, "token_acc": 0.2869141039236479 }, { "epoch": 2.12078569334506, "grad_norm": 0.37940735004322373, "learning_rate": 0.0001388015080547434, "loss": 3.059603452682495, "step": 3618, "token_acc": 0.29475008275405495 }, { "epoch": 2.1213720316622693, "grad_norm": 0.3778553124002791, "learning_rate": 0.0001388002576686077, "loss": 3.0681915283203125, "step": 3619, "token_acc": 0.2903755456685635 }, { "epoch": 2.121958369979478, "grad_norm": 0.38972075395246414, "learning_rate": 0.00013879900663618628, "loss": 3.0619359016418457, "step": 3620, "token_acc": 0.29286590394823103 }, { "epoch": 2.122544708296687, "grad_norm": 0.4404951896061724, "learning_rate": 0.00013879775495749094, "loss": 3.0906691551208496, "step": 3621, "token_acc": 0.2884586809314568 }, { "epoch": 2.123131046613896, "grad_norm": 0.4491328320037228, "learning_rate": 0.00013879650263253336, "loss": 3.0378637313842773, "step": 3622, "token_acc": 0.2951317392948691 }, { "epoch": 2.1237173849311053, "grad_norm": 0.4992025779950859, "learning_rate": 0.00013879524966132535, "loss": 3.06771183013916, "step": 3623, "token_acc": 0.2925594456247561 }, { "epoch": 2.1243037232483144, "grad_norm": 0.4082898380301269, "learning_rate": 0.00013879399604387865, "loss": 3.0601770877838135, "step": 3624, "token_acc": 0.2923739113098148 }, { "epoch": 2.1248900615655235, "grad_norm": 0.3675700665200735, "learning_rate": 0.0001387927417802051, "loss": 3.037097930908203, "step": 3625, "token_acc": 0.29715589483792104 }, { "epoch": 2.125476399882732, "grad_norm": 0.4303759949350231, "learning_rate": 0.00013879148687031642, "loss": 3.0430657863616943, "step": 3626, "token_acc": 0.29609048753618733 }, { "epoch": 2.1260627381999413, "grad_norm": 0.3946235400757452, "learning_rate": 0.00013879023131422444, "loss": 2.9939348697662354, "step": 3627, "token_acc": 0.30232217244255866 }, { "epoch": 2.1266490765171504, "grad_norm": 0.42086315973020916, "learning_rate": 0.0001387889751119409, "loss": 3.0546250343322754, "step": 3628, "token_acc": 0.29454897402044067 }, { "epoch": 2.1272354148343595, "grad_norm": 0.40593444058890665, "learning_rate": 0.00013878771826347766, "loss": 3.0519537925720215, "step": 3629, "token_acc": 0.29307148665361094 }, { "epoch": 2.1278217531515686, "grad_norm": 0.43234312908781897, "learning_rate": 0.00013878646076884648, "loss": 3.070136547088623, "step": 3630, "token_acc": 0.29147284405623647 }, { "epoch": 2.1284080914687773, "grad_norm": 0.4504799345237179, "learning_rate": 0.00013878520262805918, "loss": 3.082275390625, "step": 3631, "token_acc": 0.28904291979949875 }, { "epoch": 2.1289944297859864, "grad_norm": 0.4011962312662107, "learning_rate": 0.0001387839438411276, "loss": 3.054649591445923, "step": 3632, "token_acc": 0.29241864266511775 }, { "epoch": 2.1295807681031955, "grad_norm": 0.41912516080667495, "learning_rate": 0.0001387826844080636, "loss": 3.0400443077087402, "step": 3633, "token_acc": 0.2942002936560174 }, { "epoch": 2.1301671064204046, "grad_norm": 0.34348948494630654, "learning_rate": 0.00013878142432887893, "loss": 3.041868209838867, "step": 3634, "token_acc": 0.2953584830086854 }, { "epoch": 2.1307534447376137, "grad_norm": 0.4035827395360975, "learning_rate": 0.00013878016360358545, "loss": 3.068477153778076, "step": 3635, "token_acc": 0.29235573274084814 }, { "epoch": 2.1313397830548224, "grad_norm": 0.4262363695095578, "learning_rate": 0.00013877890223219503, "loss": 3.067061424255371, "step": 3636, "token_acc": 0.29295252343588357 }, { "epoch": 2.1319261213720315, "grad_norm": 0.40681922164514644, "learning_rate": 0.0001387776402147195, "loss": 3.0702812671661377, "step": 3637, "token_acc": 0.29150909513447276 }, { "epoch": 2.1325124596892406, "grad_norm": 0.33436231298634655, "learning_rate": 0.00013877637755117073, "loss": 3.0855891704559326, "step": 3638, "token_acc": 0.29016843959974226 }, { "epoch": 2.1330987980064497, "grad_norm": 0.38711956761857064, "learning_rate": 0.00013877511424156057, "loss": 3.0739312171936035, "step": 3639, "token_acc": 0.2899624537424705 }, { "epoch": 2.133685136323659, "grad_norm": 0.4318123819826134, "learning_rate": 0.00013877385028590087, "loss": 3.10980224609375, "step": 3640, "token_acc": 0.2851005864783999 }, { "epoch": 2.134271474640868, "grad_norm": 0.3819295474651742, "learning_rate": 0.00013877258568420353, "loss": 3.073606014251709, "step": 3641, "token_acc": 0.29272549139673015 }, { "epoch": 2.1348578129580766, "grad_norm": 0.3909055834300277, "learning_rate": 0.00013877132043648043, "loss": 3.070145606994629, "step": 3642, "token_acc": 0.29150916769321267 }, { "epoch": 2.1354441512752858, "grad_norm": 0.4353780446853067, "learning_rate": 0.00013877005454274342, "loss": 3.088529109954834, "step": 3643, "token_acc": 0.28969861550104614 }, { "epoch": 2.136030489592495, "grad_norm": 0.40087679951274807, "learning_rate": 0.00013876878800300445, "loss": 3.0913333892822266, "step": 3644, "token_acc": 0.28927806279925095 }, { "epoch": 2.136616827909704, "grad_norm": 0.33669371253807756, "learning_rate": 0.00013876752081727536, "loss": 3.0919599533081055, "step": 3645, "token_acc": 0.2889062207364724 }, { "epoch": 2.137203166226913, "grad_norm": 0.35344715790764947, "learning_rate": 0.0001387662529855681, "loss": 3.006959915161133, "step": 3646, "token_acc": 0.2983681689472595 }, { "epoch": 2.1377895045441218, "grad_norm": 0.37274571488313335, "learning_rate": 0.00013876498450789452, "loss": 3.0951483249664307, "step": 3647, "token_acc": 0.2883967273370232 }, { "epoch": 2.138375842861331, "grad_norm": 0.3487507334823329, "learning_rate": 0.00013876371538426658, "loss": 3.0230093002319336, "step": 3648, "token_acc": 0.29741157946052194 }, { "epoch": 2.13896218117854, "grad_norm": 0.3633333053378516, "learning_rate": 0.00013876244561469622, "loss": 3.100297689437866, "step": 3649, "token_acc": 0.287270521698186 }, { "epoch": 2.139548519495749, "grad_norm": 0.38050448528690217, "learning_rate": 0.00013876117519919532, "loss": 3.0393474102020264, "step": 3650, "token_acc": 0.2964029405267673 }, { "epoch": 2.140134857812958, "grad_norm": 0.33489754719401343, "learning_rate": 0.00013875990413777584, "loss": 3.0547125339508057, "step": 3651, "token_acc": 0.29423085012409034 }, { "epoch": 2.1407211961301673, "grad_norm": 0.3864930676077191, "learning_rate": 0.00013875863243044973, "loss": 3.066652297973633, "step": 3652, "token_acc": 0.29185848902732253 }, { "epoch": 2.141307534447376, "grad_norm": 0.3344110178839375, "learning_rate": 0.0001387573600772289, "loss": 3.1205050945281982, "step": 3653, "token_acc": 0.2840130959797463 }, { "epoch": 2.141893872764585, "grad_norm": 0.3657241972698941, "learning_rate": 0.00013875608707812533, "loss": 3.0484561920166016, "step": 3654, "token_acc": 0.294231117460062 }, { "epoch": 2.142480211081794, "grad_norm": 0.38402585340674356, "learning_rate": 0.000138754813433151, "loss": 3.075462818145752, "step": 3655, "token_acc": 0.2906715912341361 }, { "epoch": 2.1430665493990033, "grad_norm": 0.3275827527592282, "learning_rate": 0.00013875353914231782, "loss": 3.018284797668457, "step": 3656, "token_acc": 0.29906496223885964 }, { "epoch": 2.1436528877162124, "grad_norm": 0.395919189019556, "learning_rate": 0.00013875226420563777, "loss": 3.027825355529785, "step": 3657, "token_acc": 0.2975526942069318 }, { "epoch": 2.144239226033421, "grad_norm": 0.3511033846613789, "learning_rate": 0.00013875098862312289, "loss": 3.080601453781128, "step": 3658, "token_acc": 0.28909791947003416 }, { "epoch": 2.14482556435063, "grad_norm": 0.37160672064283234, "learning_rate": 0.00013874971239478506, "loss": 3.0602476596832275, "step": 3659, "token_acc": 0.29364957251806606 }, { "epoch": 2.1454119026678393, "grad_norm": 0.32056967862860675, "learning_rate": 0.00013874843552063635, "loss": 3.0788612365722656, "step": 3660, "token_acc": 0.28874581072454514 }, { "epoch": 2.1459982409850484, "grad_norm": 0.3783432078255706, "learning_rate": 0.00013874715800068872, "loss": 3.0443053245544434, "step": 3661, "token_acc": 0.29407903194483576 }, { "epoch": 2.1465845793022575, "grad_norm": 0.32572597395113556, "learning_rate": 0.0001387458798349542, "loss": 3.067356586456299, "step": 3662, "token_acc": 0.29309579411633363 }, { "epoch": 2.1471709176194667, "grad_norm": 0.3840179338080964, "learning_rate": 0.00013874460102344477, "loss": 3.085653066635132, "step": 3663, "token_acc": 0.2903970112341814 }, { "epoch": 2.1477572559366753, "grad_norm": 0.32948715681376284, "learning_rate": 0.00013874332156617244, "loss": 3.034700870513916, "step": 3664, "token_acc": 0.2972501068061382 }, { "epoch": 2.1483435942538844, "grad_norm": 0.3002745315299916, "learning_rate": 0.00013874204146314923, "loss": 3.0876808166503906, "step": 3665, "token_acc": 0.2901335086225356 }, { "epoch": 2.1489299325710935, "grad_norm": 0.39374262948818484, "learning_rate": 0.00013874076071438717, "loss": 3.0260844230651855, "step": 3666, "token_acc": 0.29816531209981106 }, { "epoch": 2.1495162708883027, "grad_norm": 0.37591434430094156, "learning_rate": 0.0001387394793198983, "loss": 3.052907943725586, "step": 3667, "token_acc": 0.2931241990449627 }, { "epoch": 2.1501026092055118, "grad_norm": 0.33043674151675834, "learning_rate": 0.00013873819727969465, "loss": 3.0446677207946777, "step": 3668, "token_acc": 0.2953654346596772 }, { "epoch": 2.1506889475227204, "grad_norm": 0.37498769926723785, "learning_rate": 0.00013873691459378827, "loss": 3.0363340377807617, "step": 3669, "token_acc": 0.2967423272458442 }, { "epoch": 2.1512752858399296, "grad_norm": 0.3508183158161332, "learning_rate": 0.0001387356312621912, "loss": 3.1133742332458496, "step": 3670, "token_acc": 0.28428066007282493 }, { "epoch": 2.1518616241571387, "grad_norm": 0.3543527107334875, "learning_rate": 0.00013873434728491548, "loss": 3.0330026149749756, "step": 3671, "token_acc": 0.29579688322244163 }, { "epoch": 2.1524479624743478, "grad_norm": 0.33057145508591396, "learning_rate": 0.0001387330626619732, "loss": 3.0613560676574707, "step": 3672, "token_acc": 0.2916567618965294 }, { "epoch": 2.153034300791557, "grad_norm": 0.34355229517349395, "learning_rate": 0.00013873177739337644, "loss": 3.050032138824463, "step": 3673, "token_acc": 0.294324142084322 }, { "epoch": 2.1536206391087656, "grad_norm": 0.3492257763401718, "learning_rate": 0.0001387304914791372, "loss": 3.108842134475708, "step": 3674, "token_acc": 0.28639746291683893 }, { "epoch": 2.1542069774259747, "grad_norm": 0.33297352386959567, "learning_rate": 0.00013872920491926762, "loss": 3.0299482345581055, "step": 3675, "token_acc": 0.2987976436751925 }, { "epoch": 2.154793315743184, "grad_norm": 0.3469590089013452, "learning_rate": 0.0001387279177137798, "loss": 3.055520534515381, "step": 3676, "token_acc": 0.29350654177817426 }, { "epoch": 2.155379654060393, "grad_norm": 0.3532602767974127, "learning_rate": 0.00013872662986268578, "loss": 3.06115460395813, "step": 3677, "token_acc": 0.2918595383746523 }, { "epoch": 2.155965992377602, "grad_norm": 0.3190345846968025, "learning_rate": 0.0001387253413659977, "loss": 3.0101656913757324, "step": 3678, "token_acc": 0.29874629311997813 }, { "epoch": 2.1565523306948107, "grad_norm": 0.37120519446301753, "learning_rate": 0.00013872405222372766, "loss": 3.0963406562805176, "step": 3679, "token_acc": 0.289283651833917 }, { "epoch": 2.15713866901202, "grad_norm": 0.3933495833608431, "learning_rate": 0.0001387227624358877, "loss": 3.0724759101867676, "step": 3680, "token_acc": 0.29197194906119844 }, { "epoch": 2.157725007329229, "grad_norm": 0.43429109210816436, "learning_rate": 0.00013872147200249003, "loss": 3.0453054904937744, "step": 3681, "token_acc": 0.295247251064555 }, { "epoch": 2.158311345646438, "grad_norm": 0.33834917267168285, "learning_rate": 0.00013872018092354673, "loss": 3.0735626220703125, "step": 3682, "token_acc": 0.29215881972332025 }, { "epoch": 2.158897683963647, "grad_norm": 0.3977111416940941, "learning_rate": 0.00013871888919906992, "loss": 3.1411757469177246, "step": 3683, "token_acc": 0.2817891549959399 }, { "epoch": 2.1594840222808562, "grad_norm": 0.46326847758176, "learning_rate": 0.00013871759682907177, "loss": 3.069664239883423, "step": 3684, "token_acc": 0.29289964153938336 }, { "epoch": 2.160070360598065, "grad_norm": 0.3535593276211105, "learning_rate": 0.00013871630381356439, "loss": 3.066467046737671, "step": 3685, "token_acc": 0.29108191423116314 }, { "epoch": 2.160656698915274, "grad_norm": 0.43580055453221866, "learning_rate": 0.00013871501015255992, "loss": 3.0497055053710938, "step": 3686, "token_acc": 0.2943328299480011 }, { "epoch": 2.161243037232483, "grad_norm": 0.4267797503908854, "learning_rate": 0.00013871371584607052, "loss": 3.049644947052002, "step": 3687, "token_acc": 0.2936733192970532 }, { "epoch": 2.1618293755496922, "grad_norm": 0.4167482931096908, "learning_rate": 0.0001387124208941084, "loss": 3.0985755920410156, "step": 3688, "token_acc": 0.28847724375853884 }, { "epoch": 2.1624157138669013, "grad_norm": 0.421703405790752, "learning_rate": 0.00013871112529668562, "loss": 3.0653538703918457, "step": 3689, "token_acc": 0.2905989746025681 }, { "epoch": 2.16300205218411, "grad_norm": 0.45062132450792347, "learning_rate": 0.00013870982905381444, "loss": 3.0211079120635986, "step": 3690, "token_acc": 0.2978757407921695 }, { "epoch": 2.163588390501319, "grad_norm": 0.3510442601763075, "learning_rate": 0.00013870853216550697, "loss": 3.0555906295776367, "step": 3691, "token_acc": 0.2955308475924564 }, { "epoch": 2.1641747288185282, "grad_norm": 0.36212792654951687, "learning_rate": 0.00013870723463177547, "loss": 3.085108995437622, "step": 3692, "token_acc": 0.28803588008665865 }, { "epoch": 2.1647610671357373, "grad_norm": 0.3353144039638235, "learning_rate": 0.00013870593645263203, "loss": 3.0589964389801025, "step": 3693, "token_acc": 0.2928838264015415 }, { "epoch": 2.1653474054529465, "grad_norm": 0.3944769159378328, "learning_rate": 0.00013870463762808894, "loss": 3.081333875656128, "step": 3694, "token_acc": 0.2892041853667369 }, { "epoch": 2.1659337437701556, "grad_norm": 0.38905277377201836, "learning_rate": 0.00013870333815815835, "loss": 3.085146427154541, "step": 3695, "token_acc": 0.29007323442682437 }, { "epoch": 2.1665200820873642, "grad_norm": 0.3540205292815972, "learning_rate": 0.0001387020380428525, "loss": 3.0569205284118652, "step": 3696, "token_acc": 0.2916895868682095 }, { "epoch": 2.1671064204045734, "grad_norm": 0.3807583869151187, "learning_rate": 0.00013870073728218353, "loss": 3.0659291744232178, "step": 3697, "token_acc": 0.29133554590652094 }, { "epoch": 2.1676927587217825, "grad_norm": 0.37312172865874976, "learning_rate": 0.00013869943587616374, "loss": 3.0327696800231934, "step": 3698, "token_acc": 0.29609208543366416 }, { "epoch": 2.1682790970389916, "grad_norm": 0.3548253832132521, "learning_rate": 0.00013869813382480533, "loss": 3.032527208328247, "step": 3699, "token_acc": 0.2986480769679966 }, { "epoch": 2.1688654353562007, "grad_norm": 0.4321092988735974, "learning_rate": 0.0001386968311281205, "loss": 3.0885863304138184, "step": 3700, "token_acc": 0.28938733016170054 }, { "epoch": 2.1694517736734094, "grad_norm": 0.344140991691171, "learning_rate": 0.00013869552778612154, "loss": 3.0269875526428223, "step": 3701, "token_acc": 0.2985170113688803 }, { "epoch": 2.1700381119906185, "grad_norm": 0.3870202579124156, "learning_rate": 0.00013869422379882065, "loss": 3.0187439918518066, "step": 3702, "token_acc": 0.29769086374038933 }, { "epoch": 2.1706244503078276, "grad_norm": 0.4179396836032735, "learning_rate": 0.0001386929191662301, "loss": 3.0423059463500977, "step": 3703, "token_acc": 0.29574282595093293 }, { "epoch": 2.1712107886250367, "grad_norm": 0.3295170274460673, "learning_rate": 0.00013869161388836213, "loss": 3.089437961578369, "step": 3704, "token_acc": 0.2878570626033268 }, { "epoch": 2.171797126942246, "grad_norm": 0.35577094804729825, "learning_rate": 0.00013869030796522902, "loss": 3.0802865028381348, "step": 3705, "token_acc": 0.29013122653135004 }, { "epoch": 2.172383465259455, "grad_norm": 0.3543273750504757, "learning_rate": 0.000138689001396843, "loss": 3.0879061222076416, "step": 3706, "token_acc": 0.2902451620743081 }, { "epoch": 2.1729698035766636, "grad_norm": 0.3756799407650146, "learning_rate": 0.0001386876941832164, "loss": 3.043858051300049, "step": 3707, "token_acc": 0.29578152848461425 }, { "epoch": 2.1735561418938727, "grad_norm": 0.3765999992318311, "learning_rate": 0.00013868638632436148, "loss": 3.061802864074707, "step": 3708, "token_acc": 0.2922717184099537 }, { "epoch": 2.174142480211082, "grad_norm": 0.3715973833120276, "learning_rate": 0.00013868507782029049, "loss": 3.0692548751831055, "step": 3709, "token_acc": 0.29245102748038776 }, { "epoch": 2.174728818528291, "grad_norm": 0.3814896201935661, "learning_rate": 0.00013868376867101577, "loss": 3.042123317718506, "step": 3710, "token_acc": 0.2942606733158249 }, { "epoch": 2.1753151568455, "grad_norm": 0.3967303330174871, "learning_rate": 0.0001386824588765496, "loss": 3.0813355445861816, "step": 3711, "token_acc": 0.29048891032580915 }, { "epoch": 2.1759014951627087, "grad_norm": 0.35923419579956684, "learning_rate": 0.00013868114843690424, "loss": 3.1104512214660645, "step": 3712, "token_acc": 0.286604014470948 }, { "epoch": 2.176487833479918, "grad_norm": 0.30757260834184413, "learning_rate": 0.00013867983735209207, "loss": 3.049495220184326, "step": 3713, "token_acc": 0.29556019413745854 }, { "epoch": 2.177074171797127, "grad_norm": 0.33801634603338193, "learning_rate": 0.00013867852562212538, "loss": 3.0556013584136963, "step": 3714, "token_acc": 0.2951947197433894 }, { "epoch": 2.177660510114336, "grad_norm": 0.36698135234241874, "learning_rate": 0.00013867721324701648, "loss": 3.056035041809082, "step": 3715, "token_acc": 0.2917627576678079 }, { "epoch": 2.178246848431545, "grad_norm": 0.4645937714476885, "learning_rate": 0.0001386759002267777, "loss": 3.0827581882476807, "step": 3716, "token_acc": 0.29038527802903047 }, { "epoch": 2.1788331867487543, "grad_norm": 0.4618781778891149, "learning_rate": 0.00013867458656142138, "loss": 3.0646395683288574, "step": 3717, "token_acc": 0.29132272786752156 }, { "epoch": 2.179419525065963, "grad_norm": 0.35099768923673663, "learning_rate": 0.00013867327225095986, "loss": 3.0314223766326904, "step": 3718, "token_acc": 0.29740863679962065 }, { "epoch": 2.180005863383172, "grad_norm": 0.32948210480712653, "learning_rate": 0.0001386719572954055, "loss": 3.0496826171875, "step": 3719, "token_acc": 0.2968249466973354 }, { "epoch": 2.180592201700381, "grad_norm": 0.37304514387205356, "learning_rate": 0.00013867064169477062, "loss": 3.0601322650909424, "step": 3720, "token_acc": 0.2912145168493548 }, { "epoch": 2.1811785400175903, "grad_norm": 0.3463072682259301, "learning_rate": 0.0001386693254490676, "loss": 3.0533735752105713, "step": 3721, "token_acc": 0.29294265823310406 }, { "epoch": 2.1817648783347994, "grad_norm": 0.31209225171639965, "learning_rate": 0.00013866800855830881, "loss": 3.0868101119995117, "step": 3722, "token_acc": 0.2904257002690299 }, { "epoch": 2.182351216652008, "grad_norm": 0.3312393845306281, "learning_rate": 0.0001386666910225066, "loss": 3.1240670680999756, "step": 3723, "token_acc": 0.2852433199737349 }, { "epoch": 2.182937554969217, "grad_norm": 0.4087886688339445, "learning_rate": 0.00013866537284167336, "loss": 3.076992988586426, "step": 3724, "token_acc": 0.2910729985434409 }, { "epoch": 2.1835238932864263, "grad_norm": 0.4687490233916709, "learning_rate": 0.00013866405401582145, "loss": 3.0642905235290527, "step": 3725, "token_acc": 0.2942508627933894 }, { "epoch": 2.1841102316036354, "grad_norm": 0.3552835404382854, "learning_rate": 0.0001386627345449633, "loss": 3.087520122528076, "step": 3726, "token_acc": 0.28893885173554235 }, { "epoch": 2.1846965699208445, "grad_norm": 0.3991863088209864, "learning_rate": 0.0001386614144291113, "loss": 3.087493419647217, "step": 3727, "token_acc": 0.28767080117338406 }, { "epoch": 2.185282908238053, "grad_norm": 0.3898983406995071, "learning_rate": 0.0001386600936682778, "loss": 3.0735762119293213, "step": 3728, "token_acc": 0.29093964826114865 }, { "epoch": 2.1858692465552623, "grad_norm": 0.40206861132769756, "learning_rate": 0.00013865877226247527, "loss": 3.095548629760742, "step": 3729, "token_acc": 0.2870144700738571 }, { "epoch": 2.1864555848724714, "grad_norm": 0.34366341482979906, "learning_rate": 0.0001386574502117161, "loss": 3.0830752849578857, "step": 3730, "token_acc": 0.2895688093784407 }, { "epoch": 2.1870419231896805, "grad_norm": 0.41961794331987, "learning_rate": 0.00013865612751601266, "loss": 3.0800833702087402, "step": 3731, "token_acc": 0.293000142140194 }, { "epoch": 2.1876282615068896, "grad_norm": 0.37030852390643393, "learning_rate": 0.00013865480417537743, "loss": 3.079542636871338, "step": 3732, "token_acc": 0.28971192783998323 }, { "epoch": 2.1882145998240983, "grad_norm": 0.4036872442160519, "learning_rate": 0.00013865348018982283, "loss": 3.060689926147461, "step": 3733, "token_acc": 0.29304401346467046 }, { "epoch": 2.1888009381413074, "grad_norm": 0.3725636356502434, "learning_rate": 0.00013865215555936133, "loss": 3.0625882148742676, "step": 3734, "token_acc": 0.2935405476843642 }, { "epoch": 2.1893872764585165, "grad_norm": 0.4327971640345311, "learning_rate": 0.0001386508302840053, "loss": 3.125330924987793, "step": 3735, "token_acc": 0.2839445367564527 }, { "epoch": 2.1899736147757256, "grad_norm": 0.3450048080568765, "learning_rate": 0.00013864950436376724, "loss": 3.0916874408721924, "step": 3736, "token_acc": 0.2909560544428036 }, { "epoch": 2.1905599530929347, "grad_norm": 0.4171269872296327, "learning_rate": 0.0001386481777986596, "loss": 3.073681116104126, "step": 3737, "token_acc": 0.29043564699882735 }, { "epoch": 2.191146291410144, "grad_norm": 0.41354734320136044, "learning_rate": 0.00013864685058869483, "loss": 3.051309108734131, "step": 3738, "token_acc": 0.29414511456133463 }, { "epoch": 2.1917326297273525, "grad_norm": 0.4149795160748362, "learning_rate": 0.00013864552273388538, "loss": 3.11872935295105, "step": 3739, "token_acc": 0.2837969252053957 }, { "epoch": 2.1923189680445616, "grad_norm": 0.40006614351374076, "learning_rate": 0.00013864419423424376, "loss": 3.099200487136841, "step": 3740, "token_acc": 0.28900737295264484 }, { "epoch": 2.1929053063617707, "grad_norm": 0.3304117022601082, "learning_rate": 0.00013864286508978243, "loss": 3.0832107067108154, "step": 3741, "token_acc": 0.29107287645927415 }, { "epoch": 2.19349164467898, "grad_norm": 0.3967139123174787, "learning_rate": 0.0001386415353005139, "loss": 3.075504779815674, "step": 3742, "token_acc": 0.2906743215250833 }, { "epoch": 2.194077982996189, "grad_norm": 0.3955180790135535, "learning_rate": 0.00013864020486645061, "loss": 3.087817907333374, "step": 3743, "token_acc": 0.2895387234920799 }, { "epoch": 2.1946643213133976, "grad_norm": 0.3786986022680428, "learning_rate": 0.00013863887378760513, "loss": 3.067044496536255, "step": 3744, "token_acc": 0.29155823321806706 }, { "epoch": 2.1952506596306067, "grad_norm": 0.42917368254151966, "learning_rate": 0.0001386375420639899, "loss": 3.091047763824463, "step": 3745, "token_acc": 0.2883813733294231 }, { "epoch": 2.195836997947816, "grad_norm": 0.3661432374750033, "learning_rate": 0.00013863620969561746, "loss": 3.1084158420562744, "step": 3746, "token_acc": 0.28657014222858895 }, { "epoch": 2.196423336265025, "grad_norm": 0.41026014686501694, "learning_rate": 0.00013863487668250028, "loss": 3.097309112548828, "step": 3747, "token_acc": 0.2882603008502289 }, { "epoch": 2.197009674582234, "grad_norm": 0.35200472358163115, "learning_rate": 0.00013863354302465097, "loss": 3.1072463989257812, "step": 3748, "token_acc": 0.28545067013854253 }, { "epoch": 2.197596012899443, "grad_norm": 0.3842943663948009, "learning_rate": 0.000138632208722082, "loss": 3.086787223815918, "step": 3749, "token_acc": 0.2899879663056558 }, { "epoch": 2.198182351216652, "grad_norm": 0.40559178945964, "learning_rate": 0.00013863087377480587, "loss": 3.1021575927734375, "step": 3750, "token_acc": 0.2876891923810838 }, { "epoch": 2.198768689533861, "grad_norm": 0.32600158488324765, "learning_rate": 0.00013862953818283521, "loss": 3.035088062286377, "step": 3751, "token_acc": 0.2969977958904364 }, { "epoch": 2.19935502785107, "grad_norm": 0.41950201319552866, "learning_rate": 0.0001386282019461825, "loss": 3.009361505508423, "step": 3752, "token_acc": 0.29905385561140896 }, { "epoch": 2.199941366168279, "grad_norm": 0.41036158603621264, "learning_rate": 0.0001386268650648603, "loss": 3.0970406532287598, "step": 3753, "token_acc": 0.28790319472812054 }, { "epoch": 2.2005277044854883, "grad_norm": 0.42631355562131457, "learning_rate": 0.0001386255275388812, "loss": 3.0976290702819824, "step": 3754, "token_acc": 0.28765802934411766 }, { "epoch": 2.201114042802697, "grad_norm": 0.33669948795740023, "learning_rate": 0.00013862418936825773, "loss": 3.024677276611328, "step": 3755, "token_acc": 0.29783821174522596 }, { "epoch": 2.201700381119906, "grad_norm": 0.4239687563564588, "learning_rate": 0.0001386228505530025, "loss": 3.0452051162719727, "step": 3756, "token_acc": 0.29310620430955475 }, { "epoch": 2.202286719437115, "grad_norm": 0.35681349673147544, "learning_rate": 0.000138621511093128, "loss": 3.065423011779785, "step": 3757, "token_acc": 0.292034497413203 }, { "epoch": 2.2028730577543243, "grad_norm": 0.4468831781078109, "learning_rate": 0.00013862017098864693, "loss": 3.0564935207366943, "step": 3758, "token_acc": 0.2935169793398243 }, { "epoch": 2.2034593960715334, "grad_norm": 0.3804211722185984, "learning_rate": 0.0001386188302395718, "loss": 3.0392606258392334, "step": 3759, "token_acc": 0.296826453617399 }, { "epoch": 2.2040457343887425, "grad_norm": 0.3595889514401457, "learning_rate": 0.00013861748884591522, "loss": 3.0608508586883545, "step": 3760, "token_acc": 0.2949711169896885 }, { "epoch": 2.204632072705951, "grad_norm": 0.34024012274823767, "learning_rate": 0.0001386161468076898, "loss": 3.032320737838745, "step": 3761, "token_acc": 0.2976641397230184 }, { "epoch": 2.2052184110231603, "grad_norm": 0.3707747622958935, "learning_rate": 0.00013861480412490814, "loss": 3.0459506511688232, "step": 3762, "token_acc": 0.29445863600929 }, { "epoch": 2.2058047493403694, "grad_norm": 0.4134574340058096, "learning_rate": 0.00013861346079758284, "loss": 3.0669643878936768, "step": 3763, "token_acc": 0.29578423548190225 }, { "epoch": 2.2063910876575785, "grad_norm": 0.3827924929971751, "learning_rate": 0.00013861211682572656, "loss": 3.0904345512390137, "step": 3764, "token_acc": 0.2895340594678232 }, { "epoch": 2.2069774259747876, "grad_norm": 0.3975125811324629, "learning_rate": 0.0001386107722093519, "loss": 3.062269687652588, "step": 3765, "token_acc": 0.29238909892798004 }, { "epoch": 2.2075637642919963, "grad_norm": 0.37070096987124396, "learning_rate": 0.00013860942694847146, "loss": 3.096532106399536, "step": 3766, "token_acc": 0.2885560858233541 }, { "epoch": 2.2081501026092054, "grad_norm": 0.3733632937318072, "learning_rate": 0.00013860808104309793, "loss": 3.0766539573669434, "step": 3767, "token_acc": 0.291986503811225 }, { "epoch": 2.2087364409264145, "grad_norm": 0.3861482177380816, "learning_rate": 0.00013860673449324392, "loss": 3.0388424396514893, "step": 3768, "token_acc": 0.29634862735533946 }, { "epoch": 2.2093227792436236, "grad_norm": 0.3328788152812705, "learning_rate": 0.0001386053872989221, "loss": 3.049903631210327, "step": 3769, "token_acc": 0.29628138347213584 }, { "epoch": 2.2099091175608327, "grad_norm": 0.3665032437250928, "learning_rate": 0.0001386040394601451, "loss": 3.0990562438964844, "step": 3770, "token_acc": 0.2877302443901239 }, { "epoch": 2.210495455878042, "grad_norm": 0.3935427250092516, "learning_rate": 0.00013860269097692562, "loss": 3.042750120162964, "step": 3771, "token_acc": 0.2950490217941688 }, { "epoch": 2.2110817941952505, "grad_norm": 0.3790368450262934, "learning_rate": 0.00013860134184927626, "loss": 3.087726593017578, "step": 3772, "token_acc": 0.2890953251907334 }, { "epoch": 2.2116681325124596, "grad_norm": 0.3329180179693466, "learning_rate": 0.00013859999207720976, "loss": 3.066281795501709, "step": 3773, "token_acc": 0.292274211671706 }, { "epoch": 2.2122544708296688, "grad_norm": 0.36837434853005435, "learning_rate": 0.00013859864166073876, "loss": 3.0711097717285156, "step": 3774, "token_acc": 0.29330375526361235 }, { "epoch": 2.212840809146878, "grad_norm": 0.34550041619504096, "learning_rate": 0.00013859729059987596, "loss": 3.068091869354248, "step": 3775, "token_acc": 0.2911572429221351 }, { "epoch": 2.213427147464087, "grad_norm": 0.34686041244132104, "learning_rate": 0.0001385959388946341, "loss": 3.0785999298095703, "step": 3776, "token_acc": 0.290350476396988 }, { "epoch": 2.2140134857812956, "grad_norm": 0.4427015523386205, "learning_rate": 0.00013859458654502576, "loss": 3.0862464904785156, "step": 3777, "token_acc": 0.2881395686791404 }, { "epoch": 2.2145998240985048, "grad_norm": 0.3380676468191562, "learning_rate": 0.00013859323355106372, "loss": 3.0615451335906982, "step": 3778, "token_acc": 0.29317260934898676 }, { "epoch": 2.215186162415714, "grad_norm": 0.32874635886861964, "learning_rate": 0.0001385918799127607, "loss": 3.0828118324279785, "step": 3779, "token_acc": 0.291167430230768 }, { "epoch": 2.215772500732923, "grad_norm": 0.35866249491572844, "learning_rate": 0.00013859052563012941, "loss": 3.0996599197387695, "step": 3780, "token_acc": 0.28767575704820325 }, { "epoch": 2.216358839050132, "grad_norm": 0.3356062617591622, "learning_rate": 0.00013858917070318254, "loss": 3.096961498260498, "step": 3781, "token_acc": 0.287208802327052 }, { "epoch": 2.2169451773673408, "grad_norm": 0.2866381658289626, "learning_rate": 0.00013858781513193287, "loss": 3.034224033355713, "step": 3782, "token_acc": 0.2949192544990444 }, { "epoch": 2.21753151568455, "grad_norm": 0.3422182396837577, "learning_rate": 0.00013858645891639306, "loss": 3.080829620361328, "step": 3783, "token_acc": 0.2900252162036785 }, { "epoch": 2.218117854001759, "grad_norm": 0.33660444229786707, "learning_rate": 0.00013858510205657588, "loss": 3.023237705230713, "step": 3784, "token_acc": 0.29838801844542784 }, { "epoch": 2.218704192318968, "grad_norm": 0.39360677680222356, "learning_rate": 0.00013858374455249413, "loss": 3.0547568798065186, "step": 3785, "token_acc": 0.29511132685257335 }, { "epoch": 2.219290530636177, "grad_norm": 0.36827221552212785, "learning_rate": 0.00013858238640416048, "loss": 3.0956835746765137, "step": 3786, "token_acc": 0.2875418816002425 }, { "epoch": 2.219876868953386, "grad_norm": 0.3436965245656828, "learning_rate": 0.00013858102761158775, "loss": 3.0983381271362305, "step": 3787, "token_acc": 0.2858509422076356 }, { "epoch": 2.220463207270595, "grad_norm": 0.3689248687425219, "learning_rate": 0.0001385796681747887, "loss": 3.0596258640289307, "step": 3788, "token_acc": 0.29293988195418114 }, { "epoch": 2.221049545587804, "grad_norm": 0.3554970149908357, "learning_rate": 0.00013857830809377604, "loss": 3.0135722160339355, "step": 3789, "token_acc": 0.29909582193842166 }, { "epoch": 2.221635883905013, "grad_norm": 0.3805101502623777, "learning_rate": 0.00013857694736856257, "loss": 3.064178466796875, "step": 3790, "token_acc": 0.29190498273818727 }, { "epoch": 2.2222222222222223, "grad_norm": 0.3787794242671034, "learning_rate": 0.0001385755859991611, "loss": 3.0673046112060547, "step": 3791, "token_acc": 0.29237323109282853 }, { "epoch": 2.2228085605394314, "grad_norm": 0.4147574363674826, "learning_rate": 0.00013857422398558443, "loss": 3.07576322555542, "step": 3792, "token_acc": 0.2910539248632925 }, { "epoch": 2.22339489885664, "grad_norm": 0.3503271229218247, "learning_rate": 0.00013857286132784534, "loss": 3.0468931198120117, "step": 3793, "token_acc": 0.2936412116479316 }, { "epoch": 2.223981237173849, "grad_norm": 0.3423016308594863, "learning_rate": 0.0001385714980259566, "loss": 3.043551445007324, "step": 3794, "token_acc": 0.29536154241155327 }, { "epoch": 2.2245675754910583, "grad_norm": 0.37258745494018086, "learning_rate": 0.00013857013407993105, "loss": 3.0831363201141357, "step": 3795, "token_acc": 0.2908149930779283 }, { "epoch": 2.2251539138082674, "grad_norm": 0.3218087826637603, "learning_rate": 0.00013856876948978146, "loss": 3.094085216522217, "step": 3796, "token_acc": 0.28828291987823046 }, { "epoch": 2.2257402521254765, "grad_norm": 0.42376246305839144, "learning_rate": 0.00013856740425552072, "loss": 3.100907802581787, "step": 3797, "token_acc": 0.2863208623837981 }, { "epoch": 2.226326590442685, "grad_norm": 0.3653197680591713, "learning_rate": 0.00013856603837716157, "loss": 3.112792730331421, "step": 3798, "token_acc": 0.28421200187256507 }, { "epoch": 2.2269129287598943, "grad_norm": 0.3814786710666813, "learning_rate": 0.00013856467185471692, "loss": 3.0908679962158203, "step": 3799, "token_acc": 0.28907967837651527 }, { "epoch": 2.2274992670771034, "grad_norm": 0.3453140554524636, "learning_rate": 0.00013856330468819955, "loss": 3.0593624114990234, "step": 3800, "token_acc": 0.29216386284833695 }, { "epoch": 2.2280856053943126, "grad_norm": 0.31385205146097345, "learning_rate": 0.00013856193687762232, "loss": 3.0305235385894775, "step": 3801, "token_acc": 0.2981718995738585 }, { "epoch": 2.2286719437115217, "grad_norm": 0.3659994272375126, "learning_rate": 0.0001385605684229981, "loss": 3.0897340774536133, "step": 3802, "token_acc": 0.28929190256612264 }, { "epoch": 2.2292582820287308, "grad_norm": 0.30963311315338893, "learning_rate": 0.0001385591993243397, "loss": 3.054443597793579, "step": 3803, "token_acc": 0.29312993051564706 }, { "epoch": 2.2298446203459394, "grad_norm": 0.3463074572278685, "learning_rate": 0.00013855782958166005, "loss": 3.0653886795043945, "step": 3804, "token_acc": 0.2917200269782454 }, { "epoch": 2.2304309586631486, "grad_norm": 0.34667633162718664, "learning_rate": 0.00013855645919497192, "loss": 3.0768229961395264, "step": 3805, "token_acc": 0.2915734249270794 }, { "epoch": 2.2310172969803577, "grad_norm": 0.33314599253969146, "learning_rate": 0.00013855508816428827, "loss": 3.0449976921081543, "step": 3806, "token_acc": 0.2950463091015041 }, { "epoch": 2.231603635297567, "grad_norm": 0.34785222809156313, "learning_rate": 0.00013855371648962192, "loss": 3.125851631164551, "step": 3807, "token_acc": 0.284079295875442 }, { "epoch": 2.232189973614776, "grad_norm": 0.37300313620739417, "learning_rate": 0.0001385523441709858, "loss": 3.060353994369507, "step": 3808, "token_acc": 0.29168526977367176 }, { "epoch": 2.2327763119319846, "grad_norm": 0.3257428815474043, "learning_rate": 0.00013855097120839277, "loss": 3.0714316368103027, "step": 3809, "token_acc": 0.29021098976499515 }, { "epoch": 2.2333626502491937, "grad_norm": 0.3591617873836489, "learning_rate": 0.00013854959760185577, "loss": 3.0790562629699707, "step": 3810, "token_acc": 0.2906487716918742 }, { "epoch": 2.233948988566403, "grad_norm": 0.36750578779158793, "learning_rate": 0.00013854822335138765, "loss": 3.053553581237793, "step": 3811, "token_acc": 0.293160701344496 }, { "epoch": 2.234535326883612, "grad_norm": 0.354244645765748, "learning_rate": 0.00013854684845700135, "loss": 3.044809341430664, "step": 3812, "token_acc": 0.29472120966653165 }, { "epoch": 2.235121665200821, "grad_norm": 0.3537797132358028, "learning_rate": 0.00013854547291870976, "loss": 3.1144967079162598, "step": 3813, "token_acc": 0.28588642402521164 }, { "epoch": 2.23570800351803, "grad_norm": 0.34587562793021065, "learning_rate": 0.00013854409673652585, "loss": 3.0583996772766113, "step": 3814, "token_acc": 0.2921480357488666 }, { "epoch": 2.236294341835239, "grad_norm": 0.27271027611091136, "learning_rate": 0.00013854271991046248, "loss": 3.108921766281128, "step": 3815, "token_acc": 0.28559805766750174 }, { "epoch": 2.236880680152448, "grad_norm": 0.36674670409614357, "learning_rate": 0.00013854134244053264, "loss": 3.035670757293701, "step": 3816, "token_acc": 0.2959816894867647 }, { "epoch": 2.237467018469657, "grad_norm": 0.35812017812214175, "learning_rate": 0.00013853996432674923, "loss": 3.0336666107177734, "step": 3817, "token_acc": 0.29797116441456 }, { "epoch": 2.238053356786866, "grad_norm": 0.306802967751694, "learning_rate": 0.0001385385855691252, "loss": 3.0306594371795654, "step": 3818, "token_acc": 0.2985627244938917 }, { "epoch": 2.2386396951040752, "grad_norm": 0.3605161758675863, "learning_rate": 0.00013853720616767358, "loss": 3.050117015838623, "step": 3819, "token_acc": 0.2942519705658994 }, { "epoch": 2.239226033421284, "grad_norm": 0.3523337502953437, "learning_rate": 0.0001385358261224072, "loss": 3.0416762828826904, "step": 3820, "token_acc": 0.29354767315696 }, { "epoch": 2.239812371738493, "grad_norm": 0.35480552714582975, "learning_rate": 0.0001385344454333391, "loss": 3.0634946823120117, "step": 3821, "token_acc": 0.2935524139277311 }, { "epoch": 2.240398710055702, "grad_norm": 0.3873114416482302, "learning_rate": 0.00013853306410048228, "loss": 3.0801591873168945, "step": 3822, "token_acc": 0.2903039762956692 }, { "epoch": 2.2409850483729112, "grad_norm": 0.4681869828660558, "learning_rate": 0.00013853168212384962, "loss": 3.0847954750061035, "step": 3823, "token_acc": 0.2892951614161893 }, { "epoch": 2.2415713866901203, "grad_norm": 0.45669102775933923, "learning_rate": 0.00013853029950345417, "loss": 3.0518932342529297, "step": 3824, "token_acc": 0.2955840327827603 }, { "epoch": 2.2421577250073295, "grad_norm": 0.47172488476489194, "learning_rate": 0.0001385289162393089, "loss": 3.0572433471679688, "step": 3825, "token_acc": 0.2922535027116924 }, { "epoch": 2.242744063324538, "grad_norm": 0.3479425232860213, "learning_rate": 0.00013852753233142682, "loss": 3.0926454067230225, "step": 3826, "token_acc": 0.2883968647597815 }, { "epoch": 2.2433304016417472, "grad_norm": 0.38904355744361435, "learning_rate": 0.00013852614777982091, "loss": 3.0816333293914795, "step": 3827, "token_acc": 0.28780635743070976 }, { "epoch": 2.2439167399589564, "grad_norm": 0.3534492471823705, "learning_rate": 0.00013852476258450417, "loss": 3.0962390899658203, "step": 3828, "token_acc": 0.28809711886251194 }, { "epoch": 2.2445030782761655, "grad_norm": 0.3680981354693585, "learning_rate": 0.00013852337674548964, "loss": 3.023831844329834, "step": 3829, "token_acc": 0.296342772439174 }, { "epoch": 2.2450894165933746, "grad_norm": 0.316223346961509, "learning_rate": 0.00013852199026279032, "loss": 3.069500684738159, "step": 3830, "token_acc": 0.2908919037384017 }, { "epoch": 2.2456757549105832, "grad_norm": 0.36365936128137066, "learning_rate": 0.00013852060313641925, "loss": 3.0748047828674316, "step": 3831, "token_acc": 0.2905084953877795 }, { "epoch": 2.2462620932277924, "grad_norm": 0.3868203613551509, "learning_rate": 0.00013851921536638942, "loss": 3.091153144836426, "step": 3832, "token_acc": 0.28856550970349376 }, { "epoch": 2.2468484315450015, "grad_norm": 0.30695453576793913, "learning_rate": 0.00013851782695271388, "loss": 3.058513879776001, "step": 3833, "token_acc": 0.2913674202236187 }, { "epoch": 2.2474347698622106, "grad_norm": 0.320822947984259, "learning_rate": 0.00013851643789540569, "loss": 3.0583903789520264, "step": 3834, "token_acc": 0.2920655363837905 }, { "epoch": 2.2480211081794197, "grad_norm": 0.3334916738846431, "learning_rate": 0.00013851504819447792, "loss": 3.0836739540100098, "step": 3835, "token_acc": 0.29022244823974863 }, { "epoch": 2.2486074464966284, "grad_norm": 0.3624911257458761, "learning_rate": 0.0001385136578499436, "loss": 3.0421528816223145, "step": 3836, "token_acc": 0.29452986498941275 }, { "epoch": 2.2491937848138375, "grad_norm": 0.3842212216548702, "learning_rate": 0.00013851226686181577, "loss": 3.086411952972412, "step": 3837, "token_acc": 0.28994420623030426 }, { "epoch": 2.2497801231310466, "grad_norm": 0.4396663585584787, "learning_rate": 0.00013851087523010752, "loss": 3.0686631202697754, "step": 3838, "token_acc": 0.29272678780324857 }, { "epoch": 2.2503664614482557, "grad_norm": 0.355390869958925, "learning_rate": 0.0001385094829548319, "loss": 3.051602363586426, "step": 3839, "token_acc": 0.29505380423899413 }, { "epoch": 2.250952799765465, "grad_norm": 0.3815435667966809, "learning_rate": 0.000138508090036002, "loss": 3.065894365310669, "step": 3840, "token_acc": 0.29146291946183334 }, { "epoch": 2.2515391380826735, "grad_norm": 0.3516208026766999, "learning_rate": 0.00013850669647363093, "loss": 3.0497169494628906, "step": 3841, "token_acc": 0.2940542736515152 }, { "epoch": 2.2521254763998826, "grad_norm": 0.31093523820407654, "learning_rate": 0.00013850530226773176, "loss": 3.0835113525390625, "step": 3842, "token_acc": 0.2908220698964202 }, { "epoch": 2.2527118147170917, "grad_norm": 0.4287870879387594, "learning_rate": 0.0001385039074183176, "loss": 3.081928253173828, "step": 3843, "token_acc": 0.2903566596912226 }, { "epoch": 2.253298153034301, "grad_norm": 0.4632590715839384, "learning_rate": 0.00013850251192540152, "loss": 3.0524673461914062, "step": 3844, "token_acc": 0.29384110166209704 }, { "epoch": 2.25388449135151, "grad_norm": 0.3257951862837143, "learning_rate": 0.00013850111578899666, "loss": 3.063847064971924, "step": 3845, "token_acc": 0.2917265757605643 }, { "epoch": 2.254470829668719, "grad_norm": 0.33257806071671053, "learning_rate": 0.00013849971900911612, "loss": 3.0801315307617188, "step": 3846, "token_acc": 0.28987972922420996 }, { "epoch": 2.2550571679859277, "grad_norm": 0.42810361042950146, "learning_rate": 0.000138498321585773, "loss": 3.082514762878418, "step": 3847, "token_acc": 0.2906722680729772 }, { "epoch": 2.255643506303137, "grad_norm": 0.3730060321896698, "learning_rate": 0.0001384969235189805, "loss": 3.0572128295898438, "step": 3848, "token_acc": 0.29362732349731757 }, { "epoch": 2.256229844620346, "grad_norm": 0.31898839106236154, "learning_rate": 0.00013849552480875167, "loss": 3.0367794036865234, "step": 3849, "token_acc": 0.2955675304435111 }, { "epoch": 2.256816182937555, "grad_norm": 0.40894332547215756, "learning_rate": 0.0001384941254550997, "loss": 3.0966711044311523, "step": 3850, "token_acc": 0.2863093326149351 }, { "epoch": 2.257402521254764, "grad_norm": 0.41981508593598515, "learning_rate": 0.0001384927254580377, "loss": 3.0757436752319336, "step": 3851, "token_acc": 0.2913163342910259 }, { "epoch": 2.257988859571973, "grad_norm": 0.38290068043357256, "learning_rate": 0.00013849132481757887, "loss": 3.0381224155426025, "step": 3852, "token_acc": 0.2966919877779664 }, { "epoch": 2.258575197889182, "grad_norm": 0.35797918625685216, "learning_rate": 0.0001384899235337363, "loss": 3.084138870239258, "step": 3853, "token_acc": 0.28986087759626694 }, { "epoch": 2.259161536206391, "grad_norm": 0.4266088980771241, "learning_rate": 0.0001384885216065232, "loss": 3.0886075496673584, "step": 3854, "token_acc": 0.2905526597888183 }, { "epoch": 2.2597478745236, "grad_norm": 0.37619670303682273, "learning_rate": 0.00013848711903595274, "loss": 3.059359550476074, "step": 3855, "token_acc": 0.2928408096947423 }, { "epoch": 2.2603342128408093, "grad_norm": 0.3277123481096555, "learning_rate": 0.00013848571582203808, "loss": 3.043466567993164, "step": 3856, "token_acc": 0.2938416138551594 }, { "epoch": 2.2609205511580184, "grad_norm": 0.4149954854926732, "learning_rate": 0.0001384843119647924, "loss": 3.0852713584899902, "step": 3857, "token_acc": 0.2891124940460165 }, { "epoch": 2.261506889475227, "grad_norm": 0.35594866015117504, "learning_rate": 0.0001384829074642289, "loss": 3.083378314971924, "step": 3858, "token_acc": 0.2878920332445703 }, { "epoch": 2.262093227792436, "grad_norm": 0.3596749401916214, "learning_rate": 0.00013848150232036077, "loss": 3.0727322101593018, "step": 3859, "token_acc": 0.2911177220569486 }, { "epoch": 2.2626795661096453, "grad_norm": 0.31384890629757456, "learning_rate": 0.00013848009653320118, "loss": 3.056255340576172, "step": 3860, "token_acc": 0.2937604992254056 }, { "epoch": 2.2632659044268544, "grad_norm": 0.3693556109221701, "learning_rate": 0.00013847869010276338, "loss": 3.0808157920837402, "step": 3861, "token_acc": 0.2905301360985094 }, { "epoch": 2.2638522427440635, "grad_norm": 0.28549176013999433, "learning_rate": 0.00013847728302906058, "loss": 3.045353889465332, "step": 3862, "token_acc": 0.2952277911751287 }, { "epoch": 2.264438581061272, "grad_norm": 0.34361119960744657, "learning_rate": 0.00013847587531210596, "loss": 3.008805990219116, "step": 3863, "token_acc": 0.30027796456972666 }, { "epoch": 2.2650249193784813, "grad_norm": 0.3438879524843519, "learning_rate": 0.00013847446695191277, "loss": 3.0809240341186523, "step": 3864, "token_acc": 0.2918190527653998 }, { "epoch": 2.2656112576956904, "grad_norm": 0.4481729237126814, "learning_rate": 0.00013847305794849422, "loss": 3.112427234649658, "step": 3865, "token_acc": 0.2845875798051524 }, { "epoch": 2.2661975960128995, "grad_norm": 0.4158568425389823, "learning_rate": 0.00013847164830186356, "loss": 3.0549936294555664, "step": 3866, "token_acc": 0.29271134958212686 }, { "epoch": 2.2667839343301086, "grad_norm": 0.38058550616335823, "learning_rate": 0.00013847023801203404, "loss": 3.058981418609619, "step": 3867, "token_acc": 0.29141808410219733 }, { "epoch": 2.2673702726473177, "grad_norm": 0.3847258988173558, "learning_rate": 0.00013846882707901892, "loss": 3.02866530418396, "step": 3868, "token_acc": 0.29838261944418254 }, { "epoch": 2.2679566109645264, "grad_norm": 0.3297279481140381, "learning_rate": 0.0001384674155028314, "loss": 3.075563430786133, "step": 3869, "token_acc": 0.29091768170118565 }, { "epoch": 2.2685429492817355, "grad_norm": 0.38615549597048326, "learning_rate": 0.00013846600328348477, "loss": 3.053814649581909, "step": 3870, "token_acc": 0.29585497798604427 }, { "epoch": 2.2691292875989446, "grad_norm": 0.3442695087994908, "learning_rate": 0.00013846459042099232, "loss": 3.1013071537017822, "step": 3871, "token_acc": 0.2869471977404085 }, { "epoch": 2.2697156259161537, "grad_norm": 0.3491753752334351, "learning_rate": 0.0001384631769153673, "loss": 3.0239322185516357, "step": 3872, "token_acc": 0.29861107402799214 }, { "epoch": 2.270301964233363, "grad_norm": 0.31757208227376005, "learning_rate": 0.00013846176276662296, "loss": 3.0704989433288574, "step": 3873, "token_acc": 0.2924044996861807 }, { "epoch": 2.2708883025505715, "grad_norm": 0.33313170151748045, "learning_rate": 0.00013846034797477264, "loss": 3.104428291320801, "step": 3874, "token_acc": 0.2878942808498635 }, { "epoch": 2.2714746408677806, "grad_norm": 0.3178161077129083, "learning_rate": 0.0001384589325398296, "loss": 3.0700361728668213, "step": 3875, "token_acc": 0.2909593489251511 }, { "epoch": 2.2720609791849897, "grad_norm": 0.2934594505903017, "learning_rate": 0.00013845751646180714, "loss": 3.0353221893310547, "step": 3876, "token_acc": 0.29744088931800133 }, { "epoch": 2.272647317502199, "grad_norm": 0.30526435293479054, "learning_rate": 0.00013845609974071855, "loss": 3.0820460319519043, "step": 3877, "token_acc": 0.28996609024412423 }, { "epoch": 2.273233655819408, "grad_norm": 0.31425111242090714, "learning_rate": 0.00013845468237657716, "loss": 3.1019535064697266, "step": 3878, "token_acc": 0.28769741474700733 }, { "epoch": 2.273819994136617, "grad_norm": 0.3695404059605474, "learning_rate": 0.00013845326436939627, "loss": 3.0397562980651855, "step": 3879, "token_acc": 0.2965638262058948 }, { "epoch": 2.2744063324538257, "grad_norm": 0.44180940491657555, "learning_rate": 0.0001384518457191892, "loss": 3.0991034507751465, "step": 3880, "token_acc": 0.28856595254454437 }, { "epoch": 2.274992670771035, "grad_norm": 0.4338804599948091, "learning_rate": 0.00013845042642596927, "loss": 3.0665316581726074, "step": 3881, "token_acc": 0.293010533815073 }, { "epoch": 2.275579009088244, "grad_norm": 0.3490998043387444, "learning_rate": 0.0001384490064897498, "loss": 3.045743703842163, "step": 3882, "token_acc": 0.29341090544365217 }, { "epoch": 2.276165347405453, "grad_norm": 0.4132368175385659, "learning_rate": 0.00013844758591054421, "loss": 3.07841157913208, "step": 3883, "token_acc": 0.2898292054126155 }, { "epoch": 2.2767516857226617, "grad_norm": 0.3615695796033046, "learning_rate": 0.00013844616468836575, "loss": 3.060102939605713, "step": 3884, "token_acc": 0.2920097947675321 }, { "epoch": 2.277338024039871, "grad_norm": 0.4054110198104882, "learning_rate": 0.0001384447428232278, "loss": 3.0752532482147217, "step": 3885, "token_acc": 0.2910475907166496 }, { "epoch": 2.27792436235708, "grad_norm": 0.33057903400938954, "learning_rate": 0.00013844332031514374, "loss": 3.1007637977600098, "step": 3886, "token_acc": 0.28695548521250813 }, { "epoch": 2.278510700674289, "grad_norm": 0.36419562016706897, "learning_rate": 0.00013844189716412692, "loss": 3.071690559387207, "step": 3887, "token_acc": 0.29020181790171007 }, { "epoch": 2.279097038991498, "grad_norm": 0.3637926280923247, "learning_rate": 0.00013844047337019066, "loss": 3.104456663131714, "step": 3888, "token_acc": 0.2894872674894673 }, { "epoch": 2.2796833773087073, "grad_norm": 0.3604741317456975, "learning_rate": 0.00013843904893334842, "loss": 3.102541208267212, "step": 3889, "token_acc": 0.2872509046429846 }, { "epoch": 2.280269715625916, "grad_norm": 0.3754668565514983, "learning_rate": 0.00013843762385361353, "loss": 3.069880962371826, "step": 3890, "token_acc": 0.2927078214191207 }, { "epoch": 2.280856053943125, "grad_norm": 0.303617069769762, "learning_rate": 0.00013843619813099937, "loss": 3.011373281478882, "step": 3891, "token_acc": 0.29978096577400526 }, { "epoch": 2.281442392260334, "grad_norm": 0.40685262449068804, "learning_rate": 0.00013843477176551935, "loss": 3.123256206512451, "step": 3892, "token_acc": 0.2862076523765991 }, { "epoch": 2.2820287305775433, "grad_norm": 0.3755941747882267, "learning_rate": 0.0001384333447571869, "loss": 3.0905954837799072, "step": 3893, "token_acc": 0.28829916841627495 }, { "epoch": 2.2826150688947524, "grad_norm": 0.3174386471742889, "learning_rate": 0.00013843191710601535, "loss": 3.072826385498047, "step": 3894, "token_acc": 0.2911789489711005 }, { "epoch": 2.283201407211961, "grad_norm": 0.3469984076873514, "learning_rate": 0.00013843048881201814, "loss": 3.0636138916015625, "step": 3895, "token_acc": 0.29226631896340965 }, { "epoch": 2.28378774552917, "grad_norm": 0.34896365888440845, "learning_rate": 0.00013842905987520874, "loss": 3.0686473846435547, "step": 3896, "token_acc": 0.29159882774340606 }, { "epoch": 2.2843740838463793, "grad_norm": 0.3258553343889736, "learning_rate": 0.0001384276302956005, "loss": 3.091007947921753, "step": 3897, "token_acc": 0.2896524105242369 }, { "epoch": 2.2849604221635884, "grad_norm": 0.30338385508668364, "learning_rate": 0.00013842620007320692, "loss": 3.0607547760009766, "step": 3898, "token_acc": 0.29499240410669963 }, { "epoch": 2.2855467604807975, "grad_norm": 0.3059890838355351, "learning_rate": 0.00013842476920804137, "loss": 3.099848508834839, "step": 3899, "token_acc": 0.28921514187038666 }, { "epoch": 2.2861330987980066, "grad_norm": 0.3102744394809184, "learning_rate": 0.0001384233377001173, "loss": 3.082677125930786, "step": 3900, "token_acc": 0.289320009697583 }, { "epoch": 2.2867194371152153, "grad_norm": 0.29937684176137513, "learning_rate": 0.0001384219055494482, "loss": 3.088576078414917, "step": 3901, "token_acc": 0.28855555080122375 }, { "epoch": 2.2873057754324244, "grad_norm": 0.3386098938733586, "learning_rate": 0.00013842047275604752, "loss": 3.0769803524017334, "step": 3902, "token_acc": 0.2905053338666287 }, { "epoch": 2.2878921137496335, "grad_norm": 0.3349865834286317, "learning_rate": 0.00013841903931992866, "loss": 3.060807228088379, "step": 3903, "token_acc": 0.29270408163265305 }, { "epoch": 2.2884784520668426, "grad_norm": 0.31807035820314544, "learning_rate": 0.00013841760524110512, "loss": 3.031798839569092, "step": 3904, "token_acc": 0.2985571181041039 }, { "epoch": 2.2890647903840518, "grad_norm": 0.39711321733173727, "learning_rate": 0.00013841617051959038, "loss": 3.1210246086120605, "step": 3905, "token_acc": 0.28511430981812086 }, { "epoch": 2.2896511287012604, "grad_norm": 0.3421190504437334, "learning_rate": 0.0001384147351553979, "loss": 3.0354483127593994, "step": 3906, "token_acc": 0.295446856931972 }, { "epoch": 2.2902374670184695, "grad_norm": 0.3453646496565421, "learning_rate": 0.0001384132991485412, "loss": 3.059192180633545, "step": 3907, "token_acc": 0.2917562575985586 }, { "epoch": 2.2908238053356786, "grad_norm": 0.39893429695253313, "learning_rate": 0.00013841186249903371, "loss": 3.053541898727417, "step": 3908, "token_acc": 0.29299291704644403 }, { "epoch": 2.2914101436528878, "grad_norm": 0.2959880178171732, "learning_rate": 0.00013841042520688898, "loss": 3.0286571979522705, "step": 3909, "token_acc": 0.2968736752861382 }, { "epoch": 2.291996481970097, "grad_norm": 0.4179579431544344, "learning_rate": 0.0001384089872721205, "loss": 3.0871942043304443, "step": 3910, "token_acc": 0.2885968491943638 }, { "epoch": 2.292582820287306, "grad_norm": 0.3633772333212466, "learning_rate": 0.00013840754869474172, "loss": 3.0757594108581543, "step": 3911, "token_acc": 0.29027082514859265 }, { "epoch": 2.2931691586045146, "grad_norm": 0.3988546266852141, "learning_rate": 0.00013840610947476626, "loss": 3.055666446685791, "step": 3912, "token_acc": 0.29214874077468944 }, { "epoch": 2.2937554969217238, "grad_norm": 0.3655916915048003, "learning_rate": 0.00013840466961220755, "loss": 3.061676502227783, "step": 3913, "token_acc": 0.2922934236818837 }, { "epoch": 2.294341835238933, "grad_norm": 0.403967399510546, "learning_rate": 0.00013840322910707914, "loss": 3.06962251663208, "step": 3914, "token_acc": 0.2916321716179497 }, { "epoch": 2.294928173556142, "grad_norm": 0.3998517520050068, "learning_rate": 0.00013840178795939458, "loss": 3.0726213455200195, "step": 3915, "token_acc": 0.29180546651140277 }, { "epoch": 2.295514511873351, "grad_norm": 0.35832908035239197, "learning_rate": 0.0001384003461691674, "loss": 3.040363311767578, "step": 3916, "token_acc": 0.29413685398832634 }, { "epoch": 2.2961008501905598, "grad_norm": 0.37753816086791764, "learning_rate": 0.00013839890373641112, "loss": 3.0885636806488037, "step": 3917, "token_acc": 0.28859214979101183 }, { "epoch": 2.296687188507769, "grad_norm": 0.44766841565925836, "learning_rate": 0.0001383974606611393, "loss": 3.1028342247009277, "step": 3918, "token_acc": 0.28623087550660276 }, { "epoch": 2.297273526824978, "grad_norm": 0.42745971793668625, "learning_rate": 0.00013839601694336551, "loss": 3.0878496170043945, "step": 3919, "token_acc": 0.288244458340521 }, { "epoch": 2.297859865142187, "grad_norm": 0.42854937699000895, "learning_rate": 0.00013839457258310332, "loss": 3.0849950313568115, "step": 3920, "token_acc": 0.2897338053422831 }, { "epoch": 2.298446203459396, "grad_norm": 0.3725991936571028, "learning_rate": 0.00013839312758036628, "loss": 3.086827039718628, "step": 3921, "token_acc": 0.29129848655594054 }, { "epoch": 2.2990325417766053, "grad_norm": 0.3285078645432387, "learning_rate": 0.00013839168193516797, "loss": 3.0785536766052246, "step": 3922, "token_acc": 0.2902410825161649 }, { "epoch": 2.299618880093814, "grad_norm": 0.3182123846032993, "learning_rate": 0.00013839023564752196, "loss": 3.070664167404175, "step": 3923, "token_acc": 0.2925844299782157 }, { "epoch": 2.300205218411023, "grad_norm": 0.3117010777522154, "learning_rate": 0.00013838878871744184, "loss": 3.079596996307373, "step": 3924, "token_acc": 0.2895881279674898 }, { "epoch": 2.300791556728232, "grad_norm": 0.3178648079353162, "learning_rate": 0.0001383873411449412, "loss": 3.0349442958831787, "step": 3925, "token_acc": 0.2950673775901584 }, { "epoch": 2.3013778950454413, "grad_norm": 0.3647867046323575, "learning_rate": 0.00013838589293003366, "loss": 3.08089017868042, "step": 3926, "token_acc": 0.2898600162031057 }, { "epoch": 2.3019642333626504, "grad_norm": 0.43829197298564454, "learning_rate": 0.00013838444407273282, "loss": 3.0685341358184814, "step": 3927, "token_acc": 0.28976690624627727 }, { "epoch": 2.302550571679859, "grad_norm": 0.4551887875512502, "learning_rate": 0.00013838299457305224, "loss": 3.033466339111328, "step": 3928, "token_acc": 0.29643042041125167 }, { "epoch": 2.303136909997068, "grad_norm": 0.33368281754810836, "learning_rate": 0.0001383815444310056, "loss": 3.0512032508850098, "step": 3929, "token_acc": 0.2940415111336609 }, { "epoch": 2.3037232483142773, "grad_norm": 0.4647301235279653, "learning_rate": 0.00013838009364660646, "loss": 3.0661444664001465, "step": 3930, "token_acc": 0.29198844437566096 }, { "epoch": 2.3043095866314864, "grad_norm": 0.34507979452575765, "learning_rate": 0.00013837864221986852, "loss": 3.0297296047210693, "step": 3931, "token_acc": 0.2966106792174789 }, { "epoch": 2.3048959249486956, "grad_norm": 0.368975645597791, "learning_rate": 0.00013837719015080536, "loss": 3.0287535190582275, "step": 3932, "token_acc": 0.2963790024254367 }, { "epoch": 2.3054822632659047, "grad_norm": 0.30871522169311133, "learning_rate": 0.00013837573743943066, "loss": 3.074644088745117, "step": 3933, "token_acc": 0.2908353147937558 }, { "epoch": 2.3060686015831133, "grad_norm": 0.3678700493138395, "learning_rate": 0.00013837428408575804, "loss": 3.0418813228607178, "step": 3934, "token_acc": 0.2938495740142057 }, { "epoch": 2.3066549399003224, "grad_norm": 0.30303871732615073, "learning_rate": 0.00013837283008980112, "loss": 3.0823864936828613, "step": 3935, "token_acc": 0.2888338936620595 }, { "epoch": 2.3072412782175316, "grad_norm": 0.38016233836064756, "learning_rate": 0.00013837137545157362, "loss": 3.0457088947296143, "step": 3936, "token_acc": 0.29506458164500604 }, { "epoch": 2.3078276165347407, "grad_norm": 0.411991614263233, "learning_rate": 0.00013836992017108918, "loss": 3.1036641597747803, "step": 3937, "token_acc": 0.28654254114060923 }, { "epoch": 2.3084139548519493, "grad_norm": 0.41491000002170797, "learning_rate": 0.00013836846424836147, "loss": 3.08748197555542, "step": 3938, "token_acc": 0.28838537894717003 }, { "epoch": 2.3090002931691584, "grad_norm": 0.29636185087528, "learning_rate": 0.00013836700768340418, "loss": 3.067830801010132, "step": 3939, "token_acc": 0.2922965308006859 }, { "epoch": 2.3095866314863676, "grad_norm": 0.3966499283124552, "learning_rate": 0.00013836555047623094, "loss": 3.094179153442383, "step": 3940, "token_acc": 0.28791486188003784 }, { "epoch": 2.3101729698035767, "grad_norm": 0.3775647204566085, "learning_rate": 0.00013836409262685552, "loss": 3.0462324619293213, "step": 3941, "token_acc": 0.2934821221097166 }, { "epoch": 2.310759308120786, "grad_norm": 0.30155060987588705, "learning_rate": 0.00013836263413529153, "loss": 3.0329737663269043, "step": 3942, "token_acc": 0.2971600507031549 }, { "epoch": 2.311345646437995, "grad_norm": 0.3334891402063084, "learning_rate": 0.00013836117500155276, "loss": 3.073120594024658, "step": 3943, "token_acc": 0.2929092131736944 }, { "epoch": 2.3119319847552036, "grad_norm": 0.31005464660951365, "learning_rate": 0.00013835971522565283, "loss": 3.038090229034424, "step": 3944, "token_acc": 0.29504287315280564 }, { "epoch": 2.3125183230724127, "grad_norm": 0.326270648546799, "learning_rate": 0.0001383582548076055, "loss": 3.0682320594787598, "step": 3945, "token_acc": 0.2906784853156251 }, { "epoch": 2.313104661389622, "grad_norm": 0.37340992572194265, "learning_rate": 0.0001383567937474245, "loss": 3.0554776191711426, "step": 3946, "token_acc": 0.29302766593064034 }, { "epoch": 2.313690999706831, "grad_norm": 0.30478096597378435, "learning_rate": 0.00013835533204512352, "loss": 3.1246190071105957, "step": 3947, "token_acc": 0.28467782616304804 }, { "epoch": 2.31427733802404, "grad_norm": 0.3516625512867287, "learning_rate": 0.0001383538697007163, "loss": 3.0827436447143555, "step": 3948, "token_acc": 0.2911390433468841 }, { "epoch": 2.3148636763412487, "grad_norm": 0.35923479240990774, "learning_rate": 0.0001383524067142166, "loss": 3.048055648803711, "step": 3949, "token_acc": 0.294329034163631 }, { "epoch": 2.315450014658458, "grad_norm": 0.3066055700570638, "learning_rate": 0.0001383509430856381, "loss": 3.1093990802764893, "step": 3950, "token_acc": 0.286667708369197 }, { "epoch": 2.316036352975667, "grad_norm": 0.3508106270957589, "learning_rate": 0.00013834947881499464, "loss": 3.0936403274536133, "step": 3951, "token_acc": 0.2883760622977634 }, { "epoch": 2.316622691292876, "grad_norm": 0.33554112783780493, "learning_rate": 0.0001383480139022999, "loss": 3.068114757537842, "step": 3952, "token_acc": 0.2914524538521919 }, { "epoch": 2.317209029610085, "grad_norm": 0.3260311605565729, "learning_rate": 0.0001383465483475677, "loss": 3.047159194946289, "step": 3953, "token_acc": 0.2940285214385092 }, { "epoch": 2.3177953679272942, "grad_norm": 0.3462536981459243, "learning_rate": 0.00013834508215081177, "loss": 3.0494046211242676, "step": 3954, "token_acc": 0.2942017254451936 }, { "epoch": 2.318381706244503, "grad_norm": 0.371036549300742, "learning_rate": 0.00013834361531204586, "loss": 3.0578203201293945, "step": 3955, "token_acc": 0.2923353354895015 }, { "epoch": 2.318968044561712, "grad_norm": 0.39886508513731955, "learning_rate": 0.00013834214783128382, "loss": 3.0986618995666504, "step": 3956, "token_acc": 0.2892249934691217 }, { "epoch": 2.319554382878921, "grad_norm": 0.3136540350075441, "learning_rate": 0.00013834067970853935, "loss": 3.0377135276794434, "step": 3957, "token_acc": 0.2965276396117156 }, { "epoch": 2.3201407211961302, "grad_norm": 0.35490422535823235, "learning_rate": 0.0001383392109438263, "loss": 3.1068856716156006, "step": 3958, "token_acc": 0.285132527133979 }, { "epoch": 2.3207270595133394, "grad_norm": 0.3777219580936742, "learning_rate": 0.0001383377415371585, "loss": 3.081146240234375, "step": 3959, "token_acc": 0.2904690630744562 }, { "epoch": 2.321313397830548, "grad_norm": 0.27863890762171567, "learning_rate": 0.00013833627148854963, "loss": 3.0828027725219727, "step": 3960, "token_acc": 0.2909300430742364 }, { "epoch": 2.321899736147757, "grad_norm": 0.4312174518679636, "learning_rate": 0.00013833480079801361, "loss": 3.087311267852783, "step": 3961, "token_acc": 0.2890677292503347 }, { "epoch": 2.3224860744649662, "grad_norm": 0.3575567588294255, "learning_rate": 0.0001383333294655642, "loss": 3.0697579383850098, "step": 3962, "token_acc": 0.29103192475824613 }, { "epoch": 2.3230724127821754, "grad_norm": 0.36566810034540165, "learning_rate": 0.00013833185749121527, "loss": 3.0293309688568115, "step": 3963, "token_acc": 0.2974110679844744 }, { "epoch": 2.3236587510993845, "grad_norm": 0.3734852795516362, "learning_rate": 0.00013833038487498058, "loss": 3.0859031677246094, "step": 3964, "token_acc": 0.28922819046027637 }, { "epoch": 2.3242450894165936, "grad_norm": 0.37806452367667803, "learning_rate": 0.000138328911616874, "loss": 3.032557249069214, "step": 3965, "token_acc": 0.29631383692477703 }, { "epoch": 2.3248314277338022, "grad_norm": 0.3650133085647914, "learning_rate": 0.00013832743771690942, "loss": 3.071094036102295, "step": 3966, "token_acc": 0.29140922084741 }, { "epoch": 2.3254177660510114, "grad_norm": 0.4120146141710093, "learning_rate": 0.00013832596317510062, "loss": 3.092928886413574, "step": 3967, "token_acc": 0.2880726216119415 }, { "epoch": 2.3260041043682205, "grad_norm": 0.38112498813787155, "learning_rate": 0.00013832448799146145, "loss": 3.0728306770324707, "step": 3968, "token_acc": 0.29063424270070537 }, { "epoch": 2.3265904426854296, "grad_norm": 0.3477489528267661, "learning_rate": 0.0001383230121660058, "loss": 3.093782424926758, "step": 3969, "token_acc": 0.2889436555570175 }, { "epoch": 2.3271767810026387, "grad_norm": 0.34481013484783046, "learning_rate": 0.00013832153569874747, "loss": 3.083939552307129, "step": 3970, "token_acc": 0.28950840923117444 }, { "epoch": 2.3277631193198474, "grad_norm": 0.3933006452051719, "learning_rate": 0.0001383200585897004, "loss": 3.081540107727051, "step": 3971, "token_acc": 0.2901016091660821 }, { "epoch": 2.3283494576370565, "grad_norm": 0.3686053933521343, "learning_rate": 0.00013831858083887847, "loss": 3.0922231674194336, "step": 3972, "token_acc": 0.28701522513767413 }, { "epoch": 2.3289357959542656, "grad_norm": 0.3605850353219423, "learning_rate": 0.00013831710244629553, "loss": 3.1031837463378906, "step": 3973, "token_acc": 0.28654869258099214 }, { "epoch": 2.3295221342714747, "grad_norm": 0.38370149366935163, "learning_rate": 0.00013831562341196544, "loss": 3.1072187423706055, "step": 3974, "token_acc": 0.2846763574531618 }, { "epoch": 2.330108472588684, "grad_norm": 0.3735128990342422, "learning_rate": 0.00013831414373590215, "loss": 3.06618332862854, "step": 3975, "token_acc": 0.2922717001196331 }, { "epoch": 2.330694810905893, "grad_norm": 0.2993427822473872, "learning_rate": 0.0001383126634181195, "loss": 3.0443787574768066, "step": 3976, "token_acc": 0.295177867991284 }, { "epoch": 2.3312811492231016, "grad_norm": 0.36019972953393525, "learning_rate": 0.0001383111824586315, "loss": 3.0392229557037354, "step": 3977, "token_acc": 0.2962553881430682 }, { "epoch": 2.3318674875403107, "grad_norm": 0.3577751018932838, "learning_rate": 0.00013830970085745191, "loss": 3.055182933807373, "step": 3978, "token_acc": 0.2928139342197195 }, { "epoch": 2.33245382585752, "grad_norm": 0.45208972173851025, "learning_rate": 0.0001383082186145948, "loss": 3.0525264739990234, "step": 3979, "token_acc": 0.2943688880864675 }, { "epoch": 2.333040164174729, "grad_norm": 0.34428818472429057, "learning_rate": 0.00013830673573007396, "loss": 3.026096820831299, "step": 3980, "token_acc": 0.2993248352022832 }, { "epoch": 2.333626502491938, "grad_norm": 0.3795493699860801, "learning_rate": 0.0001383052522039034, "loss": 3.1066219806671143, "step": 3981, "token_acc": 0.28654674768497274 }, { "epoch": 2.3342128408091467, "grad_norm": 0.38052898550665104, "learning_rate": 0.00013830376803609706, "loss": 3.0857529640197754, "step": 3982, "token_acc": 0.2893822467211557 }, { "epoch": 2.334799179126356, "grad_norm": 0.4202396438888431, "learning_rate": 0.00013830228322666885, "loss": 3.034611701965332, "step": 3983, "token_acc": 0.29587338693614834 }, { "epoch": 2.335385517443565, "grad_norm": 0.32271000943399963, "learning_rate": 0.00013830079777563272, "loss": 3.067209243774414, "step": 3984, "token_acc": 0.29072697027362326 }, { "epoch": 2.335971855760774, "grad_norm": 0.4078850918242615, "learning_rate": 0.00013829931168300263, "loss": 3.047999858856201, "step": 3985, "token_acc": 0.2958718607081161 }, { "epoch": 2.336558194077983, "grad_norm": 0.3768602634313092, "learning_rate": 0.00013829782494879255, "loss": 3.0607588291168213, "step": 3986, "token_acc": 0.2933943948106573 }, { "epoch": 2.3371445323951923, "grad_norm": 0.4410022734936046, "learning_rate": 0.00013829633757301643, "loss": 3.0240979194641113, "step": 3987, "token_acc": 0.2974994757810862 }, { "epoch": 2.337730870712401, "grad_norm": 0.3610629135036041, "learning_rate": 0.00013829484955568824, "loss": 3.07926607131958, "step": 3988, "token_acc": 0.28910768907618184 }, { "epoch": 2.33831720902961, "grad_norm": 0.3735683157088353, "learning_rate": 0.000138293360896822, "loss": 3.0647778511047363, "step": 3989, "token_acc": 0.292597104866706 }, { "epoch": 2.338903547346819, "grad_norm": 0.4249051342349725, "learning_rate": 0.00013829187159643166, "loss": 3.0688180923461914, "step": 3990, "token_acc": 0.2910318081236073 }, { "epoch": 2.3394898856640283, "grad_norm": 0.3321492461878886, "learning_rate": 0.00013829038165453117, "loss": 3.0527029037475586, "step": 3991, "token_acc": 0.29314810700610305 }, { "epoch": 2.340076223981237, "grad_norm": 0.3922092786319316, "learning_rate": 0.0001382888910711346, "loss": 3.073105812072754, "step": 3992, "token_acc": 0.2891765056618127 }, { "epoch": 2.340662562298446, "grad_norm": 0.4373794295474907, "learning_rate": 0.00013828739984625592, "loss": 3.0416932106018066, "step": 3993, "token_acc": 0.2940508878823105 }, { "epoch": 2.341248900615655, "grad_norm": 0.35129363799170327, "learning_rate": 0.00013828590797990912, "loss": 3.081122398376465, "step": 3994, "token_acc": 0.28935319566993356 }, { "epoch": 2.3418352389328643, "grad_norm": 0.37607118493416813, "learning_rate": 0.00013828441547210823, "loss": 3.054319143295288, "step": 3995, "token_acc": 0.2933143910738876 }, { "epoch": 2.3424215772500734, "grad_norm": 0.37915250336417033, "learning_rate": 0.00013828292232286727, "loss": 3.076972723007202, "step": 3996, "token_acc": 0.2919744661416152 }, { "epoch": 2.3430079155672825, "grad_norm": 0.3246571348230093, "learning_rate": 0.00013828142853220026, "loss": 3.0523529052734375, "step": 3997, "token_acc": 0.29483920223951565 }, { "epoch": 2.343594253884491, "grad_norm": 0.37870966536023726, "learning_rate": 0.00013827993410012125, "loss": 3.0730700492858887, "step": 3998, "token_acc": 0.29078057806549495 }, { "epoch": 2.3441805922017003, "grad_norm": 0.2924652010089367, "learning_rate": 0.00013827843902664428, "loss": 3.042371988296509, "step": 3999, "token_acc": 0.29554569559366295 }, { "epoch": 2.3447669305189094, "grad_norm": 0.32864397948620866, "learning_rate": 0.00013827694331178337, "loss": 3.015341281890869, "step": 4000, "token_acc": 0.29874758171265653 }, { "epoch": 2.3453532688361185, "grad_norm": 0.3260975546136628, "learning_rate": 0.0001382754469555526, "loss": 3.0426340103149414, "step": 4001, "token_acc": 0.29593337387190705 }, { "epoch": 2.3459396071533276, "grad_norm": 0.33821429921382246, "learning_rate": 0.00013827394995796598, "loss": 3.09721040725708, "step": 4002, "token_acc": 0.2862814662422622 }, { "epoch": 2.3465259454705363, "grad_norm": 0.3607584146514768, "learning_rate": 0.00013827245231903763, "loss": 3.073962688446045, "step": 4003, "token_acc": 0.29062085060153703 }, { "epoch": 2.3471122837877454, "grad_norm": 0.3614024588757974, "learning_rate": 0.00013827095403878156, "loss": 3.1109650135040283, "step": 4004, "token_acc": 0.28504770291323855 }, { "epoch": 2.3476986221049545, "grad_norm": 0.36607633484900964, "learning_rate": 0.00013826945511721188, "loss": 3.0866904258728027, "step": 4005, "token_acc": 0.2891883117700204 }, { "epoch": 2.3482849604221636, "grad_norm": 0.3168241154340406, "learning_rate": 0.00013826795555434268, "loss": 3.1120781898498535, "step": 4006, "token_acc": 0.2855197446055293 }, { "epoch": 2.3488712987393727, "grad_norm": 0.3536726381899361, "learning_rate": 0.000138266455350188, "loss": 3.0480971336364746, "step": 4007, "token_acc": 0.2941428017094604 }, { "epoch": 2.349457637056582, "grad_norm": 0.3068375626359624, "learning_rate": 0.000138264954504762, "loss": 3.0502212047576904, "step": 4008, "token_acc": 0.2954183908869099 }, { "epoch": 2.3500439753737905, "grad_norm": 0.34688065477987445, "learning_rate": 0.0001382634530180787, "loss": 3.1053340435028076, "step": 4009, "token_acc": 0.2878484294890683 }, { "epoch": 2.3506303136909996, "grad_norm": 0.33483633305720745, "learning_rate": 0.00013826195089015227, "loss": 3.086273670196533, "step": 4010, "token_acc": 0.28825379609544466 }, { "epoch": 2.3512166520082087, "grad_norm": 0.31842708647938556, "learning_rate": 0.0001382604481209968, "loss": 3.051870822906494, "step": 4011, "token_acc": 0.2937826847809247 }, { "epoch": 2.351802990325418, "grad_norm": 0.314056255568696, "learning_rate": 0.00013825894471062637, "loss": 3.067373752593994, "step": 4012, "token_acc": 0.2918770268517317 }, { "epoch": 2.352389328642627, "grad_norm": 0.3910271371527118, "learning_rate": 0.00013825744065905517, "loss": 3.0934102535247803, "step": 4013, "token_acc": 0.2864583466244753 }, { "epoch": 2.3529756669598356, "grad_norm": 0.41182589367096817, "learning_rate": 0.00013825593596629727, "loss": 3.0705127716064453, "step": 4014, "token_acc": 0.2906387154812856 }, { "epoch": 2.3535620052770447, "grad_norm": 0.4127548309459529, "learning_rate": 0.00013825443063236685, "loss": 3.065591812133789, "step": 4015, "token_acc": 0.29140705255604826 }, { "epoch": 2.354148343594254, "grad_norm": 0.42932804815668946, "learning_rate": 0.00013825292465727802, "loss": 3.0428242683410645, "step": 4016, "token_acc": 0.29601421134811623 }, { "epoch": 2.354734681911463, "grad_norm": 0.45083453956684727, "learning_rate": 0.0001382514180410449, "loss": 3.0786654949188232, "step": 4017, "token_acc": 0.2891833062821626 }, { "epoch": 2.355321020228672, "grad_norm": 0.3228720853274811, "learning_rate": 0.00013824991078368175, "loss": 3.0895023345947266, "step": 4018, "token_acc": 0.2898784972320375 }, { "epoch": 2.355907358545881, "grad_norm": 0.4158063542761776, "learning_rate": 0.0001382484028852026, "loss": 3.0418193340301514, "step": 4019, "token_acc": 0.29579330207917254 }, { "epoch": 2.35649369686309, "grad_norm": 0.3075804236829235, "learning_rate": 0.0001382468943456217, "loss": 3.071878433227539, "step": 4020, "token_acc": 0.2917890149086774 }, { "epoch": 2.357080035180299, "grad_norm": 0.4080120503605802, "learning_rate": 0.00013824538516495316, "loss": 3.0993309020996094, "step": 4021, "token_acc": 0.2860343424959516 }, { "epoch": 2.357666373497508, "grad_norm": 0.3270273033840868, "learning_rate": 0.00013824387534321122, "loss": 3.081183433532715, "step": 4022, "token_acc": 0.28872161265628987 }, { "epoch": 2.358252711814717, "grad_norm": 0.38403149582779955, "learning_rate": 0.00013824236488041002, "loss": 3.0490152835845947, "step": 4023, "token_acc": 0.29524286402970246 }, { "epoch": 2.3588390501319263, "grad_norm": 0.3256975565097746, "learning_rate": 0.00013824085377656375, "loss": 3.055072546005249, "step": 4024, "token_acc": 0.2930164939733559 }, { "epoch": 2.359425388449135, "grad_norm": 0.36096604892148604, "learning_rate": 0.0001382393420316866, "loss": 3.0699915885925293, "step": 4025, "token_acc": 0.2933423364028838 }, { "epoch": 2.360011726766344, "grad_norm": 0.3689472404169113, "learning_rate": 0.0001382378296457928, "loss": 3.078838348388672, "step": 4026, "token_acc": 0.29007424213374233 }, { "epoch": 2.360598065083553, "grad_norm": 0.3672161005937233, "learning_rate": 0.00013823631661889657, "loss": 3.0636298656463623, "step": 4027, "token_acc": 0.2930266696752073 }, { "epoch": 2.3611844034007623, "grad_norm": 0.3334483766193717, "learning_rate": 0.00013823480295101207, "loss": 3.0760507583618164, "step": 4028, "token_acc": 0.2907202409601143 }, { "epoch": 2.3617707417179714, "grad_norm": 0.30872890845480866, "learning_rate": 0.00013823328864215353, "loss": 3.0882153511047363, "step": 4029, "token_acc": 0.28696937184963256 }, { "epoch": 2.3623570800351805, "grad_norm": 0.31365788170921904, "learning_rate": 0.0001382317736923352, "loss": 3.045743703842163, "step": 4030, "token_acc": 0.29274624717252723 }, { "epoch": 2.362943418352389, "grad_norm": 0.33863753721491446, "learning_rate": 0.00013823025810157132, "loss": 3.090925693511963, "step": 4031, "token_acc": 0.28993738322209106 }, { "epoch": 2.3635297566695983, "grad_norm": 0.31331428377038156, "learning_rate": 0.00013822874186987608, "loss": 3.0555782318115234, "step": 4032, "token_acc": 0.29357890838075357 }, { "epoch": 2.3641160949868074, "grad_norm": 0.31758580577439677, "learning_rate": 0.00013822722499726378, "loss": 3.022827625274658, "step": 4033, "token_acc": 0.29760879182285965 }, { "epoch": 2.3647024333040165, "grad_norm": 0.33168212276889436, "learning_rate": 0.00013822570748374863, "loss": 3.083014488220215, "step": 4034, "token_acc": 0.28863155695971177 }, { "epoch": 2.3652887716212256, "grad_norm": 0.3310220935725908, "learning_rate": 0.00013822418932934487, "loss": 3.0169708728790283, "step": 4035, "token_acc": 0.30038244840628964 }, { "epoch": 2.3658751099384343, "grad_norm": 0.3462380091944063, "learning_rate": 0.0001382226705340668, "loss": 3.086747169494629, "step": 4036, "token_acc": 0.28855244281970144 }, { "epoch": 2.3664614482556434, "grad_norm": 0.3575901350959513, "learning_rate": 0.00013822115109792866, "loss": 3.051285743713379, "step": 4037, "token_acc": 0.2941521564425395 }, { "epoch": 2.3670477865728525, "grad_norm": 0.3637067129632318, "learning_rate": 0.00013821963102094475, "loss": 3.0806288719177246, "step": 4038, "token_acc": 0.29084709176371215 }, { "epoch": 2.3676341248900616, "grad_norm": 0.36692319209159735, "learning_rate": 0.00013821811030312933, "loss": 3.033203601837158, "step": 4039, "token_acc": 0.2974709283008838 }, { "epoch": 2.3682204632072708, "grad_norm": 0.3729396393889702, "learning_rate": 0.0001382165889444967, "loss": 3.054262638092041, "step": 4040, "token_acc": 0.29372414941159897 }, { "epoch": 2.36880680152448, "grad_norm": 0.355000632695877, "learning_rate": 0.00013821506694506108, "loss": 3.058180332183838, "step": 4041, "token_acc": 0.29371287923377387 }, { "epoch": 2.3693931398416885, "grad_norm": 0.3427230134181248, "learning_rate": 0.0001382135443048369, "loss": 3.0629005432128906, "step": 4042, "token_acc": 0.29037819367865186 }, { "epoch": 2.3699794781588976, "grad_norm": 0.3248163754550457, "learning_rate": 0.00013821202102383838, "loss": 3.011953592300415, "step": 4043, "token_acc": 0.2999598493293253 }, { "epoch": 2.3705658164761068, "grad_norm": 0.2916359564803973, "learning_rate": 0.0001382104971020798, "loss": 3.001657485961914, "step": 4044, "token_acc": 0.30137082674323773 }, { "epoch": 2.371152154793316, "grad_norm": 0.304637871324074, "learning_rate": 0.00013820897253957553, "loss": 3.0536303520202637, "step": 4045, "token_acc": 0.2933927648578811 }, { "epoch": 2.3717384931105245, "grad_norm": 0.37829603603059114, "learning_rate": 0.0001382074473363399, "loss": 3.059512138366699, "step": 4046, "token_acc": 0.2918110652232478 }, { "epoch": 2.3723248314277336, "grad_norm": 0.3743648619708405, "learning_rate": 0.0001382059214923872, "loss": 3.08774995803833, "step": 4047, "token_acc": 0.28890100566402616 }, { "epoch": 2.3729111697449428, "grad_norm": 0.3362355758585943, "learning_rate": 0.00013820439500773177, "loss": 3.04176926612854, "step": 4048, "token_acc": 0.29599399568439816 }, { "epoch": 2.373497508062152, "grad_norm": 0.367099024288784, "learning_rate": 0.00013820286788238795, "loss": 3.0716967582702637, "step": 4049, "token_acc": 0.29085849779601997 }, { "epoch": 2.374083846379361, "grad_norm": 0.3583516299294482, "learning_rate": 0.00013820134011637009, "loss": 3.1031441688537598, "step": 4050, "token_acc": 0.2872665363448994 }, { "epoch": 2.37467018469657, "grad_norm": 0.3391823702835769, "learning_rate": 0.00013819981170969255, "loss": 3.0645999908447266, "step": 4051, "token_acc": 0.29363430883664293 }, { "epoch": 2.3752565230137788, "grad_norm": 0.3568092605151071, "learning_rate": 0.0001381982826623697, "loss": 3.052489995956421, "step": 4052, "token_acc": 0.29440629634461624 }, { "epoch": 2.375842861330988, "grad_norm": 0.28824426709113227, "learning_rate": 0.00013819675297441585, "loss": 3.0608763694763184, "step": 4053, "token_acc": 0.2928354868886868 }, { "epoch": 2.376429199648197, "grad_norm": 0.3483493696699502, "learning_rate": 0.0001381952226458454, "loss": 3.084655284881592, "step": 4054, "token_acc": 0.28927451795657977 }, { "epoch": 2.377015537965406, "grad_norm": 0.3484689034429514, "learning_rate": 0.00013819369167667275, "loss": 3.0497865676879883, "step": 4055, "token_acc": 0.2952654560378618 }, { "epoch": 2.377601876282615, "grad_norm": 0.3216616728709735, "learning_rate": 0.00013819216006691223, "loss": 3.0622987747192383, "step": 4056, "token_acc": 0.29198270811230226 }, { "epoch": 2.378188214599824, "grad_norm": 0.3404862466038519, "learning_rate": 0.0001381906278165783, "loss": 3.0224781036376953, "step": 4057, "token_acc": 0.29733951952516796 }, { "epoch": 2.378774552917033, "grad_norm": 0.28324993627748535, "learning_rate": 0.00013818909492568527, "loss": 3.074069023132324, "step": 4058, "token_acc": 0.29172482127446486 }, { "epoch": 2.379360891234242, "grad_norm": 0.32150627228025674, "learning_rate": 0.00013818756139424761, "loss": 3.079206943511963, "step": 4059, "token_acc": 0.29092672485870036 }, { "epoch": 2.379947229551451, "grad_norm": 0.3279346158769462, "learning_rate": 0.00013818602722227966, "loss": 3.050701379776001, "step": 4060, "token_acc": 0.2939685081398452 }, { "epoch": 2.3805335678686603, "grad_norm": 0.2807842248919441, "learning_rate": 0.00013818449240979593, "loss": 3.0847315788269043, "step": 4061, "token_acc": 0.2897007860924503 }, { "epoch": 2.3811199061858694, "grad_norm": 0.3953699349411169, "learning_rate": 0.0001381829569568107, "loss": 3.061007499694824, "step": 4062, "token_acc": 0.29251529250733455 }, { "epoch": 2.381706244503078, "grad_norm": 0.3375840157269756, "learning_rate": 0.0001381814208633385, "loss": 3.033616542816162, "step": 4063, "token_acc": 0.29557286454956144 }, { "epoch": 2.382292582820287, "grad_norm": 0.3270374510362583, "learning_rate": 0.00013817988412939374, "loss": 3.0157980918884277, "step": 4064, "token_acc": 0.2977575207058893 }, { "epoch": 2.3828789211374963, "grad_norm": 0.3751430500405435, "learning_rate": 0.0001381783467549908, "loss": 3.0645761489868164, "step": 4065, "token_acc": 0.2918709697159773 }, { "epoch": 2.3834652594547054, "grad_norm": 0.3404453545662311, "learning_rate": 0.0001381768087401442, "loss": 3.0646448135375977, "step": 4066, "token_acc": 0.29160191725529766 }, { "epoch": 2.3840515977719146, "grad_norm": 0.3132388633272704, "learning_rate": 0.00013817527008486835, "loss": 3.1022961139678955, "step": 4067, "token_acc": 0.28859802253078076 }, { "epoch": 2.3846379360891232, "grad_norm": 0.3233519304952651, "learning_rate": 0.0001381737307891777, "loss": 3.0591681003570557, "step": 4068, "token_acc": 0.292330152345415 }, { "epoch": 2.3852242744063323, "grad_norm": 0.29387491556139494, "learning_rate": 0.0001381721908530867, "loss": 3.0418269634246826, "step": 4069, "token_acc": 0.2960987599855043 }, { "epoch": 2.3858106127235414, "grad_norm": 0.34138560002834906, "learning_rate": 0.00013817065027660984, "loss": 3.032656192779541, "step": 4070, "token_acc": 0.2977244548753906 }, { "epoch": 2.3863969510407506, "grad_norm": 0.2985401427057655, "learning_rate": 0.0001381691090597616, "loss": 3.063190221786499, "step": 4071, "token_acc": 0.29104182757627434 }, { "epoch": 2.3869832893579597, "grad_norm": 0.33331121800188135, "learning_rate": 0.0001381675672025564, "loss": 3.054957151412964, "step": 4072, "token_acc": 0.2910006967070347 }, { "epoch": 2.387569627675169, "grad_norm": 0.3555551781024421, "learning_rate": 0.0001381660247050088, "loss": 3.0243213176727295, "step": 4073, "token_acc": 0.2968951220076446 }, { "epoch": 2.3881559659923774, "grad_norm": 0.3500690881799634, "learning_rate": 0.00013816448156713323, "loss": 3.077343463897705, "step": 4074, "token_acc": 0.2915393109187217 }, { "epoch": 2.3887423043095866, "grad_norm": 0.30615546925582227, "learning_rate": 0.0001381629377889442, "loss": 3.082634449005127, "step": 4075, "token_acc": 0.29029830905030307 }, { "epoch": 2.3893286426267957, "grad_norm": 0.35025707918105214, "learning_rate": 0.00013816139337045625, "loss": 3.0680062770843506, "step": 4076, "token_acc": 0.2908369482010717 }, { "epoch": 2.389914980944005, "grad_norm": 0.3818079734501489, "learning_rate": 0.00013815984831168384, "loss": 3.0846099853515625, "step": 4077, "token_acc": 0.28951831371312364 }, { "epoch": 2.390501319261214, "grad_norm": 0.3383494767519999, "learning_rate": 0.0001381583026126415, "loss": 3.0626983642578125, "step": 4078, "token_acc": 0.29318502305778327 }, { "epoch": 2.3910876575784226, "grad_norm": 0.2829571379063603, "learning_rate": 0.00013815675627334376, "loss": 3.0563549995422363, "step": 4079, "token_acc": 0.29453555573753 }, { "epoch": 2.3916739958956317, "grad_norm": 0.35026984101785025, "learning_rate": 0.00013815520929380513, "loss": 3.0235755443573, "step": 4080, "token_acc": 0.2954108885798578 }, { "epoch": 2.392260334212841, "grad_norm": 0.3250948930710766, "learning_rate": 0.00013815366167404017, "loss": 3.0632219314575195, "step": 4081, "token_acc": 0.29336491030656703 }, { "epoch": 2.39284667253005, "grad_norm": 0.35948364736233557, "learning_rate": 0.00013815211341406335, "loss": 3.0566649436950684, "step": 4082, "token_acc": 0.29364804712781833 }, { "epoch": 2.393433010847259, "grad_norm": 0.4016717665118695, "learning_rate": 0.0001381505645138893, "loss": 3.0661568641662598, "step": 4083, "token_acc": 0.2920331507175917 }, { "epoch": 2.394019349164468, "grad_norm": 0.4045526919121566, "learning_rate": 0.00013814901497353254, "loss": 3.0538201332092285, "step": 4084, "token_acc": 0.29284649776453053 }, { "epoch": 2.394605687481677, "grad_norm": 0.3478007513464753, "learning_rate": 0.00013814746479300758, "loss": 3.081631660461426, "step": 4085, "token_acc": 0.28880027457628005 }, { "epoch": 2.395192025798886, "grad_norm": 0.3685976875080815, "learning_rate": 0.00013814591397232903, "loss": 3.0492920875549316, "step": 4086, "token_acc": 0.29437172360690667 }, { "epoch": 2.395778364116095, "grad_norm": 0.36336117919587485, "learning_rate": 0.00013814436251151146, "loss": 3.1037445068359375, "step": 4087, "token_acc": 0.2874033555988008 }, { "epoch": 2.396364702433304, "grad_norm": 0.3036246117557848, "learning_rate": 0.00013814281041056941, "loss": 3.021918296813965, "step": 4088, "token_acc": 0.29836310099844904 }, { "epoch": 2.3969510407505132, "grad_norm": 0.3832705588070443, "learning_rate": 0.0001381412576695175, "loss": 3.049736976623535, "step": 4089, "token_acc": 0.2942974455009713 }, { "epoch": 2.397537379067722, "grad_norm": 0.3366002295942935, "learning_rate": 0.00013813970428837029, "loss": 3.0911481380462646, "step": 4090, "token_acc": 0.2875600930203456 }, { "epoch": 2.398123717384931, "grad_norm": 0.27993328149928026, "learning_rate": 0.00013813815026714237, "loss": 3.099857807159424, "step": 4091, "token_acc": 0.2871189203303152 }, { "epoch": 2.39871005570214, "grad_norm": 0.37378110567489703, "learning_rate": 0.00013813659560584835, "loss": 3.066340923309326, "step": 4092, "token_acc": 0.2914561660292629 }, { "epoch": 2.3992963940193492, "grad_norm": 0.29518569623757934, "learning_rate": 0.00013813504030450282, "loss": 3.0006675720214844, "step": 4093, "token_acc": 0.2999981794491013 }, { "epoch": 2.3998827323365584, "grad_norm": 0.41463019821573593, "learning_rate": 0.0001381334843631204, "loss": 3.086228370666504, "step": 4094, "token_acc": 0.28825555216872817 }, { "epoch": 2.4004690706537675, "grad_norm": 0.3181574594553303, "learning_rate": 0.00013813192778171573, "loss": 3.080528736114502, "step": 4095, "token_acc": 0.29000351510082184 }, { "epoch": 2.401055408970976, "grad_norm": 0.3395088721927154, "learning_rate": 0.00013813037056030337, "loss": 3.1221415996551514, "step": 4096, "token_acc": 0.2835148163681646 }, { "epoch": 2.4016417472881852, "grad_norm": 0.34549356259812153, "learning_rate": 0.00013812881269889804, "loss": 3.0727949142456055, "step": 4097, "token_acc": 0.2906959099232162 }, { "epoch": 2.4022280856053944, "grad_norm": 0.3542869715372162, "learning_rate": 0.0001381272541975143, "loss": 3.076119899749756, "step": 4098, "token_acc": 0.29077665782766254 }, { "epoch": 2.4028144239226035, "grad_norm": 0.35754848630682295, "learning_rate": 0.00013812569505616677, "loss": 3.0876035690307617, "step": 4099, "token_acc": 0.2894302962336122 }, { "epoch": 2.403400762239812, "grad_norm": 0.34169924982136807, "learning_rate": 0.00013812413527487016, "loss": 3.0799460411071777, "step": 4100, "token_acc": 0.29011599868010257 }, { "epoch": 2.4039871005570213, "grad_norm": 0.3175183564288105, "learning_rate": 0.0001381225748536391, "loss": 3.095269203186035, "step": 4101, "token_acc": 0.2890923194682037 }, { "epoch": 2.4045734388742304, "grad_norm": 0.31458374342242673, "learning_rate": 0.00013812101379248826, "loss": 3.0431973934173584, "step": 4102, "token_acc": 0.295406758608326 }, { "epoch": 2.4051597771914395, "grad_norm": 0.39569875724962733, "learning_rate": 0.00013811945209143227, "loss": 3.0634074211120605, "step": 4103, "token_acc": 0.2914431758516673 }, { "epoch": 2.4057461155086486, "grad_norm": 0.38632586166988386, "learning_rate": 0.00013811788975048582, "loss": 3.1201887130737305, "step": 4104, "token_acc": 0.2855339129062381 }, { "epoch": 2.4063324538258577, "grad_norm": 0.3362628417464489, "learning_rate": 0.00013811632676966358, "loss": 3.095259189605713, "step": 4105, "token_acc": 0.2882208109276658 }, { "epoch": 2.4069187921430664, "grad_norm": 0.34542818907432, "learning_rate": 0.00013811476314898026, "loss": 3.045332431793213, "step": 4106, "token_acc": 0.2965409988210801 }, { "epoch": 2.4075051304602755, "grad_norm": 0.3366463327741982, "learning_rate": 0.0001381131988884505, "loss": 3.041891098022461, "step": 4107, "token_acc": 0.2958037809545309 }, { "epoch": 2.4080914687774846, "grad_norm": 0.3828885604448513, "learning_rate": 0.00013811163398808903, "loss": 3.033005714416504, "step": 4108, "token_acc": 0.2961006329399703 }, { "epoch": 2.4086778070946937, "grad_norm": 0.3408223977179087, "learning_rate": 0.00013811006844791055, "loss": 3.070070266723633, "step": 4109, "token_acc": 0.29177589888713346 }, { "epoch": 2.409264145411903, "grad_norm": 0.3506097798331669, "learning_rate": 0.00013810850226792973, "loss": 3.051692008972168, "step": 4110, "token_acc": 0.29312885992010723 }, { "epoch": 2.4098504837291115, "grad_norm": 0.4665151771964703, "learning_rate": 0.00013810693544816135, "loss": 3.0511651039123535, "step": 4111, "token_acc": 0.2935248322147651 }, { "epoch": 2.4104368220463206, "grad_norm": 0.38130740465766716, "learning_rate": 0.00013810536798862006, "loss": 3.0713272094726562, "step": 4112, "token_acc": 0.2899629213337984 }, { "epoch": 2.4110231603635297, "grad_norm": 0.33428071514048635, "learning_rate": 0.00013810379988932062, "loss": 3.0229110717773438, "step": 4113, "token_acc": 0.29891578512417644 }, { "epoch": 2.411609498680739, "grad_norm": 0.3781170500494203, "learning_rate": 0.00013810223115027774, "loss": 3.06805419921875, "step": 4114, "token_acc": 0.2924023240258633 }, { "epoch": 2.412195836997948, "grad_norm": 0.3042728702577096, "learning_rate": 0.0001381006617715062, "loss": 3.0791373252868652, "step": 4115, "token_acc": 0.29112361199945475 }, { "epoch": 2.412782175315157, "grad_norm": 0.3659165864863998, "learning_rate": 0.00013809909175302066, "loss": 3.0979957580566406, "step": 4116, "token_acc": 0.28708677766185214 }, { "epoch": 2.4133685136323657, "grad_norm": 0.3526604954457835, "learning_rate": 0.00013809752109483596, "loss": 3.056419610977173, "step": 4117, "token_acc": 0.29475044624649754 }, { "epoch": 2.413954851949575, "grad_norm": 0.3399916853247518, "learning_rate": 0.00013809594979696677, "loss": 3.0682530403137207, "step": 4118, "token_acc": 0.2909325390091659 }, { "epoch": 2.414541190266784, "grad_norm": 0.32480849818618657, "learning_rate": 0.00013809437785942792, "loss": 3.094308853149414, "step": 4119, "token_acc": 0.2887992829157878 }, { "epoch": 2.415127528583993, "grad_norm": 0.38793502750738823, "learning_rate": 0.00013809280528223416, "loss": 3.0941684246063232, "step": 4120, "token_acc": 0.2857783805279956 }, { "epoch": 2.415713866901202, "grad_norm": 0.34947814006673344, "learning_rate": 0.00013809123206540022, "loss": 3.06655216217041, "step": 4121, "token_acc": 0.29107961381285236 }, { "epoch": 2.416300205218411, "grad_norm": 0.3408482759064972, "learning_rate": 0.00013808965820894092, "loss": 3.0576772689819336, "step": 4122, "token_acc": 0.29238496102850015 }, { "epoch": 2.41688654353562, "grad_norm": 0.34873401508744295, "learning_rate": 0.00013808808371287105, "loss": 3.0357532501220703, "step": 4123, "token_acc": 0.2948350492903997 }, { "epoch": 2.417472881852829, "grad_norm": 0.3484309951695064, "learning_rate": 0.00013808650857720535, "loss": 3.0974361896514893, "step": 4124, "token_acc": 0.28813157481133006 }, { "epoch": 2.418059220170038, "grad_norm": 0.34779541211803133, "learning_rate": 0.00013808493280195868, "loss": 3.096229076385498, "step": 4125, "token_acc": 0.28604804334588474 }, { "epoch": 2.4186455584872473, "grad_norm": 0.3873329635330523, "learning_rate": 0.00013808335638714581, "loss": 3.074026107788086, "step": 4126, "token_acc": 0.29147395964774797 }, { "epoch": 2.4192318968044564, "grad_norm": 0.32389180456593564, "learning_rate": 0.00013808177933278154, "loss": 3.0681073665618896, "step": 4127, "token_acc": 0.2900738277419066 }, { "epoch": 2.419818235121665, "grad_norm": 0.318698966441261, "learning_rate": 0.00013808020163888067, "loss": 3.0403025150299072, "step": 4128, "token_acc": 0.2947781245478784 }, { "epoch": 2.420404573438874, "grad_norm": 0.3514803052091189, "learning_rate": 0.00013807862330545808, "loss": 3.067467212677002, "step": 4129, "token_acc": 0.29139676485576343 }, { "epoch": 2.4209909117560833, "grad_norm": 0.34673503270889744, "learning_rate": 0.00013807704433252855, "loss": 3.0547804832458496, "step": 4130, "token_acc": 0.29271940122178275 }, { "epoch": 2.4215772500732924, "grad_norm": 0.34038758080968573, "learning_rate": 0.00013807546472010694, "loss": 3.048649787902832, "step": 4131, "token_acc": 0.2934819118631204 }, { "epoch": 2.4221635883905015, "grad_norm": 0.3593713278239768, "learning_rate": 0.00013807388446820804, "loss": 3.041304111480713, "step": 4132, "token_acc": 0.29546170691084145 }, { "epoch": 2.42274992670771, "grad_norm": 0.2914221756299142, "learning_rate": 0.00013807230357684675, "loss": 3.077401638031006, "step": 4133, "token_acc": 0.2917599564426384 }, { "epoch": 2.4233362650249193, "grad_norm": 0.3841534764097119, "learning_rate": 0.0001380707220460379, "loss": 3.0268115997314453, "step": 4134, "token_acc": 0.2964310276635034 }, { "epoch": 2.4239226033421284, "grad_norm": 0.34832612159022314, "learning_rate": 0.00013806913987579633, "loss": 3.0552332401275635, "step": 4135, "token_acc": 0.29305657746969754 }, { "epoch": 2.4245089416593375, "grad_norm": 0.37900066728091414, "learning_rate": 0.00013806755706613692, "loss": 3.013362407684326, "step": 4136, "token_acc": 0.2992748307214118 }, { "epoch": 2.4250952799765466, "grad_norm": 0.3743038784304707, "learning_rate": 0.00013806597361707454, "loss": 3.106978416442871, "step": 4137, "token_acc": 0.2859262083789297 }, { "epoch": 2.4256816182937557, "grad_norm": 0.2796962271559677, "learning_rate": 0.00013806438952862404, "loss": 3.0778822898864746, "step": 4138, "token_acc": 0.290056378034085 }, { "epoch": 2.4262679566109644, "grad_norm": 0.35445996165094373, "learning_rate": 0.00013806280480080032, "loss": 3.062171697616577, "step": 4139, "token_acc": 0.29274304870834156 }, { "epoch": 2.4268542949281735, "grad_norm": 0.3197776807815522, "learning_rate": 0.00013806121943361828, "loss": 3.0503427982330322, "step": 4140, "token_acc": 0.2937312371209506 }, { "epoch": 2.4274406332453826, "grad_norm": 0.31685236906073194, "learning_rate": 0.0001380596334270928, "loss": 3.083651542663574, "step": 4141, "token_acc": 0.2886351545977086 }, { "epoch": 2.4280269715625917, "grad_norm": 0.3268678662638589, "learning_rate": 0.00013805804678123875, "loss": 3.045289993286133, "step": 4142, "token_acc": 0.2936968160773866 }, { "epoch": 2.4286133098798004, "grad_norm": 0.32024777989946906, "learning_rate": 0.00013805645949607108, "loss": 3.046217918395996, "step": 4143, "token_acc": 0.294906431559688 }, { "epoch": 2.4291996481970095, "grad_norm": 0.35472478216331943, "learning_rate": 0.00013805487157160469, "loss": 3.0657083988189697, "step": 4144, "token_acc": 0.29113440790428124 }, { "epoch": 2.4297859865142186, "grad_norm": 0.35457093820853947, "learning_rate": 0.00013805328300785444, "loss": 3.0784337520599365, "step": 4145, "token_acc": 0.2908242743236856 }, { "epoch": 2.4303723248314277, "grad_norm": 0.31114379962915933, "learning_rate": 0.00013805169380483534, "loss": 3.104710102081299, "step": 4146, "token_acc": 0.2857458625297893 }, { "epoch": 2.430958663148637, "grad_norm": 0.3363819020622537, "learning_rate": 0.00013805010396256227, "loss": 3.072657585144043, "step": 4147, "token_acc": 0.2898475601971248 }, { "epoch": 2.431545001465846, "grad_norm": 0.32003371158401184, "learning_rate": 0.00013804851348105018, "loss": 3.0489306449890137, "step": 4148, "token_acc": 0.29531111430531864 }, { "epoch": 2.432131339783055, "grad_norm": 0.35150323795537414, "learning_rate": 0.00013804692236031398, "loss": 3.1220552921295166, "step": 4149, "token_acc": 0.28486208911165095 }, { "epoch": 2.4327176781002637, "grad_norm": 0.3398683657955479, "learning_rate": 0.00013804533060036867, "loss": 3.079378604888916, "step": 4150, "token_acc": 0.2912951190093701 }, { "epoch": 2.433304016417473, "grad_norm": 0.3169236722930002, "learning_rate": 0.00013804373820122914, "loss": 3.1077394485473633, "step": 4151, "token_acc": 0.28583589574261714 }, { "epoch": 2.433890354734682, "grad_norm": 0.30439378510381315, "learning_rate": 0.0001380421451629104, "loss": 3.065969705581665, "step": 4152, "token_acc": 0.2930793322989956 }, { "epoch": 2.434476693051891, "grad_norm": 0.3326924502404188, "learning_rate": 0.00013804055148542737, "loss": 3.0168094635009766, "step": 4153, "token_acc": 0.29893113253050574 }, { "epoch": 2.4350630313690997, "grad_norm": 0.31292841762111473, "learning_rate": 0.00013803895716879507, "loss": 3.0405845642089844, "step": 4154, "token_acc": 0.2942930628622149 }, { "epoch": 2.435649369686309, "grad_norm": 0.2991690033583295, "learning_rate": 0.00013803736221302846, "loss": 3.082681179046631, "step": 4155, "token_acc": 0.2909080648498741 }, { "epoch": 2.436235708003518, "grad_norm": 0.3035620146367998, "learning_rate": 0.00013803576661814248, "loss": 3.0515425205230713, "step": 4156, "token_acc": 0.29372080018760033 }, { "epoch": 2.436822046320727, "grad_norm": 0.32100518437957365, "learning_rate": 0.00013803417038415217, "loss": 3.1041226387023926, "step": 4157, "token_acc": 0.28512866570551454 }, { "epoch": 2.437408384637936, "grad_norm": 0.32976823122775684, "learning_rate": 0.0001380325735110725, "loss": 3.061105251312256, "step": 4158, "token_acc": 0.29313765953393867 }, { "epoch": 2.4379947229551453, "grad_norm": 0.34293108290177926, "learning_rate": 0.0001380309759989185, "loss": 3.108186721801758, "step": 4159, "token_acc": 0.28545691384870736 }, { "epoch": 2.438581061272354, "grad_norm": 0.3534184961986147, "learning_rate": 0.0001380293778477051, "loss": 3.0628538131713867, "step": 4160, "token_acc": 0.2912351482300944 }, { "epoch": 2.439167399589563, "grad_norm": 0.3677103532815081, "learning_rate": 0.00013802777905744742, "loss": 3.053551435470581, "step": 4161, "token_acc": 0.29412439243689814 }, { "epoch": 2.439753737906772, "grad_norm": 0.37126677113923023, "learning_rate": 0.0001380261796281604, "loss": 3.0701839923858643, "step": 4162, "token_acc": 0.29078938947907335 }, { "epoch": 2.4403400762239813, "grad_norm": 0.42593488210705166, "learning_rate": 0.00013802457955985908, "loss": 3.0736584663391113, "step": 4163, "token_acc": 0.2903134353583871 }, { "epoch": 2.4409264145411904, "grad_norm": 0.3682124452188901, "learning_rate": 0.0001380229788525585, "loss": 3.065493583679199, "step": 4164, "token_acc": 0.29104621635347894 }, { "epoch": 2.441512752858399, "grad_norm": 0.338741231952482, "learning_rate": 0.00013802137750627372, "loss": 3.0811452865600586, "step": 4165, "token_acc": 0.291322581340693 }, { "epoch": 2.442099091175608, "grad_norm": 0.38782716445304827, "learning_rate": 0.00013801977552101977, "loss": 3.059845447540283, "step": 4166, "token_acc": 0.2933271547729379 }, { "epoch": 2.4426854294928173, "grad_norm": 0.29681801080742914, "learning_rate": 0.00013801817289681165, "loss": 3.1183009147644043, "step": 4167, "token_acc": 0.28458395821482135 }, { "epoch": 2.4432717678100264, "grad_norm": 0.3670902861080377, "learning_rate": 0.00013801656963366446, "loss": 3.0657567977905273, "step": 4168, "token_acc": 0.2926429497720558 }, { "epoch": 2.4438581061272355, "grad_norm": 0.3713852778755407, "learning_rate": 0.00013801496573159327, "loss": 3.1093242168426514, "step": 4169, "token_acc": 0.28587765033928 }, { "epoch": 2.4444444444444446, "grad_norm": 0.37071487779933704, "learning_rate": 0.0001380133611906131, "loss": 3.0888121128082275, "step": 4170, "token_acc": 0.2895343922349478 }, { "epoch": 2.4450307827616533, "grad_norm": 0.34995738420647576, "learning_rate": 0.00013801175601073907, "loss": 3.0730843544006348, "step": 4171, "token_acc": 0.29114552620961487 }, { "epoch": 2.4456171210788624, "grad_norm": 0.32570562175495255, "learning_rate": 0.00013801015019198625, "loss": 3.0866737365722656, "step": 4172, "token_acc": 0.2884402972899968 }, { "epoch": 2.4462034593960715, "grad_norm": 0.2835373071013756, "learning_rate": 0.0001380085437343697, "loss": 3.082204818725586, "step": 4173, "token_acc": 0.28977380800387714 }, { "epoch": 2.4467897977132806, "grad_norm": 0.35010643052180407, "learning_rate": 0.00013800693663790453, "loss": 3.092050790786743, "step": 4174, "token_acc": 0.28818902764248816 }, { "epoch": 2.4473761360304898, "grad_norm": 0.2870860082419632, "learning_rate": 0.0001380053289026058, "loss": 3.0260977745056152, "step": 4175, "token_acc": 0.29896858303394214 }, { "epoch": 2.4479624743476984, "grad_norm": 0.34355627229648456, "learning_rate": 0.00013800372052848867, "loss": 3.1050281524658203, "step": 4176, "token_acc": 0.28405991671328235 }, { "epoch": 2.4485488126649075, "grad_norm": 0.3586457357644718, "learning_rate": 0.00013800211151556823, "loss": 3.0616133213043213, "step": 4177, "token_acc": 0.2927519312582496 }, { "epoch": 2.4491351509821166, "grad_norm": 0.3453333050459176, "learning_rate": 0.00013800050186385955, "loss": 3.0728774070739746, "step": 4178, "token_acc": 0.2904125880523282 }, { "epoch": 2.4497214892993258, "grad_norm": 0.3650670633350832, "learning_rate": 0.00013799889157337783, "loss": 3.056148052215576, "step": 4179, "token_acc": 0.2930542741914226 }, { "epoch": 2.450307827616535, "grad_norm": 0.3082911038262733, "learning_rate": 0.00013799728064413814, "loss": 3.043065071105957, "step": 4180, "token_acc": 0.2955905180998006 }, { "epoch": 2.450894165933744, "grad_norm": 0.3087389667276047, "learning_rate": 0.00013799566907615561, "loss": 3.0644872188568115, "step": 4181, "token_acc": 0.2908530888045474 }, { "epoch": 2.4514805042509527, "grad_norm": 0.37281592886865744, "learning_rate": 0.0001379940568694454, "loss": 3.0889673233032227, "step": 4182, "token_acc": 0.28861430524441983 }, { "epoch": 2.4520668425681618, "grad_norm": 0.3629402595002331, "learning_rate": 0.00013799244402402266, "loss": 3.100498676300049, "step": 4183, "token_acc": 0.28660604494153213 }, { "epoch": 2.452653180885371, "grad_norm": 0.2949962127882119, "learning_rate": 0.00013799083053990253, "loss": 3.0804247856140137, "step": 4184, "token_acc": 0.2911146412069702 }, { "epoch": 2.45323951920258, "grad_norm": 0.30235329723241045, "learning_rate": 0.00013798921641710015, "loss": 3.0473012924194336, "step": 4185, "token_acc": 0.2922458091670044 }, { "epoch": 2.453825857519789, "grad_norm": 0.3237300262754433, "learning_rate": 0.0001379876016556307, "loss": 3.003633737564087, "step": 4186, "token_acc": 0.3018541302656411 }, { "epoch": 2.4544121958369978, "grad_norm": 0.3355090260537687, "learning_rate": 0.00013798598625550936, "loss": 3.0517759323120117, "step": 4187, "token_acc": 0.2939339760203214 }, { "epoch": 2.454998534154207, "grad_norm": 0.3345100329267295, "learning_rate": 0.00013798437021675128, "loss": 3.115407943725586, "step": 4188, "token_acc": 0.28513363392798796 }, { "epoch": 2.455584872471416, "grad_norm": 0.33651046669721235, "learning_rate": 0.00013798275353937167, "loss": 3.0267457962036133, "step": 4189, "token_acc": 0.297492838492154 }, { "epoch": 2.456171210788625, "grad_norm": 0.3347140999805152, "learning_rate": 0.00013798113622338567, "loss": 3.089869737625122, "step": 4190, "token_acc": 0.287722173687832 }, { "epoch": 2.456757549105834, "grad_norm": 0.3191418236261717, "learning_rate": 0.00013797951826880855, "loss": 2.9955966472625732, "step": 4191, "token_acc": 0.2995206632983547 }, { "epoch": 2.4573438874230433, "grad_norm": 0.3282229446733897, "learning_rate": 0.0001379778996756554, "loss": 3.0222959518432617, "step": 4192, "token_acc": 0.2986648738930849 }, { "epoch": 2.457930225740252, "grad_norm": 0.3467069275934962, "learning_rate": 0.00013797628044394153, "loss": 3.0708184242248535, "step": 4193, "token_acc": 0.29112522506278904 }, { "epoch": 2.458516564057461, "grad_norm": 0.38295441068961344, "learning_rate": 0.0001379746605736821, "loss": 3.0587100982666016, "step": 4194, "token_acc": 0.2913565488423632 }, { "epoch": 2.45910290237467, "grad_norm": 0.33147888650912616, "learning_rate": 0.0001379730400648923, "loss": 3.0722427368164062, "step": 4195, "token_acc": 0.2930065703022339 }, { "epoch": 2.4596892406918793, "grad_norm": 0.3835019517146763, "learning_rate": 0.00013797141891758738, "loss": 3.0927271842956543, "step": 4196, "token_acc": 0.28814234229693286 }, { "epoch": 2.460275579009088, "grad_norm": 0.3781963606717039, "learning_rate": 0.00013796979713178259, "loss": 3.069444179534912, "step": 4197, "token_acc": 0.2915625394251908 }, { "epoch": 2.460861917326297, "grad_norm": 0.4383793149311926, "learning_rate": 0.00013796817470749316, "loss": 3.0476956367492676, "step": 4198, "token_acc": 0.2950219135255517 }, { "epoch": 2.4614482556435062, "grad_norm": 0.3301844083927975, "learning_rate": 0.00013796655164473431, "loss": 3.073068141937256, "step": 4199, "token_acc": 0.28976582082442937 }, { "epoch": 2.4620345939607153, "grad_norm": 0.3167805654552109, "learning_rate": 0.00013796492794352128, "loss": 3.021474838256836, "step": 4200, "token_acc": 0.2974620437821783 }, { "epoch": 2.4626209322779244, "grad_norm": 0.3595840095340389, "learning_rate": 0.00013796330360386935, "loss": 3.061807155609131, "step": 4201, "token_acc": 0.2928636238327075 }, { "epoch": 2.4632072705951336, "grad_norm": 0.3277382270771204, "learning_rate": 0.00013796167862579375, "loss": 3.113220453262329, "step": 4202, "token_acc": 0.28578664035046497 }, { "epoch": 2.4637936089123427, "grad_norm": 0.3726005889623694, "learning_rate": 0.00013796005300930977, "loss": 3.129694700241089, "step": 4203, "token_acc": 0.28386860262637637 }, { "epoch": 2.4643799472295513, "grad_norm": 0.344278735875687, "learning_rate": 0.00013795842675443266, "loss": 3.055192708969116, "step": 4204, "token_acc": 0.2935059750352647 }, { "epoch": 2.4649662855467604, "grad_norm": 0.2731972046043155, "learning_rate": 0.0001379567998611777, "loss": 3.0766844749450684, "step": 4205, "token_acc": 0.28991060025542786 }, { "epoch": 2.4655526238639696, "grad_norm": 0.33538039210407306, "learning_rate": 0.0001379551723295602, "loss": 3.046630859375, "step": 4206, "token_acc": 0.29371758971167805 }, { "epoch": 2.4661389621811787, "grad_norm": 0.32186913923262134, "learning_rate": 0.00013795354415959543, "loss": 3.061422824859619, "step": 4207, "token_acc": 0.2916192995073815 }, { "epoch": 2.4667253004983873, "grad_norm": 0.325554838692534, "learning_rate": 0.00013795191535129867, "loss": 3.100823163986206, "step": 4208, "token_acc": 0.28755966331591776 }, { "epoch": 2.4673116388155965, "grad_norm": 0.30385234679839923, "learning_rate": 0.00013795028590468523, "loss": 3.0388662815093994, "step": 4209, "token_acc": 0.2945364843938463 }, { "epoch": 2.4678979771328056, "grad_norm": 0.2899638734538185, "learning_rate": 0.0001379486558197704, "loss": 3.061492919921875, "step": 4210, "token_acc": 0.29130047521224944 }, { "epoch": 2.4684843154500147, "grad_norm": 0.3441627767640145, "learning_rate": 0.00013794702509656954, "loss": 3.083706855773926, "step": 4211, "token_acc": 0.28896107270931864 }, { "epoch": 2.469070653767224, "grad_norm": 0.34356333370041703, "learning_rate": 0.00013794539373509793, "loss": 3.121530532836914, "step": 4212, "token_acc": 0.2831282601188637 }, { "epoch": 2.469656992084433, "grad_norm": 0.37041470260947146, "learning_rate": 0.0001379437617353709, "loss": 3.0889649391174316, "step": 4213, "token_acc": 0.28678788374195985 }, { "epoch": 2.4702433304016416, "grad_norm": 0.31331731876694996, "learning_rate": 0.00013794212909740378, "loss": 3.0927577018737793, "step": 4214, "token_acc": 0.28719045872914956 }, { "epoch": 2.4708296687188507, "grad_norm": 0.4463708742821819, "learning_rate": 0.0001379404958212119, "loss": 3.0862932205200195, "step": 4215, "token_acc": 0.2897759689627629 }, { "epoch": 2.47141600703606, "grad_norm": 0.4261334077994018, "learning_rate": 0.00013793886190681065, "loss": 3.037044048309326, "step": 4216, "token_acc": 0.2944011487500816 }, { "epoch": 2.472002345353269, "grad_norm": 0.40796280358072695, "learning_rate": 0.00013793722735421532, "loss": 3.0571494102478027, "step": 4217, "token_acc": 0.2939505625805196 }, { "epoch": 2.472588683670478, "grad_norm": 0.30887898669813973, "learning_rate": 0.00013793559216344127, "loss": 3.0305864810943604, "step": 4218, "token_acc": 0.2959044953141475 }, { "epoch": 2.4731750219876867, "grad_norm": 0.38995289323023974, "learning_rate": 0.0001379339563345039, "loss": 3.0861611366271973, "step": 4219, "token_acc": 0.2879450013474881 }, { "epoch": 2.473761360304896, "grad_norm": 0.36253828854885484, "learning_rate": 0.00013793231986741853, "loss": 3.0524117946624756, "step": 4220, "token_acc": 0.2953382423054511 }, { "epoch": 2.474347698622105, "grad_norm": 0.34332029062738817, "learning_rate": 0.0001379306827622006, "loss": 3.061717987060547, "step": 4221, "token_acc": 0.29321430066125387 }, { "epoch": 2.474934036939314, "grad_norm": 0.4183935803845294, "learning_rate": 0.00013792904501886539, "loss": 3.054370880126953, "step": 4222, "token_acc": 0.29172475814433646 }, { "epoch": 2.475520375256523, "grad_norm": 0.39349046268948934, "learning_rate": 0.00013792740663742836, "loss": 3.078822374343872, "step": 4223, "token_acc": 0.28989740094487537 }, { "epoch": 2.4761067135737322, "grad_norm": 0.38669689335923935, "learning_rate": 0.00013792576761790487, "loss": 3.0848100185394287, "step": 4224, "token_acc": 0.29057060071638796 }, { "epoch": 2.476693051890941, "grad_norm": 0.41822095385725067, "learning_rate": 0.00013792412796031033, "loss": 3.0765552520751953, "step": 4225, "token_acc": 0.2908542049172496 }, { "epoch": 2.47727939020815, "grad_norm": 0.3324041443033771, "learning_rate": 0.00013792248766466013, "loss": 3.041658878326416, "step": 4226, "token_acc": 0.2961091843321947 }, { "epoch": 2.477865728525359, "grad_norm": 0.3819159779147814, "learning_rate": 0.0001379208467309697, "loss": 3.069956064224243, "step": 4227, "token_acc": 0.29074014182830726 }, { "epoch": 2.4784520668425682, "grad_norm": 0.2911843360175998, "learning_rate": 0.00013791920515925443, "loss": 3.0632269382476807, "step": 4228, "token_acc": 0.29243242964270416 }, { "epoch": 2.4790384051597774, "grad_norm": 0.3641360918598509, "learning_rate": 0.00013791756294952975, "loss": 3.089081287384033, "step": 4229, "token_acc": 0.2892145005573902 }, { "epoch": 2.479624743476986, "grad_norm": 0.31224454446394195, "learning_rate": 0.0001379159201018111, "loss": 2.983107566833496, "step": 4230, "token_acc": 0.30269377030335065 }, { "epoch": 2.480211081794195, "grad_norm": 0.33907558095597723, "learning_rate": 0.00013791427661611388, "loss": 3.045147180557251, "step": 4231, "token_acc": 0.29418266915872365 }, { "epoch": 2.4807974201114043, "grad_norm": 0.29503397085956434, "learning_rate": 0.00013791263249245356, "loss": 3.080207586288452, "step": 4232, "token_acc": 0.2906566606114851 }, { "epoch": 2.4813837584286134, "grad_norm": 0.40526288667606025, "learning_rate": 0.00013791098773084556, "loss": 3.0536293983459473, "step": 4233, "token_acc": 0.2935106608885778 }, { "epoch": 2.4819700967458225, "grad_norm": 0.32034152184078535, "learning_rate": 0.00013790934233130534, "loss": 3.0912976264953613, "step": 4234, "token_acc": 0.2869190322153884 }, { "epoch": 2.4825564350630316, "grad_norm": 0.3282582083662335, "learning_rate": 0.00013790769629384836, "loss": 3.055314779281616, "step": 4235, "token_acc": 0.2945630239260668 }, { "epoch": 2.4831427733802403, "grad_norm": 0.30870432327588987, "learning_rate": 0.0001379060496184901, "loss": 3.088980197906494, "step": 4236, "token_acc": 0.28895415817392295 }, { "epoch": 2.4837291116974494, "grad_norm": 0.3173176333099791, "learning_rate": 0.00013790440230524597, "loss": 3.051692485809326, "step": 4237, "token_acc": 0.2938765194177441 }, { "epoch": 2.4843154500146585, "grad_norm": 0.31368686713275207, "learning_rate": 0.0001379027543541315, "loss": 3.049684524536133, "step": 4238, "token_acc": 0.2953380117975514 }, { "epoch": 2.4849017883318676, "grad_norm": 0.3149809801837422, "learning_rate": 0.00013790110576516218, "loss": 3.0769524574279785, "step": 4239, "token_acc": 0.2891963342448858 }, { "epoch": 2.4854881266490767, "grad_norm": 0.3462843572435515, "learning_rate": 0.00013789945653835346, "loss": 3.114223003387451, "step": 4240, "token_acc": 0.2855528626980354 }, { "epoch": 2.4860744649662854, "grad_norm": 0.28886109673806776, "learning_rate": 0.00013789780667372082, "loss": 3.0680408477783203, "step": 4241, "token_acc": 0.29057683042781146 }, { "epoch": 2.4866608032834945, "grad_norm": 0.349299940684079, "learning_rate": 0.00013789615617127977, "loss": 3.0425429344177246, "step": 4242, "token_acc": 0.296533724071615 }, { "epoch": 2.4872471416007036, "grad_norm": 0.31346690884541845, "learning_rate": 0.00013789450503104585, "loss": 3.055014133453369, "step": 4243, "token_acc": 0.29257387398356083 }, { "epoch": 2.4878334799179127, "grad_norm": 0.3650110410265722, "learning_rate": 0.00013789285325303453, "loss": 3.0652599334716797, "step": 4244, "token_acc": 0.2924835504680499 }, { "epoch": 2.488419818235122, "grad_norm": 0.3465783492132816, "learning_rate": 0.00013789120083726133, "loss": 3.0709056854248047, "step": 4245, "token_acc": 0.29106642147532985 }, { "epoch": 2.489006156552331, "grad_norm": 0.3420653267437592, "learning_rate": 0.00013788954778374182, "loss": 3.0507068634033203, "step": 4246, "token_acc": 0.29285746173218175 }, { "epoch": 2.4895924948695396, "grad_norm": 0.3499246183981242, "learning_rate": 0.00013788789409249148, "loss": 3.08219838142395, "step": 4247, "token_acc": 0.2891330973883964 }, { "epoch": 2.4901788331867487, "grad_norm": 0.3282782280895836, "learning_rate": 0.00013788623976352583, "loss": 3.064849376678467, "step": 4248, "token_acc": 0.2926813604947059 }, { "epoch": 2.490765171503958, "grad_norm": 0.34318138967780115, "learning_rate": 0.00013788458479686044, "loss": 3.058095932006836, "step": 4249, "token_acc": 0.2913211778210844 }, { "epoch": 2.491351509821167, "grad_norm": 0.30523612056741006, "learning_rate": 0.00013788292919251086, "loss": 3.0718131065368652, "step": 4250, "token_acc": 0.29100949209334837 }, { "epoch": 2.4919378481383756, "grad_norm": 0.3435281735714235, "learning_rate": 0.00013788127295049266, "loss": 3.0578694343566895, "step": 4251, "token_acc": 0.2934601132060105 }, { "epoch": 2.4925241864555847, "grad_norm": 0.3066973459152552, "learning_rate": 0.00013787961607082135, "loss": 3.060603618621826, "step": 4252, "token_acc": 0.29185448265582514 }, { "epoch": 2.493110524772794, "grad_norm": 0.3260400895731516, "learning_rate": 0.0001378779585535125, "loss": 3.0854859352111816, "step": 4253, "token_acc": 0.28763019131041095 }, { "epoch": 2.493696863090003, "grad_norm": 0.34171137484417746, "learning_rate": 0.0001378763003985817, "loss": 3.055359125137329, "step": 4254, "token_acc": 0.2943689986650297 }, { "epoch": 2.494283201407212, "grad_norm": 0.3255718948641697, "learning_rate": 0.00013787464160604454, "loss": 3.0776758193969727, "step": 4255, "token_acc": 0.2912002894801509 }, { "epoch": 2.494869539724421, "grad_norm": 0.40041071378112875, "learning_rate": 0.0001378729821759166, "loss": 3.0606276988983154, "step": 4256, "token_acc": 0.29183415865618023 }, { "epoch": 2.49545587804163, "grad_norm": 0.33064984618074594, "learning_rate": 0.00013787132210821342, "loss": 3.0584356784820557, "step": 4257, "token_acc": 0.29300502640453013 }, { "epoch": 2.496042216358839, "grad_norm": 0.3348112213850654, "learning_rate": 0.00013786966140295063, "loss": 3.060304641723633, "step": 4258, "token_acc": 0.29230683942091423 }, { "epoch": 2.496628554676048, "grad_norm": 0.3233111763876964, "learning_rate": 0.00013786800006014384, "loss": 3.010420322418213, "step": 4259, "token_acc": 0.29789607905642335 }, { "epoch": 2.497214892993257, "grad_norm": 0.3513053485669945, "learning_rate": 0.00013786633807980864, "loss": 3.024261951446533, "step": 4260, "token_acc": 0.2988729363715654 }, { "epoch": 2.4978012313104663, "grad_norm": 0.33377933403005405, "learning_rate": 0.0001378646754619606, "loss": 3.1106936931610107, "step": 4261, "token_acc": 0.2863433497307132 }, { "epoch": 2.498387569627675, "grad_norm": 0.32618028403367255, "learning_rate": 0.00013786301220661544, "loss": 3.039785861968994, "step": 4262, "token_acc": 0.29525250714443896 }, { "epoch": 2.498973907944884, "grad_norm": 0.32642125152802226, "learning_rate": 0.0001378613483137887, "loss": 3.0649285316467285, "step": 4263, "token_acc": 0.2920402547809539 }, { "epoch": 2.499560246262093, "grad_norm": 0.3279319853392137, "learning_rate": 0.00013785968378349608, "loss": 3.109575033187866, "step": 4264, "token_acc": 0.2874243555102476 }, { "epoch": 2.5001465845793023, "grad_norm": 0.34062724187402776, "learning_rate": 0.00013785801861575312, "loss": 3.059476613998413, "step": 4265, "token_acc": 0.2932670943639223 }, { "epoch": 2.5007329228965114, "grad_norm": 0.35822122953879704, "learning_rate": 0.00013785635281057552, "loss": 3.057438373565674, "step": 4266, "token_acc": 0.29250561493031696 }, { "epoch": 2.5013192612137205, "grad_norm": 0.2878873626445056, "learning_rate": 0.00013785468636797894, "loss": 3.0433664321899414, "step": 4267, "token_acc": 0.29565203337876494 }, { "epoch": 2.5019055995309296, "grad_norm": 0.34055077064709366, "learning_rate": 0.00013785301928797902, "loss": 3.068225145339966, "step": 4268, "token_acc": 0.29149898173607247 }, { "epoch": 2.5024919378481383, "grad_norm": 0.2897239315356686, "learning_rate": 0.0001378513515705914, "loss": 3.0642426013946533, "step": 4269, "token_acc": 0.2906341777380225 }, { "epoch": 2.5030782761653474, "grad_norm": 0.3189977555595917, "learning_rate": 0.00013784968321583178, "loss": 3.09875750541687, "step": 4270, "token_acc": 0.28753457743280625 }, { "epoch": 2.5036646144825565, "grad_norm": 0.34650821922634417, "learning_rate": 0.00013784801422371583, "loss": 3.0858874320983887, "step": 4271, "token_acc": 0.2894563405059801 }, { "epoch": 2.5042509527997656, "grad_norm": 0.33144367372316236, "learning_rate": 0.00013784634459425917, "loss": 3.0393478870391846, "step": 4272, "token_acc": 0.2951814964532814 }, { "epoch": 2.5048372911169743, "grad_norm": 0.34466773472240236, "learning_rate": 0.00013784467432747757, "loss": 3.0754594802856445, "step": 4273, "token_acc": 0.28756313289317587 }, { "epoch": 2.5054236294341834, "grad_norm": 0.3910145280389394, "learning_rate": 0.00013784300342338662, "loss": 3.0994250774383545, "step": 4274, "token_acc": 0.28978384818313335 }, { "epoch": 2.5060099677513925, "grad_norm": 0.3157179757155058, "learning_rate": 0.00013784133188200214, "loss": 3.048058032989502, "step": 4275, "token_acc": 0.2947905013864074 }, { "epoch": 2.5065963060686016, "grad_norm": 0.3567279973630884, "learning_rate": 0.0001378396597033397, "loss": 3.0974514484405518, "step": 4276, "token_acc": 0.28893859998024607 }, { "epoch": 2.5071826443858107, "grad_norm": 0.43479296556506014, "learning_rate": 0.0001378379868874151, "loss": 3.068594455718994, "step": 4277, "token_acc": 0.2904294314801297 }, { "epoch": 2.50776898270302, "grad_norm": 0.379952031943408, "learning_rate": 0.00013783631343424404, "loss": 3.087218761444092, "step": 4278, "token_acc": 0.28977908627195903 }, { "epoch": 2.5083553210202285, "grad_norm": 0.323089690761266, "learning_rate": 0.00013783463934384223, "loss": 3.067884922027588, "step": 4279, "token_acc": 0.2924608166591504 }, { "epoch": 2.5089416593374376, "grad_norm": 0.38301383582038023, "learning_rate": 0.00013783296461622536, "loss": 3.068693161010742, "step": 4280, "token_acc": 0.2913203074416759 }, { "epoch": 2.5095279976546467, "grad_norm": 0.3335160923088287, "learning_rate": 0.00013783128925140922, "loss": 3.076441764831543, "step": 4281, "token_acc": 0.29036081954779547 }, { "epoch": 2.510114335971856, "grad_norm": 0.32782932537831677, "learning_rate": 0.00013782961324940952, "loss": 3.0684382915496826, "step": 4282, "token_acc": 0.2917029886245009 }, { "epoch": 2.5107006742890645, "grad_norm": 0.3482103885560477, "learning_rate": 0.00013782793661024198, "loss": 3.0610227584838867, "step": 4283, "token_acc": 0.29191147460573036 }, { "epoch": 2.5112870126062736, "grad_norm": 0.28217670080206, "learning_rate": 0.00013782625933392238, "loss": 3.080874443054199, "step": 4284, "token_acc": 0.28883241914324376 }, { "epoch": 2.5118733509234827, "grad_norm": 0.3524378323998762, "learning_rate": 0.0001378245814204665, "loss": 3.096837043762207, "step": 4285, "token_acc": 0.2887206501310321 }, { "epoch": 2.512459689240692, "grad_norm": 0.29302820360266363, "learning_rate": 0.00013782290286989005, "loss": 3.076169013977051, "step": 4286, "token_acc": 0.2922551717671974 }, { "epoch": 2.513046027557901, "grad_norm": 0.3516223373681763, "learning_rate": 0.00013782122368220882, "loss": 3.0763235092163086, "step": 4287, "token_acc": 0.2910851470418489 }, { "epoch": 2.51363236587511, "grad_norm": 0.3276716543671828, "learning_rate": 0.0001378195438574386, "loss": 3.082284688949585, "step": 4288, "token_acc": 0.28893827226602764 }, { "epoch": 2.514218704192319, "grad_norm": 0.3058665190306901, "learning_rate": 0.00013781786339559513, "loss": 3.071730136871338, "step": 4289, "token_acc": 0.29066122692671276 }, { "epoch": 2.514805042509528, "grad_norm": 0.30779023975600356, "learning_rate": 0.00013781618229669423, "loss": 3.0408101081848145, "step": 4290, "token_acc": 0.2950144008919262 }, { "epoch": 2.515391380826737, "grad_norm": 0.3202775836283203, "learning_rate": 0.00013781450056075167, "loss": 3.051098108291626, "step": 4291, "token_acc": 0.2926012300491859 }, { "epoch": 2.515977719143946, "grad_norm": 0.3006103798600977, "learning_rate": 0.0001378128181877833, "loss": 3.074449062347412, "step": 4292, "token_acc": 0.2913724072017152 }, { "epoch": 2.516564057461155, "grad_norm": 0.2997866337920747, "learning_rate": 0.00013781113517780482, "loss": 3.071866035461426, "step": 4293, "token_acc": 0.2908849892036599 }, { "epoch": 2.517150395778364, "grad_norm": 0.3612803089453866, "learning_rate": 0.00013780945153083214, "loss": 3.0768675804138184, "step": 4294, "token_acc": 0.2905536803683768 }, { "epoch": 2.517736734095573, "grad_norm": 0.3492051065809907, "learning_rate": 0.00013780776724688104, "loss": 3.0407323837280273, "step": 4295, "token_acc": 0.2955518063254867 }, { "epoch": 2.518323072412782, "grad_norm": 0.32233859543079346, "learning_rate": 0.00013780608232596733, "loss": 3.0610733032226562, "step": 4296, "token_acc": 0.29308770439583776 }, { "epoch": 2.518909410729991, "grad_norm": 0.2898712553405986, "learning_rate": 0.00013780439676810684, "loss": 3.0600483417510986, "step": 4297, "token_acc": 0.2937109165624442 }, { "epoch": 2.5194957490472003, "grad_norm": 0.357939894791837, "learning_rate": 0.0001378027105733154, "loss": 3.102938175201416, "step": 4298, "token_acc": 0.28729378278539025 }, { "epoch": 2.5200820873644094, "grad_norm": 0.34509748601686585, "learning_rate": 0.0001378010237416089, "loss": 3.080904722213745, "step": 4299, "token_acc": 0.28989886547981897 }, { "epoch": 2.5206684256816185, "grad_norm": 0.31208613715378564, "learning_rate": 0.00013779933627300312, "loss": 3.067377805709839, "step": 4300, "token_acc": 0.290322498878364 }, { "epoch": 2.521254763998827, "grad_norm": 0.3994709338086788, "learning_rate": 0.00013779764816751393, "loss": 3.050339698791504, "step": 4301, "token_acc": 0.2936385796307557 }, { "epoch": 2.5218411023160363, "grad_norm": 0.2721892782996403, "learning_rate": 0.0001377959594251572, "loss": 3.055325508117676, "step": 4302, "token_acc": 0.29275273794710127 }, { "epoch": 2.5224274406332454, "grad_norm": 0.32841498187703594, "learning_rate": 0.0001377942700459488, "loss": 3.057478427886963, "step": 4303, "token_acc": 0.2921735958404406 }, { "epoch": 2.5230137789504545, "grad_norm": 0.3451368924238096, "learning_rate": 0.00013779258002990456, "loss": 3.0445172786712646, "step": 4304, "token_acc": 0.29383035599467006 }, { "epoch": 2.523600117267663, "grad_norm": 0.32853011091045264, "learning_rate": 0.0001377908893770404, "loss": 3.054691791534424, "step": 4305, "token_acc": 0.29331485205548496 }, { "epoch": 2.5241864555848723, "grad_norm": 0.3054309590948566, "learning_rate": 0.00013778919808737217, "loss": 3.0670371055603027, "step": 4306, "token_acc": 0.29250297038105744 }, { "epoch": 2.5247727939020814, "grad_norm": 0.3418705175986472, "learning_rate": 0.00013778750616091578, "loss": 3.0597779750823975, "step": 4307, "token_acc": 0.2910893950140542 }, { "epoch": 2.5253591322192905, "grad_norm": 0.2898373075052804, "learning_rate": 0.00013778581359768713, "loss": 3.0802512168884277, "step": 4308, "token_acc": 0.2887073114304818 }, { "epoch": 2.5259454705364996, "grad_norm": 0.40397109090333105, "learning_rate": 0.0001377841203977021, "loss": 3.0814902782440186, "step": 4309, "token_acc": 0.28875920544149664 }, { "epoch": 2.5265318088537088, "grad_norm": 0.411873298303554, "learning_rate": 0.00013778242656097657, "loss": 3.0762405395507812, "step": 4310, "token_acc": 0.2892339643486944 }, { "epoch": 2.527118147170918, "grad_norm": 0.35473920263633446, "learning_rate": 0.0001377807320875265, "loss": 3.0415313243865967, "step": 4311, "token_acc": 0.29668444817921213 }, { "epoch": 2.5277044854881265, "grad_norm": 0.3793501293213284, "learning_rate": 0.0001377790369773678, "loss": 3.0777368545532227, "step": 4312, "token_acc": 0.2898947193898698 }, { "epoch": 2.5282908238053357, "grad_norm": 0.3204294208503375, "learning_rate": 0.00013777734123051634, "loss": 3.054312229156494, "step": 4313, "token_acc": 0.293183333874517 }, { "epoch": 2.5288771621225448, "grad_norm": 0.34049830855116353, "learning_rate": 0.00013777564484698815, "loss": 3.134559154510498, "step": 4314, "token_acc": 0.28249032152792214 }, { "epoch": 2.529463500439754, "grad_norm": 0.31651240769526845, "learning_rate": 0.00013777394782679906, "loss": 3.0874271392822266, "step": 4315, "token_acc": 0.28961005643919957 }, { "epoch": 2.5300498387569625, "grad_norm": 0.366071591727774, "learning_rate": 0.00013777225016996507, "loss": 3.068683624267578, "step": 4316, "token_acc": 0.29148187716601087 }, { "epoch": 2.5306361770741717, "grad_norm": 0.3357283503405669, "learning_rate": 0.00013777055187650213, "loss": 3.119310140609741, "step": 4317, "token_acc": 0.2856781330306944 }, { "epoch": 2.5312225153913808, "grad_norm": 0.31886789637334434, "learning_rate": 0.00013776885294642616, "loss": 3.0851244926452637, "step": 4318, "token_acc": 0.2885755906077981 }, { "epoch": 2.53180885370859, "grad_norm": 0.32345173941705485, "learning_rate": 0.00013776715337975314, "loss": 3.085324764251709, "step": 4319, "token_acc": 0.28887107971251225 }, { "epoch": 2.532395192025799, "grad_norm": 0.41026587487479954, "learning_rate": 0.00013776545317649902, "loss": 3.040525197982788, "step": 4320, "token_acc": 0.29548629808945015 }, { "epoch": 2.532981530343008, "grad_norm": 0.29701460776718747, "learning_rate": 0.0001377637523366798, "loss": 3.042804718017578, "step": 4321, "token_acc": 0.2950354609929078 }, { "epoch": 2.533567868660217, "grad_norm": 0.3341609948907965, "learning_rate": 0.00013776205086031142, "loss": 3.0456385612487793, "step": 4322, "token_acc": 0.29432571651446476 }, { "epoch": 2.534154206977426, "grad_norm": 0.3270859982234725, "learning_rate": 0.0001377603487474099, "loss": 3.0311717987060547, "step": 4323, "token_acc": 0.2976740437634372 }, { "epoch": 2.534740545294635, "grad_norm": 0.34589640576174213, "learning_rate": 0.00013775864599799122, "loss": 3.078972816467285, "step": 4324, "token_acc": 0.2886036429434313 }, { "epoch": 2.535326883611844, "grad_norm": 0.3798025012938151, "learning_rate": 0.00013775694261207132, "loss": 3.035907030105591, "step": 4325, "token_acc": 0.2963933913587325 }, { "epoch": 2.535913221929053, "grad_norm": 0.3411297394985252, "learning_rate": 0.0001377552385896663, "loss": 3.0305399894714355, "step": 4326, "token_acc": 0.295746139780577 }, { "epoch": 2.536499560246262, "grad_norm": 0.36270059116668296, "learning_rate": 0.00013775353393079208, "loss": 3.0688321590423584, "step": 4327, "token_acc": 0.29262879857055923 }, { "epoch": 2.537085898563471, "grad_norm": 0.3178199827036867, "learning_rate": 0.00013775182863546472, "loss": 3.0445261001586914, "step": 4328, "token_acc": 0.29479870778572076 }, { "epoch": 2.53767223688068, "grad_norm": 0.3572778037203079, "learning_rate": 0.00013775012270370024, "loss": 3.067387819290161, "step": 4329, "token_acc": 0.2911311900756086 }, { "epoch": 2.538258575197889, "grad_norm": 0.3756105965781068, "learning_rate": 0.00013774841613551462, "loss": 3.101994514465332, "step": 4330, "token_acc": 0.28699396668822347 }, { "epoch": 2.5388449135150983, "grad_norm": 0.31387746581542386, "learning_rate": 0.00013774670893092395, "loss": 3.0922460556030273, "step": 4331, "token_acc": 0.2873343540194689 }, { "epoch": 2.5394312518323074, "grad_norm": 0.34828542273918517, "learning_rate": 0.00013774500108994426, "loss": 3.057373523712158, "step": 4332, "token_acc": 0.2925273236743204 }, { "epoch": 2.540017590149516, "grad_norm": 0.34236386419673176, "learning_rate": 0.00013774329261259153, "loss": 3.0289864540100098, "step": 4333, "token_acc": 0.2971059136912446 }, { "epoch": 2.5406039284667252, "grad_norm": 0.306711554906487, "learning_rate": 0.00013774158349888187, "loss": 3.0758490562438965, "step": 4334, "token_acc": 0.289871342810008 }, { "epoch": 2.5411902667839343, "grad_norm": 0.2958320791178048, "learning_rate": 0.00013773987374883132, "loss": 3.0591182708740234, "step": 4335, "token_acc": 0.29313643748931073 }, { "epoch": 2.5417766051011434, "grad_norm": 0.30292561541597957, "learning_rate": 0.0001377381633624559, "loss": 3.087907314300537, "step": 4336, "token_acc": 0.28757834066277427 }, { "epoch": 2.542362943418352, "grad_norm": 0.3095580043840721, "learning_rate": 0.00013773645233977177, "loss": 3.035019636154175, "step": 4337, "token_acc": 0.2948293623271184 }, { "epoch": 2.5429492817355612, "grad_norm": 0.34191527654712034, "learning_rate": 0.00013773474068079492, "loss": 3.115413188934326, "step": 4338, "token_acc": 0.28637658596513527 }, { "epoch": 2.5435356200527703, "grad_norm": 0.4868577647090121, "learning_rate": 0.00013773302838554144, "loss": 3.0621137619018555, "step": 4339, "token_acc": 0.2913441723760179 }, { "epoch": 2.5441219583699795, "grad_norm": 0.45508902422407604, "learning_rate": 0.00013773131545402744, "loss": 3.048271656036377, "step": 4340, "token_acc": 0.2947804487010453 }, { "epoch": 2.5447082966871886, "grad_norm": 0.34032648949072936, "learning_rate": 0.000137729601886269, "loss": 3.0537521839141846, "step": 4341, "token_acc": 0.294519686030233 }, { "epoch": 2.5452946350043977, "grad_norm": 0.4238679292428818, "learning_rate": 0.00013772788768228223, "loss": 3.075558662414551, "step": 4342, "token_acc": 0.2908509224505294 }, { "epoch": 2.545880973321607, "grad_norm": 0.3412280308881297, "learning_rate": 0.00013772617284208322, "loss": 3.08547306060791, "step": 4343, "token_acc": 0.28871251388374675 }, { "epoch": 2.5464673116388155, "grad_norm": 0.3445728552098471, "learning_rate": 0.00013772445736568806, "loss": 3.0621161460876465, "step": 4344, "token_acc": 0.292539300267252 }, { "epoch": 2.5470536499560246, "grad_norm": 0.39660816859009007, "learning_rate": 0.0001377227412531129, "loss": 3.058565616607666, "step": 4345, "token_acc": 0.2931757312086949 }, { "epoch": 2.5476399882732337, "grad_norm": 0.31858294672020915, "learning_rate": 0.00013772102450437384, "loss": 3.098022699356079, "step": 4346, "token_acc": 0.2871662657022624 }, { "epoch": 2.548226326590443, "grad_norm": 0.3704959565672287, "learning_rate": 0.000137719307119487, "loss": 3.129561424255371, "step": 4347, "token_acc": 0.2836818821636839 }, { "epoch": 2.5488126649076515, "grad_norm": 0.30503791512508854, "learning_rate": 0.00013771758909846853, "loss": 3.0888030529022217, "step": 4348, "token_acc": 0.28812711252648393 }, { "epoch": 2.5493990032248606, "grad_norm": 0.3249582948008529, "learning_rate": 0.00013771587044133458, "loss": 3.0452113151550293, "step": 4349, "token_acc": 0.29335038696962973 }, { "epoch": 2.5499853415420697, "grad_norm": 0.3627027210169185, "learning_rate": 0.00013771415114810124, "loss": 3.0877909660339355, "step": 4350, "token_acc": 0.2896060295726454 }, { "epoch": 2.550571679859279, "grad_norm": 0.2863728945010151, "learning_rate": 0.0001377124312187847, "loss": 3.0836799144744873, "step": 4351, "token_acc": 0.28822915703701746 }, { "epoch": 2.551158018176488, "grad_norm": 0.32658920826979826, "learning_rate": 0.00013771071065340112, "loss": 3.0722928047180176, "step": 4352, "token_acc": 0.2915955693473332 }, { "epoch": 2.551744356493697, "grad_norm": 0.30630292575854784, "learning_rate": 0.00013770898945196665, "loss": 3.0500426292419434, "step": 4353, "token_acc": 0.2935960199795438 }, { "epoch": 2.552330694810906, "grad_norm": 0.33534661856836845, "learning_rate": 0.00013770726761449747, "loss": 3.0550284385681152, "step": 4354, "token_acc": 0.2929844358919804 }, { "epoch": 2.552917033128115, "grad_norm": 0.3354512800164192, "learning_rate": 0.00013770554514100974, "loss": 3.109459400177002, "step": 4355, "token_acc": 0.2850557203559689 }, { "epoch": 2.553503371445324, "grad_norm": 0.28807130552191323, "learning_rate": 0.00013770382203151968, "loss": 3.05765438079834, "step": 4356, "token_acc": 0.29253885431328774 }, { "epoch": 2.554089709762533, "grad_norm": 0.32104397198699836, "learning_rate": 0.0001377020982860434, "loss": 3.0617847442626953, "step": 4357, "token_acc": 0.2930197893376279 }, { "epoch": 2.554676048079742, "grad_norm": 0.33569226700335814, "learning_rate": 0.00013770037390459718, "loss": 3.030935287475586, "step": 4358, "token_acc": 0.2970840059775139 }, { "epoch": 2.555262386396951, "grad_norm": 0.26900104899872107, "learning_rate": 0.00013769864888719716, "loss": 3.08833646774292, "step": 4359, "token_acc": 0.2887131646276103 }, { "epoch": 2.55584872471416, "grad_norm": 0.3346223539889032, "learning_rate": 0.00013769692323385954, "loss": 3.0506608486175537, "step": 4360, "token_acc": 0.29234854762995344 }, { "epoch": 2.556435063031369, "grad_norm": 0.33583176041228646, "learning_rate": 0.00013769519694460056, "loss": 3.034132957458496, "step": 4361, "token_acc": 0.29706054216102235 }, { "epoch": 2.557021401348578, "grad_norm": 0.29859857114913757, "learning_rate": 0.00013769347001943641, "loss": 3.113027811050415, "step": 4362, "token_acc": 0.2839442084840858 }, { "epoch": 2.5576077396657872, "grad_norm": 0.3481407533014389, "learning_rate": 0.00013769174245838333, "loss": 3.1086015701293945, "step": 4363, "token_acc": 0.2844130172333736 }, { "epoch": 2.5581940779829964, "grad_norm": 0.32083260961973054, "learning_rate": 0.00013769001426145757, "loss": 3.0893335342407227, "step": 4364, "token_acc": 0.2881651092336576 }, { "epoch": 2.5587804163002055, "grad_norm": 0.3464046346646123, "learning_rate": 0.00013768828542867534, "loss": 3.066728115081787, "step": 4365, "token_acc": 0.29186022203096706 }, { "epoch": 2.559366754617414, "grad_norm": 0.3557937497058574, "learning_rate": 0.00013768655596005285, "loss": 3.0987534523010254, "step": 4366, "token_acc": 0.285965893837859 }, { "epoch": 2.5599530929346233, "grad_norm": 0.33181976637690075, "learning_rate": 0.0001376848258556064, "loss": 3.0534462928771973, "step": 4367, "token_acc": 0.2924701525889038 }, { "epoch": 2.5605394312518324, "grad_norm": 0.35568632908601167, "learning_rate": 0.00013768309511535222, "loss": 3.087346315383911, "step": 4368, "token_acc": 0.2880468422418785 }, { "epoch": 2.5611257695690415, "grad_norm": 0.30334467315122554, "learning_rate": 0.00013768136373930654, "loss": 3.0978758335113525, "step": 4369, "token_acc": 0.2873987975951904 }, { "epoch": 2.56171210788625, "grad_norm": 0.30311635466958564, "learning_rate": 0.00013767963172748565, "loss": 3.102729558944702, "step": 4370, "token_acc": 0.28758861664955937 }, { "epoch": 2.5622984462034593, "grad_norm": 0.3505588600232638, "learning_rate": 0.00013767789907990582, "loss": 3.0719733238220215, "step": 4371, "token_acc": 0.2896677532532818 }, { "epoch": 2.5628847845206684, "grad_norm": 0.31030622273303227, "learning_rate": 0.00013767616579658335, "loss": 3.068525552749634, "step": 4372, "token_acc": 0.2900499979339697 }, { "epoch": 2.5634711228378775, "grad_norm": 0.31996443758474635, "learning_rate": 0.0001376744318775345, "loss": 3.091991662979126, "step": 4373, "token_acc": 0.28940168478652173 }, { "epoch": 2.5640574611550866, "grad_norm": 0.33831918930364835, "learning_rate": 0.00013767269732277554, "loss": 3.077623128890991, "step": 4374, "token_acc": 0.29023333289954845 }, { "epoch": 2.5646437994722957, "grad_norm": 0.3533177035662115, "learning_rate": 0.00013767096213232276, "loss": 3.0500428676605225, "step": 4375, "token_acc": 0.29363014658919195 }, { "epoch": 2.565230137789505, "grad_norm": 0.3759404470112113, "learning_rate": 0.00013766922630619252, "loss": 3.113585948944092, "step": 4376, "token_acc": 0.28521281838868323 }, { "epoch": 2.5658164761067135, "grad_norm": 0.28073353947347796, "learning_rate": 0.00013766748984440105, "loss": 3.0696945190429688, "step": 4377, "token_acc": 0.2909410936399906 }, { "epoch": 2.5664028144239226, "grad_norm": 0.3388425200840157, "learning_rate": 0.0001376657527469647, "loss": 3.0615196228027344, "step": 4378, "token_acc": 0.29084006733150597 }, { "epoch": 2.5669891527411317, "grad_norm": 0.3225644700019978, "learning_rate": 0.0001376640150138998, "loss": 3.0399904251098633, "step": 4379, "token_acc": 0.2940290073460162 }, { "epoch": 2.567575491058341, "grad_norm": 0.2794387992121348, "learning_rate": 0.00013766227664522266, "loss": 3.0509774684906006, "step": 4380, "token_acc": 0.29323887303964896 }, { "epoch": 2.5681618293755495, "grad_norm": 0.29651086689929507, "learning_rate": 0.0001376605376409496, "loss": 3.104763984680176, "step": 4381, "token_acc": 0.28595544492969993 }, { "epoch": 2.5687481676927586, "grad_norm": 0.28481032604770207, "learning_rate": 0.00013765879800109695, "loss": 3.037874460220337, "step": 4382, "token_acc": 0.2940751099318874 }, { "epoch": 2.5693345060099677, "grad_norm": 0.3023541757431778, "learning_rate": 0.00013765705772568107, "loss": 3.062717914581299, "step": 4383, "token_acc": 0.292416659887314 }, { "epoch": 2.569920844327177, "grad_norm": 0.3484368017764343, "learning_rate": 0.0001376553168147183, "loss": 3.0805723667144775, "step": 4384, "token_acc": 0.28964922606693305 }, { "epoch": 2.570507182644386, "grad_norm": 0.2978548722386638, "learning_rate": 0.000137653575268225, "loss": 3.062324047088623, "step": 4385, "token_acc": 0.29283459244429827 }, { "epoch": 2.571093520961595, "grad_norm": 0.3597604048050465, "learning_rate": 0.00013765183308621752, "loss": 3.0830540657043457, "step": 4386, "token_acc": 0.2922269150288201 }, { "epoch": 2.5716798592788037, "grad_norm": 0.3932886884573602, "learning_rate": 0.00013765009026871225, "loss": 3.0906801223754883, "step": 4387, "token_acc": 0.28683908419433796 }, { "epoch": 2.572266197596013, "grad_norm": 0.44127435472880217, "learning_rate": 0.00013764834681572553, "loss": 3.078768253326416, "step": 4388, "token_acc": 0.2906192997864125 }, { "epoch": 2.572852535913222, "grad_norm": 0.3287074289209757, "learning_rate": 0.00013764660272727375, "loss": 3.0593152046203613, "step": 4389, "token_acc": 0.29274742398365383 }, { "epoch": 2.573438874230431, "grad_norm": 0.34523479112255184, "learning_rate": 0.00013764485800337326, "loss": 3.093247175216675, "step": 4390, "token_acc": 0.28899363064011147 }, { "epoch": 2.5740252125476397, "grad_norm": 0.35931216919659087, "learning_rate": 0.0001376431126440405, "loss": 3.0674726963043213, "step": 4391, "token_acc": 0.2914914151279091 }, { "epoch": 2.574611550864849, "grad_norm": 0.32560710316110114, "learning_rate": 0.00013764136664929185, "loss": 3.068941593170166, "step": 4392, "token_acc": 0.2888528913564522 }, { "epoch": 2.575197889182058, "grad_norm": 0.31078626938133513, "learning_rate": 0.00013763962001914372, "loss": 3.033418893814087, "step": 4393, "token_acc": 0.2967011077365646 }, { "epoch": 2.575784227499267, "grad_norm": 0.29736251295161276, "learning_rate": 0.00013763787275361251, "loss": 3.080585479736328, "step": 4394, "token_acc": 0.291277608484794 }, { "epoch": 2.576370565816476, "grad_norm": 0.36419033723368166, "learning_rate": 0.00013763612485271463, "loss": 3.06382417678833, "step": 4395, "token_acc": 0.2932593747184923 }, { "epoch": 2.5769569041336853, "grad_norm": 0.33271998331515396, "learning_rate": 0.0001376343763164665, "loss": 3.0995993614196777, "step": 4396, "token_acc": 0.2853398844997897 }, { "epoch": 2.5775432424508944, "grad_norm": 0.3288883294718207, "learning_rate": 0.0001376326271448845, "loss": 3.1448240280151367, "step": 4397, "token_acc": 0.2794351218838478 }, { "epoch": 2.578129580768103, "grad_norm": 0.39622551231249903, "learning_rate": 0.00013763087733798513, "loss": 3.0747480392456055, "step": 4398, "token_acc": 0.29000966898704983 }, { "epoch": 2.578715919085312, "grad_norm": 0.32933547860736917, "learning_rate": 0.0001376291268957848, "loss": 3.082432985305786, "step": 4399, "token_acc": 0.2898494915254237 }, { "epoch": 2.5793022574025213, "grad_norm": 0.31636511042155024, "learning_rate": 0.00013762737581829998, "loss": 3.08817720413208, "step": 4400, "token_acc": 0.2888427965578293 }, { "epoch": 2.5798885957197304, "grad_norm": 0.3799530777196542, "learning_rate": 0.00013762562410554705, "loss": 3.0421314239501953, "step": 4401, "token_acc": 0.2951059971233819 }, { "epoch": 2.580474934036939, "grad_norm": 0.3271510053089545, "learning_rate": 0.00013762387175754253, "loss": 3.0729103088378906, "step": 4402, "token_acc": 0.29212287893726324 }, { "epoch": 2.581061272354148, "grad_norm": 0.30043137893886745, "learning_rate": 0.00013762211877430284, "loss": 3.0770773887634277, "step": 4403, "token_acc": 0.2898860148939099 }, { "epoch": 2.5816476106713573, "grad_norm": 0.30752389383480555, "learning_rate": 0.00013762036515584453, "loss": 3.0742204189300537, "step": 4404, "token_acc": 0.29042238432892925 }, { "epoch": 2.5822339489885664, "grad_norm": 0.3423163134917392, "learning_rate": 0.00013761861090218395, "loss": 3.0139851570129395, "step": 4405, "token_acc": 0.2978366013407122 }, { "epoch": 2.5828202873057755, "grad_norm": 0.3306011378717173, "learning_rate": 0.00013761685601333765, "loss": 3.0308685302734375, "step": 4406, "token_acc": 0.29587622558392096 }, { "epoch": 2.5834066256229846, "grad_norm": 0.3382412141939098, "learning_rate": 0.0001376151004893221, "loss": 3.030334949493408, "step": 4407, "token_acc": 0.29731246276912604 }, { "epoch": 2.5839929639401937, "grad_norm": 0.34161288647340077, "learning_rate": 0.0001376133443301538, "loss": 3.067458152770996, "step": 4408, "token_acc": 0.29262269107175126 }, { "epoch": 2.5845793022574024, "grad_norm": 0.34660139986282135, "learning_rate": 0.00013761158753584923, "loss": 3.044356107711792, "step": 4409, "token_acc": 0.2941715928980975 }, { "epoch": 2.5851656405746115, "grad_norm": 0.44300900563454804, "learning_rate": 0.0001376098301064249, "loss": 3.1110329627990723, "step": 4410, "token_acc": 0.2852833491892515 }, { "epoch": 2.5857519788918206, "grad_norm": 0.4185773792082414, "learning_rate": 0.00013760807204189735, "loss": 3.0119833946228027, "step": 4411, "token_acc": 0.2983083851206553 }, { "epoch": 2.5863383172090297, "grad_norm": 0.3245734483685519, "learning_rate": 0.00013760631334228305, "loss": 3.1030757427215576, "step": 4412, "token_acc": 0.2866843719828082 }, { "epoch": 2.5869246555262384, "grad_norm": 0.3918930447489074, "learning_rate": 0.00013760455400759855, "loss": 3.040175437927246, "step": 4413, "token_acc": 0.2951974680776859 }, { "epoch": 2.5875109938434475, "grad_norm": 0.3280369382564077, "learning_rate": 0.00013760279403786033, "loss": 3.0834498405456543, "step": 4414, "token_acc": 0.28941022860091187 }, { "epoch": 2.5880973321606566, "grad_norm": 0.3721016978335643, "learning_rate": 0.00013760103343308497, "loss": 3.050204277038574, "step": 4415, "token_acc": 0.2933115954617739 }, { "epoch": 2.5886836704778657, "grad_norm": 0.36765475093283806, "learning_rate": 0.000137599272193289, "loss": 3.0471296310424805, "step": 4416, "token_acc": 0.29502397064944275 }, { "epoch": 2.589270008795075, "grad_norm": 0.3164681351459282, "learning_rate": 0.00013759751031848898, "loss": 3.0877580642700195, "step": 4417, "token_acc": 0.28837652965171007 }, { "epoch": 2.589856347112284, "grad_norm": 0.38515495908202785, "learning_rate": 0.0001375957478087014, "loss": 3.120756149291992, "step": 4418, "token_acc": 0.2851830119052276 }, { "epoch": 2.590442685429493, "grad_norm": 0.32897531029187227, "learning_rate": 0.00013759398466394287, "loss": 3.0457823276519775, "step": 4419, "token_acc": 0.2943217151097315 }, { "epoch": 2.5910290237467017, "grad_norm": 0.3476061476502033, "learning_rate": 0.00013759222088422993, "loss": 3.0698466300964355, "step": 4420, "token_acc": 0.29171058117343013 }, { "epoch": 2.591615362063911, "grad_norm": 0.32636911759625703, "learning_rate": 0.0001375904564695792, "loss": 3.059525966644287, "step": 4421, "token_acc": 0.2919048731728847 }, { "epoch": 2.59220170038112, "grad_norm": 0.2797280582960858, "learning_rate": 0.00013758869142000714, "loss": 3.068411350250244, "step": 4422, "token_acc": 0.29225366165143474 }, { "epoch": 2.592788038698329, "grad_norm": 0.3190398761294155, "learning_rate": 0.00013758692573553048, "loss": 3.0230913162231445, "step": 4423, "token_acc": 0.2979669297058839 }, { "epoch": 2.5933743770155377, "grad_norm": 0.3234341558786688, "learning_rate": 0.00013758515941616567, "loss": 3.0942068099975586, "step": 4424, "token_acc": 0.2883273609039131 }, { "epoch": 2.593960715332747, "grad_norm": 0.33706690408519574, "learning_rate": 0.00013758339246192937, "loss": 3.071619987487793, "step": 4425, "token_acc": 0.29056958058674753 }, { "epoch": 2.594547053649956, "grad_norm": 0.3033290298802906, "learning_rate": 0.00013758162487283816, "loss": 3.081899404525757, "step": 4426, "token_acc": 0.2907523591975111 }, { "epoch": 2.595133391967165, "grad_norm": 0.3345095515346861, "learning_rate": 0.00013757985664890866, "loss": 3.038045644760132, "step": 4427, "token_acc": 0.2950555972013993 }, { "epoch": 2.595719730284374, "grad_norm": 0.32593143230337646, "learning_rate": 0.00013757808779015748, "loss": 3.0481486320495605, "step": 4428, "token_acc": 0.29386033713499543 }, { "epoch": 2.5963060686015833, "grad_norm": 0.30887006936395417, "learning_rate": 0.00013757631829660125, "loss": 3.048729181289673, "step": 4429, "token_acc": 0.2955832829970524 }, { "epoch": 2.5968924069187924, "grad_norm": 0.4095987781046983, "learning_rate": 0.00013757454816825654, "loss": 3.075457811355591, "step": 4430, "token_acc": 0.2919607656315585 }, { "epoch": 2.597478745236001, "grad_norm": 0.28132534412322546, "learning_rate": 0.00013757277740514, "loss": 3.0803680419921875, "step": 4431, "token_acc": 0.289418904138588 }, { "epoch": 2.59806508355321, "grad_norm": 0.37715632161876317, "learning_rate": 0.00013757100600726827, "loss": 3.0984158515930176, "step": 4432, "token_acc": 0.284995511820597 }, { "epoch": 2.5986514218704193, "grad_norm": 0.364524089161537, "learning_rate": 0.00013756923397465802, "loss": 3.0697059631347656, "step": 4433, "token_acc": 0.2905751769119277 }, { "epoch": 2.5992377601876284, "grad_norm": 0.3695863138738358, "learning_rate": 0.00013756746130732587, "loss": 3.063328981399536, "step": 4434, "token_acc": 0.29063575697287175 }, { "epoch": 2.599824098504837, "grad_norm": 0.3057216796050019, "learning_rate": 0.00013756568800528843, "loss": 3.055208921432495, "step": 4435, "token_acc": 0.29375575458274045 }, { "epoch": 2.600410436822046, "grad_norm": 0.39354702922972107, "learning_rate": 0.00013756391406856243, "loss": 3.0608880519866943, "step": 4436, "token_acc": 0.29189058711809734 }, { "epoch": 2.6009967751392553, "grad_norm": 0.3220702758052491, "learning_rate": 0.00013756213949716448, "loss": 3.0279791355133057, "step": 4437, "token_acc": 0.2958560226815017 }, { "epoch": 2.6015831134564644, "grad_norm": 0.3077477488023142, "learning_rate": 0.0001375603642911113, "loss": 3.069695472717285, "step": 4438, "token_acc": 0.2913389516765209 }, { "epoch": 2.6021694517736735, "grad_norm": 0.32640741989334876, "learning_rate": 0.0001375585884504195, "loss": 3.0521297454833984, "step": 4439, "token_acc": 0.2927884900088313 }, { "epoch": 2.6027557900908826, "grad_norm": 0.2758617142478373, "learning_rate": 0.00013755681197510583, "loss": 3.0593180656433105, "step": 4440, "token_acc": 0.29155545291966295 }, { "epoch": 2.6033421284080913, "grad_norm": 0.3163299292395558, "learning_rate": 0.00013755503486518692, "loss": 3.076005458831787, "step": 4441, "token_acc": 0.29140460661224815 }, { "epoch": 2.6039284667253004, "grad_norm": 0.3315894656420227, "learning_rate": 0.00013755325712067951, "loss": 3.105449914932251, "step": 4442, "token_acc": 0.2865914345649145 }, { "epoch": 2.6045148050425095, "grad_norm": 0.28247757609454416, "learning_rate": 0.00013755147874160026, "loss": 3.0808606147766113, "step": 4443, "token_acc": 0.28992357850067063 }, { "epoch": 2.6051011433597187, "grad_norm": 0.33773511037182347, "learning_rate": 0.0001375496997279659, "loss": 3.0509166717529297, "step": 4444, "token_acc": 0.29406827834741944 }, { "epoch": 2.6056874816769273, "grad_norm": 0.30187671189376414, "learning_rate": 0.00013754792007979313, "loss": 3.1000943183898926, "step": 4445, "token_acc": 0.28549702458230225 }, { "epoch": 2.6062738199941364, "grad_norm": 0.30011680398563206, "learning_rate": 0.00013754613979709868, "loss": 3.0710039138793945, "step": 4446, "token_acc": 0.28889464549387356 }, { "epoch": 2.6068601583113455, "grad_norm": 0.307711419659515, "learning_rate": 0.00013754435887989926, "loss": 3.0676560401916504, "step": 4447, "token_acc": 0.29186240537301117 }, { "epoch": 2.6074464966285547, "grad_norm": 0.2868450376223833, "learning_rate": 0.00013754257732821162, "loss": 3.095599889755249, "step": 4448, "token_acc": 0.2883357052561279 }, { "epoch": 2.6080328349457638, "grad_norm": 0.32378027935979525, "learning_rate": 0.00013754079514205248, "loss": 3.037041187286377, "step": 4449, "token_acc": 0.29625375439467905 }, { "epoch": 2.608619173262973, "grad_norm": 0.33579876256775354, "learning_rate": 0.00013753901232143857, "loss": 3.047914981842041, "step": 4450, "token_acc": 0.29306351620072524 }, { "epoch": 2.609205511580182, "grad_norm": 0.324412759674356, "learning_rate": 0.00013753722886638664, "loss": 3.067213535308838, "step": 4451, "token_acc": 0.29269742359394124 }, { "epoch": 2.6097918498973907, "grad_norm": 0.3606715210221491, "learning_rate": 0.00013753544477691348, "loss": 3.0669684410095215, "step": 4452, "token_acc": 0.2927171373909994 }, { "epoch": 2.6103781882145998, "grad_norm": 0.33450755618091776, "learning_rate": 0.00013753366005303581, "loss": 3.0537023544311523, "step": 4453, "token_acc": 0.2930190654106549 }, { "epoch": 2.610964526531809, "grad_norm": 0.3109836402167432, "learning_rate": 0.0001375318746947704, "loss": 3.071597099304199, "step": 4454, "token_acc": 0.29061296115337587 }, { "epoch": 2.611550864849018, "grad_norm": 0.37296756072227977, "learning_rate": 0.00013753008870213404, "loss": 3.047229766845703, "step": 4455, "token_acc": 0.29453239254590785 }, { "epoch": 2.6121372031662267, "grad_norm": 0.33034906505470774, "learning_rate": 0.00013752830207514348, "loss": 3.0468077659606934, "step": 4456, "token_acc": 0.2935726126423417 }, { "epoch": 2.6127235414834358, "grad_norm": 0.39796619993887566, "learning_rate": 0.00013752651481381553, "loss": 3.0859241485595703, "step": 4457, "token_acc": 0.28787343161360446 }, { "epoch": 2.613309879800645, "grad_norm": 0.35352470214403897, "learning_rate": 0.00013752472691816694, "loss": 3.0704493522644043, "step": 4458, "token_acc": 0.2898268877820356 }, { "epoch": 2.613896218117854, "grad_norm": 0.35075359172371434, "learning_rate": 0.00013752293838821457, "loss": 3.0824742317199707, "step": 4459, "token_acc": 0.28955281945598116 }, { "epoch": 2.614482556435063, "grad_norm": 0.42655089327280327, "learning_rate": 0.00013752114922397515, "loss": 3.1209373474121094, "step": 4460, "token_acc": 0.2851773182565961 }, { "epoch": 2.615068894752272, "grad_norm": 0.32057052336688263, "learning_rate": 0.00013751935942546556, "loss": 3.0794143676757812, "step": 4461, "token_acc": 0.2891218949196332 }, { "epoch": 2.6156552330694813, "grad_norm": 0.3858109294160279, "learning_rate": 0.00013751756899270251, "loss": 3.050518035888672, "step": 4462, "token_acc": 0.29322694502464486 }, { "epoch": 2.61624157138669, "grad_norm": 0.272830021710739, "learning_rate": 0.00013751577792570294, "loss": 3.029107093811035, "step": 4463, "token_acc": 0.29660354163178926 }, { "epoch": 2.616827909703899, "grad_norm": 0.3473448606413895, "learning_rate": 0.00013751398622448359, "loss": 3.011425495147705, "step": 4464, "token_acc": 0.29936184826724943 }, { "epoch": 2.6174142480211082, "grad_norm": 0.31838458676233844, "learning_rate": 0.0001375121938890613, "loss": 3.0856447219848633, "step": 4465, "token_acc": 0.28835524913298705 }, { "epoch": 2.6180005863383173, "grad_norm": 0.3380809321529088, "learning_rate": 0.00013751040091945294, "loss": 3.077059745788574, "step": 4466, "token_acc": 0.2920450274522378 }, { "epoch": 2.618586924655526, "grad_norm": 0.3051797385265674, "learning_rate": 0.00013750860731567534, "loss": 3.0890941619873047, "step": 4467, "token_acc": 0.2890020382503846 }, { "epoch": 2.619173262972735, "grad_norm": 0.306081934016556, "learning_rate": 0.00013750681307774532, "loss": 3.0404391288757324, "step": 4468, "token_acc": 0.29479730043257013 }, { "epoch": 2.6197596012899442, "grad_norm": 0.316741423511763, "learning_rate": 0.00013750501820567978, "loss": 3.0462069511413574, "step": 4469, "token_acc": 0.2945326594129943 }, { "epoch": 2.6203459396071533, "grad_norm": 0.33421666301891484, "learning_rate": 0.00013750322269949556, "loss": 3.0583081245422363, "step": 4470, "token_acc": 0.29159750383459754 }, { "epoch": 2.6209322779243625, "grad_norm": 0.3160489514279233, "learning_rate": 0.00013750142655920953, "loss": 3.039483070373535, "step": 4471, "token_acc": 0.2962294702752718 }, { "epoch": 2.6215186162415716, "grad_norm": 0.2779608535115818, "learning_rate": 0.00013749962978483853, "loss": 3.030332565307617, "step": 4472, "token_acc": 0.29782767121197373 }, { "epoch": 2.6221049545587807, "grad_norm": 0.29113334568278476, "learning_rate": 0.00013749783237639948, "loss": 3.060206413269043, "step": 4473, "token_acc": 0.29365464274072367 }, { "epoch": 2.6226912928759893, "grad_norm": 0.3057461016192328, "learning_rate": 0.00013749603433390925, "loss": 3.1124448776245117, "step": 4474, "token_acc": 0.28595810307158465 }, { "epoch": 2.6232776311931985, "grad_norm": 0.26337725656864613, "learning_rate": 0.00013749423565738471, "loss": 3.0445902347564697, "step": 4475, "token_acc": 0.2937385512517298 }, { "epoch": 2.6238639695104076, "grad_norm": 0.29607821166623827, "learning_rate": 0.0001374924363468428, "loss": 3.070188045501709, "step": 4476, "token_acc": 0.2901865209510495 }, { "epoch": 2.6244503078276167, "grad_norm": 0.28914789226147614, "learning_rate": 0.0001374906364023004, "loss": 3.081263780593872, "step": 4477, "token_acc": 0.28879240822952995 }, { "epoch": 2.6250366461448253, "grad_norm": 0.2727191568924578, "learning_rate": 0.0001374888358237744, "loss": 3.0323848724365234, "step": 4478, "token_acc": 0.2962986572560699 }, { "epoch": 2.6256229844620345, "grad_norm": 0.28170033082298745, "learning_rate": 0.00013748703461128174, "loss": 3.0561113357543945, "step": 4479, "token_acc": 0.2924859420051294 }, { "epoch": 2.6262093227792436, "grad_norm": 0.2577371445828078, "learning_rate": 0.00013748523276483932, "loss": 3.0492372512817383, "step": 4480, "token_acc": 0.2936592405519978 }, { "epoch": 2.6267956610964527, "grad_norm": 0.2572849648200553, "learning_rate": 0.00013748343028446408, "loss": 3.0889220237731934, "step": 4481, "token_acc": 0.2887773722627737 }, { "epoch": 2.627381999413662, "grad_norm": 0.29117434856664287, "learning_rate": 0.00013748162717017293, "loss": 3.0544140338897705, "step": 4482, "token_acc": 0.2933192810191257 }, { "epoch": 2.627968337730871, "grad_norm": 0.30358482516992324, "learning_rate": 0.00013747982342198284, "loss": 3.0444793701171875, "step": 4483, "token_acc": 0.2931739951069783 }, { "epoch": 2.62855467604808, "grad_norm": 0.300915252790484, "learning_rate": 0.00013747801903991075, "loss": 3.068734645843506, "step": 4484, "token_acc": 0.2921289370896701 }, { "epoch": 2.6291410143652887, "grad_norm": 0.307208419764696, "learning_rate": 0.0001374762140239736, "loss": 3.096848487854004, "step": 4485, "token_acc": 0.28724592818851796 }, { "epoch": 2.629727352682498, "grad_norm": 0.3692342632576996, "learning_rate": 0.00013747440837418834, "loss": 3.0606870651245117, "step": 4486, "token_acc": 0.29231684567699157 }, { "epoch": 2.630313690999707, "grad_norm": 0.3472495600363577, "learning_rate": 0.00013747260209057193, "loss": 3.0597872734069824, "step": 4487, "token_acc": 0.2942744754076003 }, { "epoch": 2.630900029316916, "grad_norm": 0.2932074819906137, "learning_rate": 0.00013747079517314133, "loss": 3.0616931915283203, "step": 4488, "token_acc": 0.29272130158225546 }, { "epoch": 2.6314863676341247, "grad_norm": 0.4343772973672611, "learning_rate": 0.00013746898762191355, "loss": 3.0955214500427246, "step": 4489, "token_acc": 0.2870414856144803 }, { "epoch": 2.632072705951334, "grad_norm": 0.3402355524762752, "learning_rate": 0.00013746717943690555, "loss": 3.0270819664001465, "step": 4490, "token_acc": 0.29558606573718416 }, { "epoch": 2.632659044268543, "grad_norm": 0.3213517946158592, "learning_rate": 0.0001374653706181343, "loss": 3.103055953979492, "step": 4491, "token_acc": 0.2873525911500595 }, { "epoch": 2.633245382585752, "grad_norm": 0.3312927647702127, "learning_rate": 0.00013746356116561682, "loss": 3.144172430038452, "step": 4492, "token_acc": 0.28198694389205287 }, { "epoch": 2.633831720902961, "grad_norm": 0.34072900136920803, "learning_rate": 0.00013746175107937005, "loss": 3.0486061573028564, "step": 4493, "token_acc": 0.29520381059940604 }, { "epoch": 2.6344180592201702, "grad_norm": 0.29328336667025795, "learning_rate": 0.0001374599403594111, "loss": 3.0564751625061035, "step": 4494, "token_acc": 0.29197452744160435 }, { "epoch": 2.635004397537379, "grad_norm": 0.32864576559021946, "learning_rate": 0.00013745812900575687, "loss": 3.080198287963867, "step": 4495, "token_acc": 0.2895390402983495 }, { "epoch": 2.635590735854588, "grad_norm": 0.2732465883518336, "learning_rate": 0.00013745631701842442, "loss": 3.0655946731567383, "step": 4496, "token_acc": 0.2917062689653501 }, { "epoch": 2.636177074171797, "grad_norm": 0.34501286930516945, "learning_rate": 0.00013745450439743077, "loss": 3.0551295280456543, "step": 4497, "token_acc": 0.29316107781197687 }, { "epoch": 2.6367634124890063, "grad_norm": 0.2881453255632399, "learning_rate": 0.00013745269114279294, "loss": 3.056288242340088, "step": 4498, "token_acc": 0.29484974408747083 }, { "epoch": 2.637349750806215, "grad_norm": 0.30700088102205325, "learning_rate": 0.00013745087725452798, "loss": 3.066913604736328, "step": 4499, "token_acc": 0.2908676264492292 }, { "epoch": 2.637936089123424, "grad_norm": 0.2692430028490861, "learning_rate": 0.00013744906273265294, "loss": 3.0516200065612793, "step": 4500, "token_acc": 0.2940430129467871 }, { "epoch": 2.638522427440633, "grad_norm": 0.3371302974384048, "learning_rate": 0.00013744724757718484, "loss": 3.1064674854278564, "step": 4501, "token_acc": 0.28612330648853934 }, { "epoch": 2.6391087657578423, "grad_norm": 0.2705574252096661, "learning_rate": 0.0001374454317881407, "loss": 3.0408406257629395, "step": 4502, "token_acc": 0.2951778955544638 }, { "epoch": 2.6396951040750514, "grad_norm": 0.33431306198481364, "learning_rate": 0.00013744361536553764, "loss": 3.070269823074341, "step": 4503, "token_acc": 0.2926407530608605 }, { "epoch": 2.6402814423922605, "grad_norm": 0.2829075479525803, "learning_rate": 0.00013744179830939269, "loss": 3.0330653190612793, "step": 4504, "token_acc": 0.2955782740245192 }, { "epoch": 2.6408677807094696, "grad_norm": 0.2997351415327728, "learning_rate": 0.00013743998061972293, "loss": 3.0734310150146484, "step": 4505, "token_acc": 0.2900255995971211 }, { "epoch": 2.6414541190266783, "grad_norm": 0.27911715752868643, "learning_rate": 0.0001374381622965454, "loss": 3.0682992935180664, "step": 4506, "token_acc": 0.2897983102468502 }, { "epoch": 2.6420404573438874, "grad_norm": 0.2989643405773793, "learning_rate": 0.00013743634333987726, "loss": 3.0760579109191895, "step": 4507, "token_acc": 0.28815674371585664 }, { "epoch": 2.6426267956610965, "grad_norm": 0.27972314388874864, "learning_rate": 0.0001374345237497355, "loss": 3.0545825958251953, "step": 4508, "token_acc": 0.2933210485145344 }, { "epoch": 2.6432131339783056, "grad_norm": 0.319858181247578, "learning_rate": 0.00013743270352613727, "loss": 3.088914632797241, "step": 4509, "token_acc": 0.28934416389900364 }, { "epoch": 2.6437994722955143, "grad_norm": 0.3012741651006681, "learning_rate": 0.00013743088266909967, "loss": 3.0659196376800537, "step": 4510, "token_acc": 0.29071482844065205 }, { "epoch": 2.6443858106127234, "grad_norm": 0.33819822899657687, "learning_rate": 0.0001374290611786398, "loss": 3.09185791015625, "step": 4511, "token_acc": 0.2878797493254682 }, { "epoch": 2.6449721489299325, "grad_norm": 0.3544651689972899, "learning_rate": 0.00013742723905477472, "loss": 3.0865135192871094, "step": 4512, "token_acc": 0.2891197809369042 }, { "epoch": 2.6455584872471416, "grad_norm": 0.3543055835806596, "learning_rate": 0.00013742541629752162, "loss": 3.0942201614379883, "step": 4513, "token_acc": 0.2874524504238238 }, { "epoch": 2.6461448255643507, "grad_norm": 0.37593491510183574, "learning_rate": 0.00013742359290689759, "loss": 3.073152542114258, "step": 4514, "token_acc": 0.2879362786205141 }, { "epoch": 2.64673116388156, "grad_norm": 0.2948952297093295, "learning_rate": 0.00013742176888291975, "loss": 3.0756428241729736, "step": 4515, "token_acc": 0.2893849421583674 }, { "epoch": 2.647317502198769, "grad_norm": 0.37272138290929685, "learning_rate": 0.00013741994422560524, "loss": 3.086709499359131, "step": 4516, "token_acc": 0.2891275546309253 }, { "epoch": 2.6479038405159776, "grad_norm": 0.3702762574763534, "learning_rate": 0.0001374181189349712, "loss": 3.0641942024230957, "step": 4517, "token_acc": 0.2911838899201857 }, { "epoch": 2.6484901788331867, "grad_norm": 0.3714521513994398, "learning_rate": 0.0001374162930110348, "loss": 3.080141067504883, "step": 4518, "token_acc": 0.2902737611057273 }, { "epoch": 2.649076517150396, "grad_norm": 0.3661789248513293, "learning_rate": 0.00013741446645381317, "loss": 3.0809543132781982, "step": 4519, "token_acc": 0.28962052248926295 }, { "epoch": 2.649662855467605, "grad_norm": 0.315240988394481, "learning_rate": 0.00013741263926332346, "loss": 3.0562877655029297, "step": 4520, "token_acc": 0.29198376329773723 }, { "epoch": 2.6502491937848136, "grad_norm": 0.3712949001031072, "learning_rate": 0.00013741081143958284, "loss": 3.0590438842773438, "step": 4521, "token_acc": 0.2934216090427257 }, { "epoch": 2.6508355321020227, "grad_norm": 0.3658295166517285, "learning_rate": 0.0001374089829826085, "loss": 3.073647975921631, "step": 4522, "token_acc": 0.2888377357336229 }, { "epoch": 2.651421870419232, "grad_norm": 0.3197818920745739, "learning_rate": 0.0001374071538924176, "loss": 3.09137225151062, "step": 4523, "token_acc": 0.28804149775949295 }, { "epoch": 2.652008208736441, "grad_norm": 0.3004364801907915, "learning_rate": 0.0001374053241690273, "loss": 3.04290771484375, "step": 4524, "token_acc": 0.29553432606151925 }, { "epoch": 2.65259454705365, "grad_norm": 0.31207437644952923, "learning_rate": 0.00013740349381245485, "loss": 3.0680551528930664, "step": 4525, "token_acc": 0.29005338737218916 }, { "epoch": 2.653180885370859, "grad_norm": 0.3246646728601106, "learning_rate": 0.00013740166282271733, "loss": 3.035292863845825, "step": 4526, "token_acc": 0.2955817630218348 }, { "epoch": 2.6537672236880683, "grad_norm": 0.3527508068069239, "learning_rate": 0.00013739983119983207, "loss": 3.0933501720428467, "step": 4527, "token_acc": 0.28773853013932216 }, { "epoch": 2.654353562005277, "grad_norm": 0.3478749265821684, "learning_rate": 0.00013739799894381622, "loss": 3.1205596923828125, "step": 4528, "token_acc": 0.2843326681100424 }, { "epoch": 2.654939900322486, "grad_norm": 0.2979163614673156, "learning_rate": 0.00013739616605468698, "loss": 3.0574023723602295, "step": 4529, "token_acc": 0.29246849935464564 }, { "epoch": 2.655526238639695, "grad_norm": 0.3082733700046681, "learning_rate": 0.00013739433253246155, "loss": 3.064596176147461, "step": 4530, "token_acc": 0.2941308830809224 }, { "epoch": 2.6561125769569043, "grad_norm": 0.29164374333019905, "learning_rate": 0.0001373924983771572, "loss": 3.057131052017212, "step": 4531, "token_acc": 0.2916910708296414 }, { "epoch": 2.656698915274113, "grad_norm": 0.2884205576629347, "learning_rate": 0.00013739066358879113, "loss": 3.112107276916504, "step": 4532, "token_acc": 0.2869369888570502 }, { "epoch": 2.657285253591322, "grad_norm": 0.26717993918944666, "learning_rate": 0.0001373888281673806, "loss": 3.0781047344207764, "step": 4533, "token_acc": 0.2894382033970287 }, { "epoch": 2.657871591908531, "grad_norm": 0.29665961866738705, "learning_rate": 0.00013738699211294285, "loss": 3.070359230041504, "step": 4534, "token_acc": 0.29100672568253716 }, { "epoch": 2.6584579302257403, "grad_norm": 0.2755225384393486, "learning_rate": 0.0001373851554254951, "loss": 3.1055407524108887, "step": 4535, "token_acc": 0.2872393670266011 }, { "epoch": 2.6590442685429494, "grad_norm": 0.37738379992075227, "learning_rate": 0.00013738331810505462, "loss": 3.031566858291626, "step": 4536, "token_acc": 0.2966010752319606 }, { "epoch": 2.6596306068601585, "grad_norm": 0.37504910233581834, "learning_rate": 0.00013738148015163867, "loss": 3.0826618671417236, "step": 4537, "token_acc": 0.28885332598253166 }, { "epoch": 2.660216945177367, "grad_norm": 0.2771024387116616, "learning_rate": 0.0001373796415652645, "loss": 3.063094139099121, "step": 4538, "token_acc": 0.29270691661828296 }, { "epoch": 2.6608032834945763, "grad_norm": 0.35361566203100014, "learning_rate": 0.0001373778023459494, "loss": 3.1228065490722656, "step": 4539, "token_acc": 0.2840617129903761 }, { "epoch": 2.6613896218117854, "grad_norm": 0.3326145793451327, "learning_rate": 0.00013737596249371065, "loss": 3.110495090484619, "step": 4540, "token_acc": 0.2858684887294944 }, { "epoch": 2.6619759601289945, "grad_norm": 0.28741494314364296, "learning_rate": 0.0001373741220085655, "loss": 3.070624351501465, "step": 4541, "token_acc": 0.2913099903302967 }, { "epoch": 2.6625622984462036, "grad_norm": 0.3109902208545587, "learning_rate": 0.00013737228089053127, "loss": 3.058377742767334, "step": 4542, "token_acc": 0.2934938262682716 }, { "epoch": 2.6631486367634123, "grad_norm": 0.27994703002618154, "learning_rate": 0.00013737043913962524, "loss": 3.0249228477478027, "step": 4543, "token_acc": 0.2974201448703846 }, { "epoch": 2.6637349750806214, "grad_norm": 0.30004857084991077, "learning_rate": 0.0001373685967558647, "loss": 3.077030658721924, "step": 4544, "token_acc": 0.28988123986080666 }, { "epoch": 2.6643213133978305, "grad_norm": 0.2900047286148718, "learning_rate": 0.00013736675373926703, "loss": 3.0895135402679443, "step": 4545, "token_acc": 0.2880103228552805 }, { "epoch": 2.6649076517150396, "grad_norm": 0.3282769241765511, "learning_rate": 0.00013736491008984944, "loss": 3.0604395866394043, "step": 4546, "token_acc": 0.2948131349830582 }, { "epoch": 2.6654939900322487, "grad_norm": 0.28861671693521695, "learning_rate": 0.00013736306580762933, "loss": 3.0964484214782715, "step": 4547, "token_acc": 0.28817332454774797 }, { "epoch": 2.666080328349458, "grad_norm": 0.3044410405818131, "learning_rate": 0.00013736122089262395, "loss": 3.0955069065093994, "step": 4548, "token_acc": 0.2877980648137111 }, { "epoch": 2.6666666666666665, "grad_norm": 0.3083182021792271, "learning_rate": 0.0001373593753448507, "loss": 3.0277023315429688, "step": 4549, "token_acc": 0.29706689708586453 }, { "epoch": 2.6672530049838756, "grad_norm": 0.28363840179118355, "learning_rate": 0.00013735752916432687, "loss": 3.0810670852661133, "step": 4550, "token_acc": 0.288914767801457 }, { "epoch": 2.6678393433010847, "grad_norm": 0.3000481584331715, "learning_rate": 0.00013735568235106983, "loss": 3.072765827178955, "step": 4551, "token_acc": 0.2904116373292308 }, { "epoch": 2.668425681618294, "grad_norm": 0.30055561835411904, "learning_rate": 0.0001373538349050969, "loss": 3.0768237113952637, "step": 4552, "token_acc": 0.289230257885186 }, { "epoch": 2.6690120199355025, "grad_norm": 0.2954632151915868, "learning_rate": 0.00013735198682642547, "loss": 3.0627124309539795, "step": 4553, "token_acc": 0.2910717054159962 }, { "epoch": 2.6695983582527116, "grad_norm": 0.2968581250922359, "learning_rate": 0.00013735013811507288, "loss": 3.0866482257843018, "step": 4554, "token_acc": 0.2888657711960346 }, { "epoch": 2.6701846965699207, "grad_norm": 0.2858234387371888, "learning_rate": 0.0001373482887710565, "loss": 3.062004804611206, "step": 4555, "token_acc": 0.2901282701571896 }, { "epoch": 2.67077103488713, "grad_norm": 0.31495469774461377, "learning_rate": 0.0001373464387943937, "loss": 3.0563271045684814, "step": 4556, "token_acc": 0.2919139894840176 }, { "epoch": 2.671357373204339, "grad_norm": 0.28392186348033, "learning_rate": 0.00013734458818510185, "loss": 3.1017322540283203, "step": 4557, "token_acc": 0.28585819871240414 }, { "epoch": 2.671943711521548, "grad_norm": 0.3006992878182451, "learning_rate": 0.00013734273694319835, "loss": 3.052558183670044, "step": 4558, "token_acc": 0.2944821777664449 }, { "epoch": 2.672530049838757, "grad_norm": 0.28622296687827337, "learning_rate": 0.00013734088506870055, "loss": 3.093113899230957, "step": 4559, "token_acc": 0.2875392275935338 }, { "epoch": 2.673116388155966, "grad_norm": 0.2880224484646094, "learning_rate": 0.0001373390325616259, "loss": 3.05501651763916, "step": 4560, "token_acc": 0.2918912053869371 }, { "epoch": 2.673702726473175, "grad_norm": 0.38004753707376665, "learning_rate": 0.0001373371794219918, "loss": 3.091177225112915, "step": 4561, "token_acc": 0.28923965172044513 }, { "epoch": 2.674289064790384, "grad_norm": 0.44895660179823377, "learning_rate": 0.0001373353256498156, "loss": 3.10707426071167, "step": 4562, "token_acc": 0.2853962338695469 }, { "epoch": 2.674875403107593, "grad_norm": 0.34871977897799034, "learning_rate": 0.0001373334712451148, "loss": 3.1100454330444336, "step": 4563, "token_acc": 0.2850130411700892 }, { "epoch": 2.675461741424802, "grad_norm": 0.3378394758294805, "learning_rate": 0.00013733161620790673, "loss": 3.0971598625183105, "step": 4564, "token_acc": 0.28840658264053765 }, { "epoch": 2.676048079742011, "grad_norm": 0.3507666743998226, "learning_rate": 0.00013732976053820885, "loss": 3.1092798709869385, "step": 4565, "token_acc": 0.28592710168007845 }, { "epoch": 2.67663441805922, "grad_norm": 0.31230355473321864, "learning_rate": 0.00013732790423603863, "loss": 3.0767576694488525, "step": 4566, "token_acc": 0.2891803099908666 }, { "epoch": 2.677220756376429, "grad_norm": 0.3027973413143864, "learning_rate": 0.00013732604730141347, "loss": 3.068896770477295, "step": 4567, "token_acc": 0.29142429906542056 }, { "epoch": 2.6778070946936383, "grad_norm": 0.3346302994019165, "learning_rate": 0.0001373241897343508, "loss": 3.12660551071167, "step": 4568, "token_acc": 0.2820050167136875 }, { "epoch": 2.6783934330108474, "grad_norm": 0.2685555032115937, "learning_rate": 0.00013732233153486808, "loss": 3.0713300704956055, "step": 4569, "token_acc": 0.29165210753508974 }, { "epoch": 2.6789797713280565, "grad_norm": 0.291504687941692, "learning_rate": 0.00013732047270298282, "loss": 3.0642271041870117, "step": 4570, "token_acc": 0.2908073611502897 }, { "epoch": 2.679566109645265, "grad_norm": 0.32981816400438313, "learning_rate": 0.00013731861323871238, "loss": 3.0596375465393066, "step": 4571, "token_acc": 0.2913828373242508 }, { "epoch": 2.6801524479624743, "grad_norm": 0.3597449519855727, "learning_rate": 0.00013731675314207432, "loss": 3.0819168090820312, "step": 4572, "token_acc": 0.28928950048578717 }, { "epoch": 2.6807387862796834, "grad_norm": 0.3329706862045801, "learning_rate": 0.00013731489241308606, "loss": 3.0376362800598145, "step": 4573, "token_acc": 0.2963131666422649 }, { "epoch": 2.6813251245968925, "grad_norm": 0.3345787609661259, "learning_rate": 0.0001373130310517651, "loss": 3.025294780731201, "step": 4574, "token_acc": 0.29589207862903766 }, { "epoch": 2.681911462914101, "grad_norm": 0.2889130824878145, "learning_rate": 0.0001373111690581289, "loss": 3.023744583129883, "step": 4575, "token_acc": 0.2972737819025522 }, { "epoch": 2.6824978012313103, "grad_norm": 0.30946782320387745, "learning_rate": 0.00013730930643219498, "loss": 3.0749611854553223, "step": 4576, "token_acc": 0.29202005127218456 }, { "epoch": 2.6830841395485194, "grad_norm": 0.33347309829399296, "learning_rate": 0.00013730744317398086, "loss": 3.071925640106201, "step": 4577, "token_acc": 0.2905604532243669 }, { "epoch": 2.6836704778657285, "grad_norm": 0.2725724513314644, "learning_rate": 0.00013730557928350395, "loss": 3.040964365005493, "step": 4578, "token_acc": 0.29513947901790133 }, { "epoch": 2.6842568161829377, "grad_norm": 0.324208926488268, "learning_rate": 0.00013730371476078186, "loss": 3.0867202281951904, "step": 4579, "token_acc": 0.28794161422565734 }, { "epoch": 2.6848431545001468, "grad_norm": 0.3132618742465307, "learning_rate": 0.00013730184960583205, "loss": 3.095482349395752, "step": 4580, "token_acc": 0.2862830323543633 }, { "epoch": 2.685429492817356, "grad_norm": 0.2879602181704379, "learning_rate": 0.00013729998381867205, "loss": 3.0938520431518555, "step": 4581, "token_acc": 0.28762722736441315 }, { "epoch": 2.6860158311345645, "grad_norm": 0.40888468437613634, "learning_rate": 0.0001372981173993194, "loss": 3.143092632293701, "step": 4582, "token_acc": 0.28189501538429795 }, { "epoch": 2.6866021694517737, "grad_norm": 0.37559990334447524, "learning_rate": 0.00013729625034779162, "loss": 3.072305679321289, "step": 4583, "token_acc": 0.2902383439369481 }, { "epoch": 2.6871885077689828, "grad_norm": 0.3140639331353125, "learning_rate": 0.00013729438266410623, "loss": 3.0520448684692383, "step": 4584, "token_acc": 0.29372024192510615 }, { "epoch": 2.687774846086192, "grad_norm": 0.42503434050472433, "learning_rate": 0.00013729251434828083, "loss": 3.0619285106658936, "step": 4585, "token_acc": 0.2918940513762934 }, { "epoch": 2.6883611844034006, "grad_norm": 0.33292024848165375, "learning_rate": 0.0001372906454003329, "loss": 3.0980563163757324, "step": 4586, "token_acc": 0.287998634571997 }, { "epoch": 2.6889475227206097, "grad_norm": 0.3281519261921524, "learning_rate": 0.00013728877582028004, "loss": 3.085754871368408, "step": 4587, "token_acc": 0.2880705214006192 }, { "epoch": 2.6895338610378188, "grad_norm": 0.25373747842140015, "learning_rate": 0.00013728690560813983, "loss": 3.0732369422912598, "step": 4588, "token_acc": 0.2910603827632016 }, { "epoch": 2.690120199355028, "grad_norm": 0.35994000399555687, "learning_rate": 0.0001372850347639298, "loss": 3.037281036376953, "step": 4589, "token_acc": 0.29614052893129633 }, { "epoch": 2.690706537672237, "grad_norm": 0.27046230531460835, "learning_rate": 0.00013728316328766752, "loss": 3.0872697830200195, "step": 4590, "token_acc": 0.288361827264557 }, { "epoch": 2.691292875989446, "grad_norm": 0.360549886814419, "learning_rate": 0.0001372812911793706, "loss": 3.051240921020508, "step": 4591, "token_acc": 0.29471915668046483 }, { "epoch": 2.6918792143066548, "grad_norm": 0.3013384453048884, "learning_rate": 0.00013727941843905662, "loss": 3.0594334602355957, "step": 4592, "token_acc": 0.293474875282752 }, { "epoch": 2.692465552623864, "grad_norm": 0.31123662333392366, "learning_rate": 0.00013727754506674314, "loss": 3.083174705505371, "step": 4593, "token_acc": 0.2888921773054416 }, { "epoch": 2.693051890941073, "grad_norm": 0.3450814393652759, "learning_rate": 0.0001372756710624478, "loss": 3.011199951171875, "step": 4594, "token_acc": 0.29820594387086496 }, { "epoch": 2.693638229258282, "grad_norm": 0.2991844292590364, "learning_rate": 0.0001372737964261882, "loss": 3.0423927307128906, "step": 4595, "token_acc": 0.29420687478684904 }, { "epoch": 2.6942245675754912, "grad_norm": 0.3083194728818996, "learning_rate": 0.00013727192115798188, "loss": 3.1141276359558105, "step": 4596, "token_acc": 0.28490790104076413 }, { "epoch": 2.6948109058927, "grad_norm": 0.3340472489379645, "learning_rate": 0.00013727004525784656, "loss": 3.0891127586364746, "step": 4597, "token_acc": 0.28709002820591056 }, { "epoch": 2.695397244209909, "grad_norm": 0.282345460141468, "learning_rate": 0.00013726816872579979, "loss": 3.080362558364868, "step": 4598, "token_acc": 0.2896671327167133 }, { "epoch": 2.695983582527118, "grad_norm": 0.34572595262021083, "learning_rate": 0.00013726629156185922, "loss": 3.083634853363037, "step": 4599, "token_acc": 0.28858676569166763 }, { "epoch": 2.6965699208443272, "grad_norm": 0.33409714084629816, "learning_rate": 0.0001372644137660425, "loss": 3.03096342086792, "step": 4600, "token_acc": 0.29632946098863655 }, { "epoch": 2.6971562591615363, "grad_norm": 0.284284990655958, "learning_rate": 0.00013726253533836725, "loss": 3.0745086669921875, "step": 4601, "token_acc": 0.2888514532837172 }, { "epoch": 2.6977425974787455, "grad_norm": 0.3545023195050065, "learning_rate": 0.0001372606562788511, "loss": 3.093515634536743, "step": 4602, "token_acc": 0.28666612227399624 }, { "epoch": 2.698328935795954, "grad_norm": 0.359696574238944, "learning_rate": 0.00013725877658751174, "loss": 3.115234375, "step": 4603, "token_acc": 0.28445914959438484 }, { "epoch": 2.6989152741131632, "grad_norm": 0.31530924166126517, "learning_rate": 0.0001372568962643668, "loss": 3.046837568283081, "step": 4604, "token_acc": 0.29512913831414156 }, { "epoch": 2.6995016124303723, "grad_norm": 0.3018615579284375, "learning_rate": 0.00013725501530943395, "loss": 3.002796173095703, "step": 4605, "token_acc": 0.3021430059368816 }, { "epoch": 2.7000879507475815, "grad_norm": 0.3133603057599074, "learning_rate": 0.00013725313372273085, "loss": 3.0402657985687256, "step": 4606, "token_acc": 0.2940854750108718 }, { "epoch": 2.70067428906479, "grad_norm": 0.28798801816123304, "learning_rate": 0.00013725125150427521, "loss": 3.0450315475463867, "step": 4607, "token_acc": 0.29462965656779383 }, { "epoch": 2.7012606273819992, "grad_norm": 0.29052676188547427, "learning_rate": 0.00013724936865408465, "loss": 3.0572915077209473, "step": 4608, "token_acc": 0.2932958411950096 }, { "epoch": 2.7018469656992083, "grad_norm": 0.30348748587655283, "learning_rate": 0.00013724748517217688, "loss": 3.0782089233398438, "step": 4609, "token_acc": 0.2909379369689147 }, { "epoch": 2.7024333040164175, "grad_norm": 0.3239671233552501, "learning_rate": 0.00013724560105856965, "loss": 3.027193307876587, "step": 4610, "token_acc": 0.29525377449405715 }, { "epoch": 2.7030196423336266, "grad_norm": 0.3000059637709648, "learning_rate": 0.00013724371631328058, "loss": 3.054741382598877, "step": 4611, "token_acc": 0.2928138952996532 }, { "epoch": 2.7036059806508357, "grad_norm": 0.3216486896668394, "learning_rate": 0.00013724183093632742, "loss": 3.0811262130737305, "step": 4612, "token_acc": 0.2890756432818811 }, { "epoch": 2.704192318968045, "grad_norm": 0.4335428447454894, "learning_rate": 0.00013723994492772788, "loss": 3.0160369873046875, "step": 4613, "token_acc": 0.2986327512286939 }, { "epoch": 2.7047786572852535, "grad_norm": 0.4681177517672436, "learning_rate": 0.00013723805828749964, "loss": 3.0675861835479736, "step": 4614, "token_acc": 0.2904853276025277 }, { "epoch": 2.7053649956024626, "grad_norm": 0.34053942585608354, "learning_rate": 0.00013723617101566048, "loss": 3.069357395172119, "step": 4615, "token_acc": 0.29021048659301035 }, { "epoch": 2.7059513339196717, "grad_norm": 0.32679711959989066, "learning_rate": 0.00013723428311222805, "loss": 3.074840545654297, "step": 4616, "token_acc": 0.29085700569806516 }, { "epoch": 2.706537672236881, "grad_norm": 0.3631607233836758, "learning_rate": 0.00013723239457722014, "loss": 3.1168670654296875, "step": 4617, "token_acc": 0.28500542278293595 }, { "epoch": 2.7071240105540895, "grad_norm": 0.30944801798831184, "learning_rate": 0.0001372305054106545, "loss": 3.1093590259552, "step": 4618, "token_acc": 0.28613807687250736 }, { "epoch": 2.7077103488712986, "grad_norm": 0.3091127128134372, "learning_rate": 0.00013722861561254885, "loss": 3.1028690338134766, "step": 4619, "token_acc": 0.2852865263601132 }, { "epoch": 2.7082966871885077, "grad_norm": 0.3125408254625326, "learning_rate": 0.00013722672518292096, "loss": 3.0512280464172363, "step": 4620, "token_acc": 0.29162727370885466 }, { "epoch": 2.708883025505717, "grad_norm": 0.3366863659368551, "learning_rate": 0.00013722483412178857, "loss": 3.052410840988159, "step": 4621, "token_acc": 0.2929979092407393 }, { "epoch": 2.709469363822926, "grad_norm": 0.29294366976051567, "learning_rate": 0.0001372229424291694, "loss": 3.072221279144287, "step": 4622, "token_acc": 0.29019422943433787 }, { "epoch": 2.710055702140135, "grad_norm": 0.2981835171320265, "learning_rate": 0.00013722105010508133, "loss": 3.0546960830688477, "step": 4623, "token_acc": 0.2925256309907228 }, { "epoch": 2.710642040457344, "grad_norm": 0.33784872283514544, "learning_rate": 0.00013721915714954205, "loss": 3.0673251152038574, "step": 4624, "token_acc": 0.2911468892864242 }, { "epoch": 2.711228378774553, "grad_norm": 0.3250408321371394, "learning_rate": 0.0001372172635625694, "loss": 3.0613882541656494, "step": 4625, "token_acc": 0.2907878127602516 }, { "epoch": 2.711814717091762, "grad_norm": 0.3551579961697743, "learning_rate": 0.0001372153693441811, "loss": 3.105496644973755, "step": 4626, "token_acc": 0.2864916365637308 }, { "epoch": 2.712401055408971, "grad_norm": 0.3292667703478197, "learning_rate": 0.000137213474494395, "loss": 3.1070785522460938, "step": 4627, "token_acc": 0.28521062014822157 }, { "epoch": 2.71298739372618, "grad_norm": 0.3249840887173426, "learning_rate": 0.00013721157901322884, "loss": 3.0705509185791016, "step": 4628, "token_acc": 0.29172157206918964 }, { "epoch": 2.713573732043389, "grad_norm": 0.3534857655964296, "learning_rate": 0.00013720968290070051, "loss": 3.059196710586548, "step": 4629, "token_acc": 0.29233970105376694 }, { "epoch": 2.714160070360598, "grad_norm": 0.30552468839914665, "learning_rate": 0.00013720778615682774, "loss": 3.0478081703186035, "step": 4630, "token_acc": 0.2924005032021958 }, { "epoch": 2.714746408677807, "grad_norm": 0.2932979069073854, "learning_rate": 0.0001372058887816284, "loss": 3.060421943664551, "step": 4631, "token_acc": 0.29230218651767614 }, { "epoch": 2.715332746995016, "grad_norm": 0.3198345435771246, "learning_rate": 0.0001372039907751203, "loss": 3.0909838676452637, "step": 4632, "token_acc": 0.288681456338142 }, { "epoch": 2.7159190853122253, "grad_norm": 0.3030597503644988, "learning_rate": 0.00013720209213732124, "loss": 3.087268352508545, "step": 4633, "token_acc": 0.2887917063387557 }, { "epoch": 2.7165054236294344, "grad_norm": 0.2949125865011794, "learning_rate": 0.00013720019286824912, "loss": 3.0773468017578125, "step": 4634, "token_acc": 0.28858600313312116 }, { "epoch": 2.7170917619466435, "grad_norm": 0.25734936655133034, "learning_rate": 0.00013719829296792173, "loss": 3.1264283657073975, "step": 4635, "token_acc": 0.2829478059224129 }, { "epoch": 2.717678100263852, "grad_norm": 0.31215351922702717, "learning_rate": 0.00013719639243635692, "loss": 3.0454394817352295, "step": 4636, "token_acc": 0.29405746197521654 }, { "epoch": 2.7182644385810613, "grad_norm": 0.303838470528605, "learning_rate": 0.00013719449127357254, "loss": 3.0922412872314453, "step": 4637, "token_acc": 0.2884730919880683 }, { "epoch": 2.7188507768982704, "grad_norm": 0.30572499640591616, "learning_rate": 0.00013719258947958646, "loss": 3.0443553924560547, "step": 4638, "token_acc": 0.2930546878847814 }, { "epoch": 2.7194371152154795, "grad_norm": 0.3125310572118373, "learning_rate": 0.00013719068705441657, "loss": 3.092749834060669, "step": 4639, "token_acc": 0.288975850388257 }, { "epoch": 2.720023453532688, "grad_norm": 0.3511025429775157, "learning_rate": 0.0001371887839980807, "loss": 3.0956149101257324, "step": 4640, "token_acc": 0.2858726763116649 }, { "epoch": 2.7206097918498973, "grad_norm": 0.3441957897563102, "learning_rate": 0.00013718688031059674, "loss": 3.0626916885375977, "step": 4641, "token_acc": 0.2915112795744941 }, { "epoch": 2.7211961301671064, "grad_norm": 0.3668622720711865, "learning_rate": 0.0001371849759919826, "loss": 3.081163167953491, "step": 4642, "token_acc": 0.28903033673022016 }, { "epoch": 2.7217824684843155, "grad_norm": 0.3864319472591011, "learning_rate": 0.00013718307104225612, "loss": 3.0808796882629395, "step": 4643, "token_acc": 0.28866641325676246 }, { "epoch": 2.7223688068015246, "grad_norm": 0.4052525282319413, "learning_rate": 0.00013718116546143522, "loss": 3.067661762237549, "step": 4644, "token_acc": 0.2921307622762245 }, { "epoch": 2.7229551451187337, "grad_norm": 0.3062027632945052, "learning_rate": 0.0001371792592495378, "loss": 3.08237624168396, "step": 4645, "token_acc": 0.2893445603683615 }, { "epoch": 2.7235414834359424, "grad_norm": 0.31900900898352386, "learning_rate": 0.00013717735240658174, "loss": 3.076756000518799, "step": 4646, "token_acc": 0.289236042966859 }, { "epoch": 2.7241278217531515, "grad_norm": 0.385087227463911, "learning_rate": 0.000137175444932585, "loss": 3.0756025314331055, "step": 4647, "token_acc": 0.29054068148395373 }, { "epoch": 2.7247141600703606, "grad_norm": 0.26957241858256603, "learning_rate": 0.0001371735368275655, "loss": 3.0599937438964844, "step": 4648, "token_acc": 0.2932118245969828 }, { "epoch": 2.7253004983875697, "grad_norm": 0.3796463293172065, "learning_rate": 0.0001371716280915411, "loss": 3.027343273162842, "step": 4649, "token_acc": 0.2979689018708606 }, { "epoch": 2.7258868367047784, "grad_norm": 0.36920687937172897, "learning_rate": 0.0001371697187245298, "loss": 3.0219480991363525, "step": 4650, "token_acc": 0.29866804488297843 }, { "epoch": 2.7264731750219875, "grad_norm": 0.2651930395952895, "learning_rate": 0.00013716780872654948, "loss": 3.023284673690796, "step": 4651, "token_acc": 0.29661877193029196 }, { "epoch": 2.7270595133391966, "grad_norm": 0.4031078134454338, "learning_rate": 0.0001371658980976181, "loss": 3.0579094886779785, "step": 4652, "token_acc": 0.2920567424228522 }, { "epoch": 2.7276458516564057, "grad_norm": 0.26127779771668336, "learning_rate": 0.00013716398683775365, "loss": 3.0470476150512695, "step": 4653, "token_acc": 0.29355656403503383 }, { "epoch": 2.728232189973615, "grad_norm": 0.303747678696168, "learning_rate": 0.00013716207494697403, "loss": 3.0965912342071533, "step": 4654, "token_acc": 0.2873409619808418 }, { "epoch": 2.728818528290824, "grad_norm": 0.29855832942073834, "learning_rate": 0.00013716016242529722, "loss": 3.099052906036377, "step": 4655, "token_acc": 0.28842650546005966 }, { "epoch": 2.729404866608033, "grad_norm": 0.3342339819084472, "learning_rate": 0.00013715824927274116, "loss": 3.014275550842285, "step": 4656, "token_acc": 0.29808178565981597 }, { "epoch": 2.7299912049252417, "grad_norm": 0.3043326481389358, "learning_rate": 0.00013715633548932386, "loss": 3.0775539875030518, "step": 4657, "token_acc": 0.2892878810066735 }, { "epoch": 2.730577543242451, "grad_norm": 0.2817202425683157, "learning_rate": 0.00013715442107506328, "loss": 3.0511786937713623, "step": 4658, "token_acc": 0.29335321687260335 }, { "epoch": 2.73116388155966, "grad_norm": 0.28392520773135965, "learning_rate": 0.00013715250602997745, "loss": 3.036102533340454, "step": 4659, "token_acc": 0.2954363658226723 }, { "epoch": 2.731750219876869, "grad_norm": 0.29323828946359787, "learning_rate": 0.00013715059035408425, "loss": 3.0630078315734863, "step": 4660, "token_acc": 0.2921022757440224 }, { "epoch": 2.7323365581940777, "grad_norm": 0.3278566182677005, "learning_rate": 0.00013714867404740177, "loss": 3.062596082687378, "step": 4661, "token_acc": 0.2921800965947674 }, { "epoch": 2.732922896511287, "grad_norm": 0.30987588372629177, "learning_rate": 0.000137146757109948, "loss": 3.0138630867004395, "step": 4662, "token_acc": 0.2994056424204828 }, { "epoch": 2.733509234828496, "grad_norm": 0.3056280754597164, "learning_rate": 0.0001371448395417409, "loss": 3.075791597366333, "step": 4663, "token_acc": 0.2885157277338287 }, { "epoch": 2.734095573145705, "grad_norm": 0.25058130906685194, "learning_rate": 0.0001371429213427985, "loss": 3.0427842140197754, "step": 4664, "token_acc": 0.294549490382951 }, { "epoch": 2.734681911462914, "grad_norm": 0.2921262056888084, "learning_rate": 0.00013714100251313886, "loss": 3.0665647983551025, "step": 4665, "token_acc": 0.2929076825920532 }, { "epoch": 2.7352682497801233, "grad_norm": 0.33380965684513686, "learning_rate": 0.00013713908305277995, "loss": 3.0743348598480225, "step": 4666, "token_acc": 0.29085381435748553 }, { "epoch": 2.7358545880973324, "grad_norm": 0.275615936048703, "learning_rate": 0.00013713716296173984, "loss": 3.074038505554199, "step": 4667, "token_acc": 0.29078432451843056 }, { "epoch": 2.736440926414541, "grad_norm": 0.28679042983555186, "learning_rate": 0.00013713524224003655, "loss": 3.008634090423584, "step": 4668, "token_acc": 0.2977739502596623 }, { "epoch": 2.73702726473175, "grad_norm": 0.29273435585158947, "learning_rate": 0.00013713332088768814, "loss": 3.1060876846313477, "step": 4669, "token_acc": 0.2859662969506267 }, { "epoch": 2.7376136030489593, "grad_norm": 0.2712365948103812, "learning_rate": 0.00013713139890471265, "loss": 3.068361520767212, "step": 4670, "token_acc": 0.29063215164730777 }, { "epoch": 2.7381999413661684, "grad_norm": 0.2821305526209546, "learning_rate": 0.0001371294762911281, "loss": 3.028829574584961, "step": 4671, "token_acc": 0.29626467888374913 }, { "epoch": 2.738786279683377, "grad_norm": 0.2817901581959042, "learning_rate": 0.0001371275530469526, "loss": 3.076486587524414, "step": 4672, "token_acc": 0.28893197327631187 }, { "epoch": 2.739372618000586, "grad_norm": 0.32906803019899245, "learning_rate": 0.00013712562917220415, "loss": 3.0902698040008545, "step": 4673, "token_acc": 0.28736995466164544 }, { "epoch": 2.7399589563177953, "grad_norm": 0.33837683482148806, "learning_rate": 0.0001371237046669009, "loss": 3.051807403564453, "step": 4674, "token_acc": 0.2962123534061396 }, { "epoch": 2.7405452946350044, "grad_norm": 0.3638746615834578, "learning_rate": 0.00013712177953106088, "loss": 3.0917587280273438, "step": 4675, "token_acc": 0.28699874265560626 }, { "epoch": 2.7411316329522135, "grad_norm": 0.3834274493168949, "learning_rate": 0.00013711985376470222, "loss": 3.0815956592559814, "step": 4676, "token_acc": 0.2897532678221256 }, { "epoch": 2.7417179712694226, "grad_norm": 0.2953184745806514, "learning_rate": 0.00013711792736784297, "loss": 3.06042218208313, "step": 4677, "token_acc": 0.2919671267407744 }, { "epoch": 2.7423043095866317, "grad_norm": 0.3141705413655958, "learning_rate": 0.0001371160003405012, "loss": 3.1010549068450928, "step": 4678, "token_acc": 0.28532810070035175 }, { "epoch": 2.7428906479038404, "grad_norm": 0.3089294994230565, "learning_rate": 0.0001371140726826951, "loss": 3.0901567935943604, "step": 4679, "token_acc": 0.2874934315574634 }, { "epoch": 2.7434769862210495, "grad_norm": 0.28533671772180463, "learning_rate": 0.0001371121443944427, "loss": 3.039623260498047, "step": 4680, "token_acc": 0.29444060437697606 }, { "epoch": 2.7440633245382586, "grad_norm": 0.30368584965023476, "learning_rate": 0.00013711021547576212, "loss": 3.1114554405212402, "step": 4681, "token_acc": 0.2857752536873687 }, { "epoch": 2.7446496628554677, "grad_norm": 0.35458459299404604, "learning_rate": 0.00013710828592667152, "loss": 3.0802512168884277, "step": 4682, "token_acc": 0.2884036477069367 }, { "epoch": 2.7452360011726764, "grad_norm": 0.3112718888385984, "learning_rate": 0.00013710635574718902, "loss": 3.021833896636963, "step": 4683, "token_acc": 0.29513349461409843 }, { "epoch": 2.7458223394898855, "grad_norm": 0.3562722640745905, "learning_rate": 0.00013710442493733267, "loss": 3.0417211055755615, "step": 4684, "token_acc": 0.2959423493383204 }, { "epoch": 2.7464086778070946, "grad_norm": 0.33513570712228946, "learning_rate": 0.00013710249349712075, "loss": 3.079073190689087, "step": 4685, "token_acc": 0.2885143173801939 }, { "epoch": 2.7469950161243037, "grad_norm": 0.32798886218575923, "learning_rate": 0.00013710056142657127, "loss": 3.06345796585083, "step": 4686, "token_acc": 0.29214969962656273 }, { "epoch": 2.747581354441513, "grad_norm": 0.3365384549801903, "learning_rate": 0.00013709862872570244, "loss": 3.029172897338867, "step": 4687, "token_acc": 0.29769283171167465 }, { "epoch": 2.748167692758722, "grad_norm": 0.26849010899258874, "learning_rate": 0.00013709669539453242, "loss": 3.05407452583313, "step": 4688, "token_acc": 0.2943078316815033 }, { "epoch": 2.748754031075931, "grad_norm": 0.3921573648411156, "learning_rate": 0.0001370947614330794, "loss": 3.086358070373535, "step": 4689, "token_acc": 0.2892593099671413 }, { "epoch": 2.7493403693931397, "grad_norm": 0.28813338622658896, "learning_rate": 0.00013709282684136145, "loss": 3.053274154663086, "step": 4690, "token_acc": 0.29485944523853913 }, { "epoch": 2.749926707710349, "grad_norm": 0.3579032688645566, "learning_rate": 0.0001370908916193968, "loss": 3.0473785400390625, "step": 4691, "token_acc": 0.2943172799536413 }, { "epoch": 2.750513046027558, "grad_norm": 0.3534504534079015, "learning_rate": 0.0001370889557672037, "loss": 3.0648417472839355, "step": 4692, "token_acc": 0.2914199837234759 }, { "epoch": 2.751099384344767, "grad_norm": 0.2740320608447911, "learning_rate": 0.0001370870192848002, "loss": 3.0985889434814453, "step": 4693, "token_acc": 0.2854227019689745 }, { "epoch": 2.7516857226619758, "grad_norm": 0.34343294461616114, "learning_rate": 0.00013708508217220457, "loss": 3.114053726196289, "step": 4694, "token_acc": 0.28550011651692087 }, { "epoch": 2.752272060979185, "grad_norm": 0.2939976027840481, "learning_rate": 0.00013708314442943497, "loss": 3.062159776687622, "step": 4695, "token_acc": 0.2916887021266064 }, { "epoch": 2.752858399296394, "grad_norm": 0.3406318833595693, "learning_rate": 0.00013708120605650963, "loss": 3.0345864295959473, "step": 4696, "token_acc": 0.2951055989843479 }, { "epoch": 2.753444737613603, "grad_norm": 0.2961960943850363, "learning_rate": 0.0001370792670534468, "loss": 3.0678000450134277, "step": 4697, "token_acc": 0.2892025213907601 }, { "epoch": 2.754031075930812, "grad_norm": 0.3089197689768157, "learning_rate": 0.0001370773274202646, "loss": 3.1106622219085693, "step": 4698, "token_acc": 0.28458876156757157 }, { "epoch": 2.7546174142480213, "grad_norm": 0.3101572549155367, "learning_rate": 0.00013707538715698132, "loss": 3.0618839263916016, "step": 4699, "token_acc": 0.29171420480943966 }, { "epoch": 2.75520375256523, "grad_norm": 0.27500096270362484, "learning_rate": 0.00013707344626361515, "loss": 3.071516513824463, "step": 4700, "token_acc": 0.290153928890332 }, { "epoch": 2.755790090882439, "grad_norm": 0.26219293122341814, "learning_rate": 0.00013707150474018433, "loss": 3.0642361640930176, "step": 4701, "token_acc": 0.29142087714798665 }, { "epoch": 2.756376429199648, "grad_norm": 0.28405478903105286, "learning_rate": 0.00013706956258670712, "loss": 3.076244354248047, "step": 4702, "token_acc": 0.29035838883602916 }, { "epoch": 2.7569627675168573, "grad_norm": 0.31324206999762183, "learning_rate": 0.00013706761980320173, "loss": 3.0728230476379395, "step": 4703, "token_acc": 0.2904097509251044 }, { "epoch": 2.757549105834066, "grad_norm": 0.25028313187446083, "learning_rate": 0.00013706567638968644, "loss": 3.0894627571105957, "step": 4704, "token_acc": 0.2875475857345793 }, { "epoch": 2.758135444151275, "grad_norm": 0.3101241656121886, "learning_rate": 0.00013706373234617948, "loss": 3.0572173595428467, "step": 4705, "token_acc": 0.2940423514538559 }, { "epoch": 2.758721782468484, "grad_norm": 0.3085137301156558, "learning_rate": 0.00013706178767269913, "loss": 3.0969886779785156, "step": 4706, "token_acc": 0.2885033629762671 }, { "epoch": 2.7593081207856933, "grad_norm": 0.3162419988991531, "learning_rate": 0.00013705984236926367, "loss": 3.051300048828125, "step": 4707, "token_acc": 0.2953059884492208 }, { "epoch": 2.7598944591029024, "grad_norm": 0.34356460213921486, "learning_rate": 0.00013705789643589134, "loss": 3.0466763973236084, "step": 4708, "token_acc": 0.29424901836879086 }, { "epoch": 2.7604807974201115, "grad_norm": 0.3572469750640098, "learning_rate": 0.00013705594987260044, "loss": 3.0647950172424316, "step": 4709, "token_acc": 0.2927943059900481 }, { "epoch": 2.7610671357373207, "grad_norm": 0.34518077114134166, "learning_rate": 0.00013705400267940925, "loss": 3.074469804763794, "step": 4710, "token_acc": 0.28942934603648496 }, { "epoch": 2.7616534740545293, "grad_norm": 0.3866458586441254, "learning_rate": 0.00013705205485633603, "loss": 3.1213059425354004, "step": 4711, "token_acc": 0.2843682004891753 }, { "epoch": 2.7622398123717384, "grad_norm": 0.3630731899911009, "learning_rate": 0.00013705010640339914, "loss": 3.0784640312194824, "step": 4712, "token_acc": 0.288812678879881 }, { "epoch": 2.7628261506889475, "grad_norm": 0.305861782829285, "learning_rate": 0.00013704815732061684, "loss": 3.07456636428833, "step": 4713, "token_acc": 0.29005863726072445 }, { "epoch": 2.7634124890061567, "grad_norm": 0.33521756626415367, "learning_rate": 0.00013704620760800743, "loss": 3.08918833732605, "step": 4714, "token_acc": 0.2880615956485588 }, { "epoch": 2.7639988273233653, "grad_norm": 0.3427837982771273, "learning_rate": 0.0001370442572655893, "loss": 3.075580596923828, "step": 4715, "token_acc": 0.2889680260386751 }, { "epoch": 2.7645851656405744, "grad_norm": 0.3191998850909233, "learning_rate": 0.0001370423062933807, "loss": 3.051447868347168, "step": 4716, "token_acc": 0.2932676596776404 }, { "epoch": 2.7651715039577835, "grad_norm": 0.33656892160255814, "learning_rate": 0.00013704035469139992, "loss": 3.082031726837158, "step": 4717, "token_acc": 0.28997611797562406 }, { "epoch": 2.7657578422749927, "grad_norm": 0.32789469239635893, "learning_rate": 0.00013703840245966542, "loss": 3.0598161220550537, "step": 4718, "token_acc": 0.2942095977053003 }, { "epoch": 2.7663441805922018, "grad_norm": 0.29972958430977487, "learning_rate": 0.00013703644959819542, "loss": 3.059307098388672, "step": 4719, "token_acc": 0.29243407766810886 }, { "epoch": 2.766930518909411, "grad_norm": 0.27789885280737897, "learning_rate": 0.0001370344961070083, "loss": 3.053579330444336, "step": 4720, "token_acc": 0.29429505310434206 }, { "epoch": 2.76751685722662, "grad_norm": 0.32262742642120495, "learning_rate": 0.00013703254198612244, "loss": 3.0015058517456055, "step": 4721, "token_acc": 0.30103998977897023 }, { "epoch": 2.7681031955438287, "grad_norm": 0.3475917455122131, "learning_rate": 0.0001370305872355562, "loss": 3.0520219802856445, "step": 4722, "token_acc": 0.2935668502856675 }, { "epoch": 2.7686895338610378, "grad_norm": 0.3958104136592667, "learning_rate": 0.00013702863185532788, "loss": 3.037843942642212, "step": 4723, "token_acc": 0.29660543626660046 }, { "epoch": 2.769275872178247, "grad_norm": 0.38220230203313876, "learning_rate": 0.0001370266758454559, "loss": 3.0557992458343506, "step": 4724, "token_acc": 0.29322974180995953 }, { "epoch": 2.769862210495456, "grad_norm": 0.31961681829721456, "learning_rate": 0.0001370247192059586, "loss": 3.0636849403381348, "step": 4725, "token_acc": 0.2914145267828588 }, { "epoch": 2.7704485488126647, "grad_norm": 0.35240628567111343, "learning_rate": 0.0001370227619368544, "loss": 3.0894243717193604, "step": 4726, "token_acc": 0.2887275646478394 }, { "epoch": 2.771034887129874, "grad_norm": 0.32621550091359836, "learning_rate": 0.00013702080403816164, "loss": 3.0906317234039307, "step": 4727, "token_acc": 0.288836742754979 }, { "epoch": 2.771621225447083, "grad_norm": 0.30997750700599325, "learning_rate": 0.00013701884550989878, "loss": 3.054356098175049, "step": 4728, "token_acc": 0.2930217778268492 }, { "epoch": 2.772207563764292, "grad_norm": 0.3320253424704229, "learning_rate": 0.00013701688635208415, "loss": 3.126520872116089, "step": 4729, "token_acc": 0.28327438125474846 }, { "epoch": 2.772793902081501, "grad_norm": 0.38854927055120986, "learning_rate": 0.00013701492656473618, "loss": 3.0934243202209473, "step": 4730, "token_acc": 0.2862656729371103 }, { "epoch": 2.7733802403987102, "grad_norm": 0.3285079400934574, "learning_rate": 0.0001370129661478733, "loss": 3.027575731277466, "step": 4731, "token_acc": 0.2970539716135458 }, { "epoch": 2.7739665787159193, "grad_norm": 0.29171581241204786, "learning_rate": 0.0001370110051015139, "loss": 3.0499985218048096, "step": 4732, "token_acc": 0.29461574819550257 }, { "epoch": 2.774552917033128, "grad_norm": 0.3742543628374655, "learning_rate": 0.00013700904342567636, "loss": 3.0911457538604736, "step": 4733, "token_acc": 0.2874119127949567 }, { "epoch": 2.775139255350337, "grad_norm": 0.3434006243030413, "learning_rate": 0.00013700708112037918, "loss": 3.0453970432281494, "step": 4734, "token_acc": 0.29446659241077955 }, { "epoch": 2.7757255936675462, "grad_norm": 0.3114028043875064, "learning_rate": 0.0001370051181856408, "loss": 3.06387996673584, "step": 4735, "token_acc": 0.29158163976647916 }, { "epoch": 2.7763119319847553, "grad_norm": 0.3533483550458655, "learning_rate": 0.0001370031546214796, "loss": 3.063957452774048, "step": 4736, "token_acc": 0.29160827791240523 }, { "epoch": 2.776898270301964, "grad_norm": 0.37500628721929075, "learning_rate": 0.00013700119042791404, "loss": 3.0680840015411377, "step": 4737, "token_acc": 0.2912414290587785 }, { "epoch": 2.777484608619173, "grad_norm": 0.2934992988918814, "learning_rate": 0.0001369992256049626, "loss": 3.10247802734375, "step": 4738, "token_acc": 0.2858267779804424 }, { "epoch": 2.7780709469363822, "grad_norm": 0.28676405215913686, "learning_rate": 0.0001369972601526437, "loss": 3.0179991722106934, "step": 4739, "token_acc": 0.2991765299144848 }, { "epoch": 2.7786572852535913, "grad_norm": 0.2921200128809918, "learning_rate": 0.00013699529407097582, "loss": 3.035616159439087, "step": 4740, "token_acc": 0.29539959713907105 }, { "epoch": 2.7792436235708005, "grad_norm": 0.2910749418337646, "learning_rate": 0.00013699332735997742, "loss": 3.101915121078491, "step": 4741, "token_acc": 0.28593646730521083 }, { "epoch": 2.7798299618880096, "grad_norm": 0.32873953854336596, "learning_rate": 0.000136991360019667, "loss": 3.0812268257141113, "step": 4742, "token_acc": 0.28828423831697986 }, { "epoch": 2.7804163002052187, "grad_norm": 0.3149760976726917, "learning_rate": 0.00013698939205006305, "loss": 3.0557236671447754, "step": 4743, "token_acc": 0.2927200628677193 }, { "epoch": 2.7810026385224274, "grad_norm": 0.2963351197516334, "learning_rate": 0.000136987423451184, "loss": 3.0593719482421875, "step": 4744, "token_acc": 0.29033246275825186 }, { "epoch": 2.7815889768396365, "grad_norm": 0.32678462102514294, "learning_rate": 0.00013698545422304837, "loss": 3.0411734580993652, "step": 4745, "token_acc": 0.29531782088465086 }, { "epoch": 2.7821753151568456, "grad_norm": 0.2819593216147927, "learning_rate": 0.00013698348436567468, "loss": 3.093870162963867, "step": 4746, "token_acc": 0.2879781076339647 }, { "epoch": 2.7827616534740547, "grad_norm": 0.3611208088463207, "learning_rate": 0.0001369815138790814, "loss": 3.072493076324463, "step": 4747, "token_acc": 0.28977192744812125 }, { "epoch": 2.7833479917912634, "grad_norm": 0.38582350438606794, "learning_rate": 0.00013697954276328708, "loss": 3.0771806240081787, "step": 4748, "token_acc": 0.28853124358537974 }, { "epoch": 2.7839343301084725, "grad_norm": 0.3232222985530676, "learning_rate": 0.00013697757101831018, "loss": 3.0764048099517822, "step": 4749, "token_acc": 0.288356383414513 }, { "epoch": 2.7845206684256816, "grad_norm": 0.3178239880814871, "learning_rate": 0.00013697559864416927, "loss": 3.041761875152588, "step": 4750, "token_acc": 0.29676185633088076 }, { "epoch": 2.7851070067428907, "grad_norm": 0.3123355856322726, "learning_rate": 0.00013697362564088288, "loss": 3.103762149810791, "step": 4751, "token_acc": 0.28761483352149 }, { "epoch": 2.7856933450601, "grad_norm": 0.38316026906652556, "learning_rate": 0.00013697165200846949, "loss": 3.0562524795532227, "step": 4752, "token_acc": 0.29391848564906253 }, { "epoch": 2.786279683377309, "grad_norm": 0.2970264436833723, "learning_rate": 0.0001369696777469477, "loss": 3.0503549575805664, "step": 4753, "token_acc": 0.29155181414445064 }, { "epoch": 2.7868660216945176, "grad_norm": 0.32899263769294973, "learning_rate": 0.000136967702856336, "loss": 3.045626163482666, "step": 4754, "token_acc": 0.2926578099406911 }, { "epoch": 2.7874523600117267, "grad_norm": 0.318241058702945, "learning_rate": 0.000136965727336653, "loss": 3.0914764404296875, "step": 4755, "token_acc": 0.2873910179209256 }, { "epoch": 2.788038698328936, "grad_norm": 0.29274760618533635, "learning_rate": 0.00013696375118791722, "loss": 3.1226277351379395, "step": 4756, "token_acc": 0.2827084504908418 }, { "epoch": 2.788625036646145, "grad_norm": 0.2786488709164468, "learning_rate": 0.00013696177441014723, "loss": 3.078016519546509, "step": 4757, "token_acc": 0.2892528642891722 }, { "epoch": 2.7892113749633536, "grad_norm": 0.2826372736297748, "learning_rate": 0.0001369597970033616, "loss": 3.054342269897461, "step": 4758, "token_acc": 0.2914980596693791 }, { "epoch": 2.7897977132805627, "grad_norm": 0.3082364879523241, "learning_rate": 0.00013695781896757892, "loss": 3.074385166168213, "step": 4759, "token_acc": 0.2888890055268918 }, { "epoch": 2.790384051597772, "grad_norm": 0.33923244658162227, "learning_rate": 0.00013695584030281774, "loss": 3.048893690109253, "step": 4760, "token_acc": 0.29424171391766185 }, { "epoch": 2.790970389914981, "grad_norm": 0.3490246916468502, "learning_rate": 0.0001369538610090967, "loss": 3.0290298461914062, "step": 4761, "token_acc": 0.2954485902945184 }, { "epoch": 2.79155672823219, "grad_norm": 0.3281602696443412, "learning_rate": 0.00013695188108643432, "loss": 3.055243492126465, "step": 4762, "token_acc": 0.2942199929464321 }, { "epoch": 2.792143066549399, "grad_norm": 0.27840673885928363, "learning_rate": 0.00013694990053484923, "loss": 3.0571136474609375, "step": 4763, "token_acc": 0.2923231822710476 }, { "epoch": 2.7927294048666083, "grad_norm": 0.3539115109604466, "learning_rate": 0.00013694791935436008, "loss": 3.0462684631347656, "step": 4764, "token_acc": 0.2942701933316949 }, { "epoch": 2.793315743183817, "grad_norm": 0.3918344182511708, "learning_rate": 0.0001369459375449854, "loss": 3.1047658920288086, "step": 4765, "token_acc": 0.28669970372929093 }, { "epoch": 2.793902081501026, "grad_norm": 0.3037531971428858, "learning_rate": 0.00013694395510674388, "loss": 3.0559329986572266, "step": 4766, "token_acc": 0.29373066842229484 }, { "epoch": 2.794488419818235, "grad_norm": 0.2528598112190445, "learning_rate": 0.0001369419720396541, "loss": 3.0698189735412598, "step": 4767, "token_acc": 0.29163451370430704 }, { "epoch": 2.7950747581354443, "grad_norm": 0.31890910380488735, "learning_rate": 0.0001369399883437347, "loss": 3.083059549331665, "step": 4768, "token_acc": 0.2885939555248953 }, { "epoch": 2.795661096452653, "grad_norm": 0.3922064491103291, "learning_rate": 0.00013693800401900428, "loss": 3.067549705505371, "step": 4769, "token_acc": 0.29041119612102895 }, { "epoch": 2.796247434769862, "grad_norm": 0.3803570926882513, "learning_rate": 0.00013693601906548155, "loss": 3.0943784713745117, "step": 4770, "token_acc": 0.28819645926623383 }, { "epoch": 2.796833773087071, "grad_norm": 0.3189023354880703, "learning_rate": 0.0001369340334831851, "loss": 3.1087989807128906, "step": 4771, "token_acc": 0.28510372558371166 }, { "epoch": 2.7974201114042803, "grad_norm": 0.3268408381541908, "learning_rate": 0.00013693204727213362, "loss": 3.0855469703674316, "step": 4772, "token_acc": 0.2884823142628417 }, { "epoch": 2.7980064497214894, "grad_norm": 0.33730087300884776, "learning_rate": 0.00013693006043234574, "loss": 3.0664196014404297, "step": 4773, "token_acc": 0.29234207871104767 }, { "epoch": 2.7985927880386985, "grad_norm": 0.29093193732917083, "learning_rate": 0.0001369280729638401, "loss": 3.084001302719116, "step": 4774, "token_acc": 0.2878298762053106 }, { "epoch": 2.7991791263559076, "grad_norm": 0.3350478652774217, "learning_rate": 0.00013692608486663544, "loss": 3.081118583679199, "step": 4775, "token_acc": 0.28883791063626874 }, { "epoch": 2.7997654646731163, "grad_norm": 0.314888650428089, "learning_rate": 0.00013692409614075038, "loss": 3.0622735023498535, "step": 4776, "token_acc": 0.2917783508598458 }, { "epoch": 2.8003518029903254, "grad_norm": 0.28371550439869797, "learning_rate": 0.00013692210678620362, "loss": 3.080512046813965, "step": 4777, "token_acc": 0.28835288712888313 }, { "epoch": 2.8009381413075345, "grad_norm": 0.34206857932904094, "learning_rate": 0.00013692011680301386, "loss": 3.092446804046631, "step": 4778, "token_acc": 0.28570601618813674 }, { "epoch": 2.8015244796247436, "grad_norm": 0.2993711497932688, "learning_rate": 0.00013691812619119978, "loss": 3.0532729625701904, "step": 4779, "token_acc": 0.29376728639620686 }, { "epoch": 2.8021108179419523, "grad_norm": 0.3081203422916144, "learning_rate": 0.00013691613495078004, "loss": 3.099503517150879, "step": 4780, "token_acc": 0.2882668725803604 }, { "epoch": 2.8026971562591614, "grad_norm": 0.32324740218244474, "learning_rate": 0.00013691414308177342, "loss": 3.070456027984619, "step": 4781, "token_acc": 0.2907130998642137 }, { "epoch": 2.8032834945763705, "grad_norm": 0.26938460344931686, "learning_rate": 0.00013691215058419856, "loss": 3.0482892990112305, "step": 4782, "token_acc": 0.2915497195864094 }, { "epoch": 2.8038698328935796, "grad_norm": 0.31309943407560015, "learning_rate": 0.00013691015745807426, "loss": 3.0742347240448, "step": 4783, "token_acc": 0.28983569375214285 }, { "epoch": 2.8044561712107887, "grad_norm": 0.3236097487548583, "learning_rate": 0.00013690816370341916, "loss": 3.070676803588867, "step": 4784, "token_acc": 0.2899154878846546 }, { "epoch": 2.805042509527998, "grad_norm": 0.3005695172124386, "learning_rate": 0.00013690616932025203, "loss": 3.065809726715088, "step": 4785, "token_acc": 0.29104199703561945 }, { "epoch": 2.805628847845207, "grad_norm": 0.3164094430453737, "learning_rate": 0.0001369041743085916, "loss": 3.039031505584717, "step": 4786, "token_acc": 0.2948242068489255 }, { "epoch": 2.8062151861624156, "grad_norm": 0.30348470507080993, "learning_rate": 0.0001369021786684566, "loss": 3.0371341705322266, "step": 4787, "token_acc": 0.2938407967748096 }, { "epoch": 2.8068015244796247, "grad_norm": 0.3522932833056971, "learning_rate": 0.00013690018239986577, "loss": 3.0759778022766113, "step": 4788, "token_acc": 0.2890363706895648 }, { "epoch": 2.807387862796834, "grad_norm": 0.3098380097115769, "learning_rate": 0.00013689818550283788, "loss": 3.07541561126709, "step": 4789, "token_acc": 0.28975839950123805 }, { "epoch": 2.807974201114043, "grad_norm": 0.2581259928498899, "learning_rate": 0.00013689618797739172, "loss": 3.065826892852783, "step": 4790, "token_acc": 0.2910118737510011 }, { "epoch": 2.8085605394312516, "grad_norm": 0.2866558141754139, "learning_rate": 0.00013689418982354597, "loss": 3.0583341121673584, "step": 4791, "token_acc": 0.2925796568310867 }, { "epoch": 2.8091468777484607, "grad_norm": 0.29968475396954025, "learning_rate": 0.00013689219104131946, "loss": 3.068889856338501, "step": 4792, "token_acc": 0.28877639800680216 }, { "epoch": 2.80973321606567, "grad_norm": 0.27055266527258837, "learning_rate": 0.00013689019163073098, "loss": 3.0700814723968506, "step": 4793, "token_acc": 0.2911559246078819 }, { "epoch": 2.810319554382879, "grad_norm": 0.26627702526807207, "learning_rate": 0.00013688819159179925, "loss": 3.068039655685425, "step": 4794, "token_acc": 0.29043226099697067 }, { "epoch": 2.810905892700088, "grad_norm": 0.31406922780365176, "learning_rate": 0.00013688619092454312, "loss": 3.093127727508545, "step": 4795, "token_acc": 0.2861608541795642 }, { "epoch": 2.811492231017297, "grad_norm": 0.32985388667681365, "learning_rate": 0.00013688418962898134, "loss": 3.0470056533813477, "step": 4796, "token_acc": 0.293848686846234 }, { "epoch": 2.8120785693345063, "grad_norm": 0.33833355794818615, "learning_rate": 0.00013688218770513275, "loss": 3.0054283142089844, "step": 4797, "token_acc": 0.30026423058952423 }, { "epoch": 2.812664907651715, "grad_norm": 0.43280938093342003, "learning_rate": 0.0001368801851530161, "loss": 3.0971732139587402, "step": 4798, "token_acc": 0.2863970080762149 }, { "epoch": 2.813251245968924, "grad_norm": 0.37319613678111735, "learning_rate": 0.00013687818197265025, "loss": 3.0789685249328613, "step": 4799, "token_acc": 0.2891054159617353 }, { "epoch": 2.813837584286133, "grad_norm": 0.2826209236016134, "learning_rate": 0.00013687617816405398, "loss": 3.068892240524292, "step": 4800, "token_acc": 0.2920483832542828 }, { "epoch": 2.8144239226033423, "grad_norm": 0.38002410934182995, "learning_rate": 0.00013687417372724618, "loss": 3.0782599449157715, "step": 4801, "token_acc": 0.2892367039616482 }, { "epoch": 2.815010260920551, "grad_norm": 0.31998029321316696, "learning_rate": 0.0001368721686622456, "loss": 3.0505943298339844, "step": 4802, "token_acc": 0.29189764665902046 }, { "epoch": 2.81559659923776, "grad_norm": 0.3391896399130213, "learning_rate": 0.00013687016296907108, "loss": 3.0685372352600098, "step": 4803, "token_acc": 0.29149517735398534 }, { "epoch": 2.816182937554969, "grad_norm": 0.30201905973665605, "learning_rate": 0.00013686815664774152, "loss": 3.0228066444396973, "step": 4804, "token_acc": 0.29571094940626347 }, { "epoch": 2.8167692758721783, "grad_norm": 0.3284814038437341, "learning_rate": 0.00013686614969827575, "loss": 3.0386509895324707, "step": 4805, "token_acc": 0.2949472613263569 }, { "epoch": 2.8173556141893874, "grad_norm": 0.29273330648567913, "learning_rate": 0.00013686414212069257, "loss": 3.054908275604248, "step": 4806, "token_acc": 0.29197934453247504 }, { "epoch": 2.8179419525065965, "grad_norm": 0.33616106448900057, "learning_rate": 0.00013686213391501088, "loss": 3.057896852493286, "step": 4807, "token_acc": 0.2934615757291406 }, { "epoch": 2.818528290823805, "grad_norm": 0.27502343073472063, "learning_rate": 0.00013686012508124957, "loss": 3.110095739364624, "step": 4808, "token_acc": 0.2847718423251902 }, { "epoch": 2.8191146291410143, "grad_norm": 0.3023793785635317, "learning_rate": 0.00013685811561942745, "loss": 3.03559947013855, "step": 4809, "token_acc": 0.2954912308136777 }, { "epoch": 2.8197009674582234, "grad_norm": 0.2761874824929722, "learning_rate": 0.00013685610552956342, "loss": 3.0418167114257812, "step": 4810, "token_acc": 0.2957558256841068 }, { "epoch": 2.8202873057754325, "grad_norm": 0.29874201407219236, "learning_rate": 0.00013685409481167641, "loss": 3.0940213203430176, "step": 4811, "token_acc": 0.28725322847766227 }, { "epoch": 2.820873644092641, "grad_norm": 0.29095529846725116, "learning_rate": 0.00013685208346578522, "loss": 3.084400177001953, "step": 4812, "token_acc": 0.28848042062143014 }, { "epoch": 2.8214599824098503, "grad_norm": 0.2871357032288949, "learning_rate": 0.00013685007149190885, "loss": 3.0701496601104736, "step": 4813, "token_acc": 0.2904061270231468 }, { "epoch": 2.8220463207270594, "grad_norm": 0.32193496555587703, "learning_rate": 0.0001368480588900661, "loss": 3.0730557441711426, "step": 4814, "token_acc": 0.2909905784663084 }, { "epoch": 2.8226326590442685, "grad_norm": 0.26608695957878253, "learning_rate": 0.00013684604566027592, "loss": 3.0878686904907227, "step": 4815, "token_acc": 0.2904139087091064 }, { "epoch": 2.8232189973614776, "grad_norm": 0.24726333637178188, "learning_rate": 0.0001368440318025572, "loss": 3.069044589996338, "step": 4816, "token_acc": 0.2899743770096463 }, { "epoch": 2.8238053356786867, "grad_norm": 0.29347042842533366, "learning_rate": 0.0001368420173169289, "loss": 3.0654702186584473, "step": 4817, "token_acc": 0.29164684980183136 }, { "epoch": 2.824391673995896, "grad_norm": 0.27017825135396795, "learning_rate": 0.0001368400022034099, "loss": 3.0691819190979004, "step": 4818, "token_acc": 0.29113239692176235 }, { "epoch": 2.8249780123131045, "grad_norm": 0.2818885973854282, "learning_rate": 0.00013683798646201914, "loss": 3.080728530883789, "step": 4819, "token_acc": 0.2890754983519071 }, { "epoch": 2.8255643506303136, "grad_norm": 0.291875643258975, "learning_rate": 0.0001368359700927756, "loss": 3.0732407569885254, "step": 4820, "token_acc": 0.2907353627547581 }, { "epoch": 2.8261506889475227, "grad_norm": 0.24077896518560649, "learning_rate": 0.00013683395309569814, "loss": 3.044919967651367, "step": 4821, "token_acc": 0.29268044003219745 }, { "epoch": 2.826737027264732, "grad_norm": 0.33859762739920635, "learning_rate": 0.0001368319354708058, "loss": 3.0237138271331787, "step": 4822, "token_acc": 0.29725946113952373 }, { "epoch": 2.8273233655819405, "grad_norm": 0.3459640503824454, "learning_rate": 0.00013682991721811744, "loss": 3.0350289344787598, "step": 4823, "token_acc": 0.2947752440249152 }, { "epoch": 2.8279097038991496, "grad_norm": 0.3104087793704534, "learning_rate": 0.00013682789833765208, "loss": 3.067493200302124, "step": 4824, "token_acc": 0.29205522743757123 }, { "epoch": 2.8284960422163588, "grad_norm": 0.3487878820327055, "learning_rate": 0.00013682587882942864, "loss": 3.092806100845337, "step": 4825, "token_acc": 0.2871261078162629 }, { "epoch": 2.829082380533568, "grad_norm": 0.35960622493384414, "learning_rate": 0.00013682385869346616, "loss": 3.0770695209503174, "step": 4826, "token_acc": 0.28979139452954916 }, { "epoch": 2.829668718850777, "grad_norm": 0.297385156261709, "learning_rate": 0.00013682183792978355, "loss": 3.0324149131774902, "step": 4827, "token_acc": 0.2965683672491785 }, { "epoch": 2.830255057167986, "grad_norm": 0.33746750407006637, "learning_rate": 0.00013681981653839982, "loss": 3.0523533821105957, "step": 4828, "token_acc": 0.29146112600536195 }, { "epoch": 2.830841395485195, "grad_norm": 0.31423922734077525, "learning_rate": 0.00013681779451933397, "loss": 3.107745409011841, "step": 4829, "token_acc": 0.28478826676907987 }, { "epoch": 2.831427733802404, "grad_norm": 0.31190200623331843, "learning_rate": 0.00013681577187260496, "loss": 3.0194432735443115, "step": 4830, "token_acc": 0.2982536042550585 }, { "epoch": 2.832014072119613, "grad_norm": 0.36441946283135274, "learning_rate": 0.0001368137485982318, "loss": 3.0998167991638184, "step": 4831, "token_acc": 0.2873424716617471 }, { "epoch": 2.832600410436822, "grad_norm": 0.3407404971718265, "learning_rate": 0.00013681172469623353, "loss": 3.035048484802246, "step": 4832, "token_acc": 0.2969198471615159 }, { "epoch": 2.833186748754031, "grad_norm": 0.3662600972578241, "learning_rate": 0.00013680970016662913, "loss": 3.0480313301086426, "step": 4833, "token_acc": 0.2942160598636145 }, { "epoch": 2.83377308707124, "grad_norm": 0.3742693149677501, "learning_rate": 0.0001368076750094376, "loss": 3.071227550506592, "step": 4834, "token_acc": 0.29027533051552856 }, { "epoch": 2.834359425388449, "grad_norm": 0.3021615753510456, "learning_rate": 0.00013680564922467802, "loss": 3.0871729850769043, "step": 4835, "token_acc": 0.2890503492985797 }, { "epoch": 2.834945763705658, "grad_norm": 0.3593718784822757, "learning_rate": 0.00013680362281236937, "loss": 3.021775722503662, "step": 4836, "token_acc": 0.29833020711727903 }, { "epoch": 2.835532102022867, "grad_norm": 0.31635056119294747, "learning_rate": 0.0001368015957725307, "loss": 3.0891332626342773, "step": 4837, "token_acc": 0.2888171187570303 }, { "epoch": 2.8361184403400763, "grad_norm": 0.3044507510909209, "learning_rate": 0.00013679956810518106, "loss": 3.085489273071289, "step": 4838, "token_acc": 0.28895991304572866 }, { "epoch": 2.8367047786572854, "grad_norm": 0.3356597988160041, "learning_rate": 0.0001367975398103395, "loss": 3.0551071166992188, "step": 4839, "token_acc": 0.29262468409923154 }, { "epoch": 2.8372911169744945, "grad_norm": 0.33622878549585383, "learning_rate": 0.00013679551088802505, "loss": 3.07496976852417, "step": 4840, "token_acc": 0.2883614402199578 }, { "epoch": 2.837877455291703, "grad_norm": 0.36570114198749076, "learning_rate": 0.00013679348133825679, "loss": 3.0654499530792236, "step": 4841, "token_acc": 0.29034832066221217 }, { "epoch": 2.8384637936089123, "grad_norm": 0.3979051483140825, "learning_rate": 0.0001367914511610538, "loss": 3.06706166267395, "step": 4842, "token_acc": 0.29139474650242686 }, { "epoch": 2.8390501319261214, "grad_norm": 0.40268832825795103, "learning_rate": 0.00013678942035643508, "loss": 3.0585060119628906, "step": 4843, "token_acc": 0.2907058073012075 }, { "epoch": 2.8396364702433305, "grad_norm": 0.35173387747869767, "learning_rate": 0.00013678738892441977, "loss": 3.001384735107422, "step": 4844, "token_acc": 0.3005223052981791 }, { "epoch": 2.840222808560539, "grad_norm": 0.3094901798578128, "learning_rate": 0.00013678535686502698, "loss": 3.0669188499450684, "step": 4845, "token_acc": 0.28984726470543176 }, { "epoch": 2.8408091468777483, "grad_norm": 0.34643289458033594, "learning_rate": 0.00013678332417827572, "loss": 3.055863380432129, "step": 4846, "token_acc": 0.29233547457779707 }, { "epoch": 2.8413954851949574, "grad_norm": 0.3653440793059422, "learning_rate": 0.00013678129086418513, "loss": 3.0282843112945557, "step": 4847, "token_acc": 0.2980623228287288 }, { "epoch": 2.8419818235121665, "grad_norm": 0.35183779430964246, "learning_rate": 0.00013677925692277427, "loss": 3.0988895893096924, "step": 4848, "token_acc": 0.28619740750672334 }, { "epoch": 2.8425681618293757, "grad_norm": 0.31646442279955267, "learning_rate": 0.00013677722235406234, "loss": 3.100616693496704, "step": 4849, "token_acc": 0.28517103242253267 }, { "epoch": 2.8431545001465848, "grad_norm": 0.42407306794829913, "learning_rate": 0.00013677518715806834, "loss": 3.107865333557129, "step": 4850, "token_acc": 0.2847780859916782 }, { "epoch": 2.843740838463794, "grad_norm": 0.4028516181127952, "learning_rate": 0.00013677315133481146, "loss": 3.058948040008545, "step": 4851, "token_acc": 0.2926448634340227 }, { "epoch": 2.8443271767810026, "grad_norm": 0.32515598567762216, "learning_rate": 0.0001367711148843108, "loss": 3.0849685668945312, "step": 4852, "token_acc": 0.28916697557296794 }, { "epoch": 2.8449135150982117, "grad_norm": 0.35070076013248097, "learning_rate": 0.00013676907780658547, "loss": 3.0353379249572754, "step": 4853, "token_acc": 0.29581565660639103 }, { "epoch": 2.8454998534154208, "grad_norm": 0.3065234840073978, "learning_rate": 0.00013676704010165465, "loss": 3.0687308311462402, "step": 4854, "token_acc": 0.2897950887355971 }, { "epoch": 2.84608619173263, "grad_norm": 0.3514951568828879, "learning_rate": 0.00013676500176953743, "loss": 3.0938425064086914, "step": 4855, "token_acc": 0.28926369241929634 }, { "epoch": 2.8466725300498386, "grad_norm": 0.32461838431951184, "learning_rate": 0.000136762962810253, "loss": 3.059913396835327, "step": 4856, "token_acc": 0.29186982264056816 }, { "epoch": 2.8472588683670477, "grad_norm": 0.2933209698375005, "learning_rate": 0.0001367609232238205, "loss": 3.0574121475219727, "step": 4857, "token_acc": 0.29203549131914314 }, { "epoch": 2.847845206684257, "grad_norm": 0.3211857137889328, "learning_rate": 0.00013675888301025913, "loss": 3.1005334854125977, "step": 4858, "token_acc": 0.28561218598428684 }, { "epoch": 2.848431545001466, "grad_norm": 0.32804790095073016, "learning_rate": 0.00013675684216958795, "loss": 3.0724306106567383, "step": 4859, "token_acc": 0.2908846086614517 }, { "epoch": 2.849017883318675, "grad_norm": 0.30134229366354554, "learning_rate": 0.00013675480070182624, "loss": 3.080113410949707, "step": 4860, "token_acc": 0.2914634430030319 }, { "epoch": 2.849604221635884, "grad_norm": 0.2973486162355321, "learning_rate": 0.00013675275860699308, "loss": 3.0549845695495605, "step": 4861, "token_acc": 0.29094257712293625 }, { "epoch": 2.850190559953093, "grad_norm": 0.28798675063581863, "learning_rate": 0.00013675071588510775, "loss": 3.0893678665161133, "step": 4862, "token_acc": 0.2872112296695328 }, { "epoch": 2.850776898270302, "grad_norm": 0.3886744300605585, "learning_rate": 0.00013674867253618938, "loss": 3.073453426361084, "step": 4863, "token_acc": 0.29067181663791025 }, { "epoch": 2.851363236587511, "grad_norm": 0.30142321601121114, "learning_rate": 0.00013674662856025716, "loss": 3.022930860519409, "step": 4864, "token_acc": 0.2967893400366845 }, { "epoch": 2.85194957490472, "grad_norm": 0.29869302219091765, "learning_rate": 0.00013674458395733033, "loss": 3.054203510284424, "step": 4865, "token_acc": 0.2913743630858819 }, { "epoch": 2.852535913221929, "grad_norm": 0.31447334342393185, "learning_rate": 0.00013674253872742804, "loss": 3.069089889526367, "step": 4866, "token_acc": 0.292258108397993 }, { "epoch": 2.853122251539138, "grad_norm": 0.2754222280578462, "learning_rate": 0.00013674049287056957, "loss": 3.0077414512634277, "step": 4867, "token_acc": 0.29857659987420737 }, { "epoch": 2.853708589856347, "grad_norm": 0.3473956149775441, "learning_rate": 0.00013673844638677408, "loss": 3.095607280731201, "step": 4868, "token_acc": 0.28695696590473757 }, { "epoch": 2.854294928173556, "grad_norm": 0.3047428721590186, "learning_rate": 0.00013673639927606085, "loss": 3.0794405937194824, "step": 4869, "token_acc": 0.28940673288474816 }, { "epoch": 2.8548812664907652, "grad_norm": 0.29192429555898564, "learning_rate": 0.00013673435153844902, "loss": 3.1294727325439453, "step": 4870, "token_acc": 0.28206275374951467 }, { "epoch": 2.8554676048079743, "grad_norm": 0.2902944213606873, "learning_rate": 0.00013673230317395792, "loss": 3.1009063720703125, "step": 4871, "token_acc": 0.28620251928522605 }, { "epoch": 2.8560539431251835, "grad_norm": 0.32487802043584385, "learning_rate": 0.00013673025418260674, "loss": 3.044849395751953, "step": 4872, "token_acc": 0.2927915496737702 }, { "epoch": 2.856640281442392, "grad_norm": 0.2959498091724525, "learning_rate": 0.00013672820456441477, "loss": 3.068972587585449, "step": 4873, "token_acc": 0.29110256057414446 }, { "epoch": 2.8572266197596012, "grad_norm": 0.31659334286708124, "learning_rate": 0.00013672615431940122, "loss": 3.0513830184936523, "step": 4874, "token_acc": 0.292033804131898 }, { "epoch": 2.8578129580768104, "grad_norm": 0.29738089862197364, "learning_rate": 0.00013672410344758536, "loss": 3.0619168281555176, "step": 4875, "token_acc": 0.2921223311082706 }, { "epoch": 2.8583992963940195, "grad_norm": 0.3053260952536838, "learning_rate": 0.00013672205194898646, "loss": 3.0587351322174072, "step": 4876, "token_acc": 0.2917526926558384 }, { "epoch": 2.858985634711228, "grad_norm": 0.29495805110893697, "learning_rate": 0.00013671999982362379, "loss": 3.085895538330078, "step": 4877, "token_acc": 0.2874030488082529 }, { "epoch": 2.8595719730284372, "grad_norm": 0.3392844793731414, "learning_rate": 0.00013671794707151665, "loss": 3.0929605960845947, "step": 4878, "token_acc": 0.28745235962670435 }, { "epoch": 2.8601583113456464, "grad_norm": 0.2878131750494053, "learning_rate": 0.00013671589369268426, "loss": 3.0608572959899902, "step": 4879, "token_acc": 0.29080055980679964 }, { "epoch": 2.8607446496628555, "grad_norm": 0.3465144308092056, "learning_rate": 0.000136713839687146, "loss": 3.0645763874053955, "step": 4880, "token_acc": 0.2909594420818415 }, { "epoch": 2.8613309879800646, "grad_norm": 0.35238255020966314, "learning_rate": 0.00013671178505492108, "loss": 3.118668556213379, "step": 4881, "token_acc": 0.28325499536295423 }, { "epoch": 2.8619173262972737, "grad_norm": 0.33777528758421227, "learning_rate": 0.00013670972979602883, "loss": 3.0725622177124023, "step": 4882, "token_acc": 0.29071840031781765 }, { "epoch": 2.862503664614483, "grad_norm": 0.32051723051437714, "learning_rate": 0.0001367076739104886, "loss": 3.0584611892700195, "step": 4883, "token_acc": 0.29346060475107083 }, { "epoch": 2.8630900029316915, "grad_norm": 0.3177751718458312, "learning_rate": 0.0001367056173983196, "loss": 3.0840795040130615, "step": 4884, "token_acc": 0.2903938500748538 }, { "epoch": 2.8636763412489006, "grad_norm": 0.23776306473376105, "learning_rate": 0.00013670356025954127, "loss": 3.051553249359131, "step": 4885, "token_acc": 0.29301048440135014 }, { "epoch": 2.8642626795661097, "grad_norm": 0.28116221369243977, "learning_rate": 0.00013670150249417285, "loss": 3.075465679168701, "step": 4886, "token_acc": 0.2901275130825283 }, { "epoch": 2.864849017883319, "grad_norm": 0.27369227271637464, "learning_rate": 0.0001366994441022337, "loss": 3.039811611175537, "step": 4887, "token_acc": 0.29445651846228477 }, { "epoch": 2.8654353562005275, "grad_norm": 0.2976444854923505, "learning_rate": 0.00013669738508374315, "loss": 3.049858570098877, "step": 4888, "token_acc": 0.29420746305182854 }, { "epoch": 2.8660216945177366, "grad_norm": 0.3987067204305948, "learning_rate": 0.00013669532543872053, "loss": 3.0503129959106445, "step": 4889, "token_acc": 0.2947053987101183 }, { "epoch": 2.8666080328349457, "grad_norm": 0.3551301770206625, "learning_rate": 0.00013669326516718523, "loss": 3.1033287048339844, "step": 4890, "token_acc": 0.28611700683009267 }, { "epoch": 2.867194371152155, "grad_norm": 0.2821887441620424, "learning_rate": 0.00013669120426915656, "loss": 3.041935920715332, "step": 4891, "token_acc": 0.2951843642957888 }, { "epoch": 2.867780709469364, "grad_norm": 0.3235498199949132, "learning_rate": 0.00013668914274465388, "loss": 3.029634714126587, "step": 4892, "token_acc": 0.2962288141147479 }, { "epoch": 2.868367047786573, "grad_norm": 0.339912022339504, "learning_rate": 0.0001366870805936966, "loss": 3.0939648151397705, "step": 4893, "token_acc": 0.28822060263355004 }, { "epoch": 2.868953386103782, "grad_norm": 0.29674087057349996, "learning_rate": 0.000136685017816304, "loss": 3.0584259033203125, "step": 4894, "token_acc": 0.29103092155987503 }, { "epoch": 2.869539724420991, "grad_norm": 0.3214814444078491, "learning_rate": 0.0001366829544124956, "loss": 3.0968194007873535, "step": 4895, "token_acc": 0.2880253810607628 }, { "epoch": 2.8701260627382, "grad_norm": 0.30636589044520485, "learning_rate": 0.00013668089038229063, "loss": 3.0129079818725586, "step": 4896, "token_acc": 0.2972527775661454 }, { "epoch": 2.870712401055409, "grad_norm": 0.33679609271871797, "learning_rate": 0.0001366788257257086, "loss": 3.045170307159424, "step": 4897, "token_acc": 0.2953995346440339 }, { "epoch": 2.871298739372618, "grad_norm": 0.3210662201065323, "learning_rate": 0.0001366767604427688, "loss": 3.0557684898376465, "step": 4898, "token_acc": 0.2918737509547676 }, { "epoch": 2.871885077689827, "grad_norm": 0.31999383330853426, "learning_rate": 0.0001366746945334907, "loss": 3.066033124923706, "step": 4899, "token_acc": 0.2908794796523924 }, { "epoch": 2.872471416007036, "grad_norm": 0.32629072017380617, "learning_rate": 0.0001366726279978937, "loss": 3.058563470840454, "step": 4900, "token_acc": 0.29125229719086376 }, { "epoch": 2.873057754324245, "grad_norm": 0.29318065187281434, "learning_rate": 0.00013667056083599722, "loss": 3.0452301502227783, "step": 4901, "token_acc": 0.2942682269096295 }, { "epoch": 2.873644092641454, "grad_norm": 0.30219091282295585, "learning_rate": 0.00013666849304782064, "loss": 3.0923855304718018, "step": 4902, "token_acc": 0.28690012508516977 }, { "epoch": 2.8742304309586633, "grad_norm": 0.31948389210277356, "learning_rate": 0.0001366664246333834, "loss": 3.035770893096924, "step": 4903, "token_acc": 0.2943962811862545 }, { "epoch": 2.8748167692758724, "grad_norm": 0.2970642419305478, "learning_rate": 0.00013666435559270496, "loss": 3.090167999267578, "step": 4904, "token_acc": 0.2862461673236969 }, { "epoch": 2.875403107593081, "grad_norm": 0.28649489373498555, "learning_rate": 0.00013666228592580472, "loss": 3.0761327743530273, "step": 4905, "token_acc": 0.2888026268561477 }, { "epoch": 2.87598944591029, "grad_norm": 0.26724319208957553, "learning_rate": 0.00013666021563270213, "loss": 3.089818000793457, "step": 4906, "token_acc": 0.28790439593730566 }, { "epoch": 2.8765757842274993, "grad_norm": 0.3006492314693666, "learning_rate": 0.00013665814471341663, "loss": 3.067349910736084, "step": 4907, "token_acc": 0.2917036247057446 }, { "epoch": 2.8771621225447084, "grad_norm": 0.3354999411612963, "learning_rate": 0.0001366560731679677, "loss": 3.043978214263916, "step": 4908, "token_acc": 0.295146418388846 }, { "epoch": 2.8777484608619175, "grad_norm": 0.299872786539488, "learning_rate": 0.00013665400099637477, "loss": 3.0540919303894043, "step": 4909, "token_acc": 0.2917362374461086 }, { "epoch": 2.878334799179126, "grad_norm": 0.28126319251624243, "learning_rate": 0.00013665192819865732, "loss": 3.0354275703430176, "step": 4910, "token_acc": 0.2957064240733482 }, { "epoch": 2.8789211374963353, "grad_norm": 0.28991948010681506, "learning_rate": 0.00013664985477483482, "loss": 3.0851798057556152, "step": 4911, "token_acc": 0.28707460440248594 }, { "epoch": 2.8795074758135444, "grad_norm": 0.2905649602883529, "learning_rate": 0.00013664778072492673, "loss": 3.0583860874176025, "step": 4912, "token_acc": 0.29255924385310883 }, { "epoch": 2.8800938141307535, "grad_norm": 0.3587076888473605, "learning_rate": 0.00013664570604895258, "loss": 3.069845676422119, "step": 4913, "token_acc": 0.290209003838846 }, { "epoch": 2.8806801524479626, "grad_norm": 0.3228630111132636, "learning_rate": 0.00013664363074693183, "loss": 3.0557589530944824, "step": 4914, "token_acc": 0.290475705927245 }, { "epoch": 2.8812664907651717, "grad_norm": 0.27057614203002434, "learning_rate": 0.00013664155481888393, "loss": 3.023590087890625, "step": 4915, "token_acc": 0.29832397327863686 }, { "epoch": 2.8818528290823804, "grad_norm": 0.29470217416901895, "learning_rate": 0.00013663947826482846, "loss": 3.067108154296875, "step": 4916, "token_acc": 0.2915893573051738 }, { "epoch": 2.8824391673995895, "grad_norm": 0.32574490094099856, "learning_rate": 0.00013663740108478488, "loss": 3.0864930152893066, "step": 4917, "token_acc": 0.28842733034711554 }, { "epoch": 2.8830255057167986, "grad_norm": 0.2591103620837644, "learning_rate": 0.0001366353232787727, "loss": 3.0933420658111572, "step": 4918, "token_acc": 0.2889670838022435 }, { "epoch": 2.8836118440340077, "grad_norm": 0.290714480522424, "learning_rate": 0.00013663324484681146, "loss": 3.0583395957946777, "step": 4919, "token_acc": 0.293172069511594 }, { "epoch": 2.8841981823512164, "grad_norm": 0.2887105568243705, "learning_rate": 0.00013663116578892066, "loss": 3.0417308807373047, "step": 4920, "token_acc": 0.2936160786961546 }, { "epoch": 2.8847845206684255, "grad_norm": 0.27926865635544074, "learning_rate": 0.00013662908610511987, "loss": 3.0844526290893555, "step": 4921, "token_acc": 0.29137075043844174 }, { "epoch": 2.8853708589856346, "grad_norm": 0.27509805975777296, "learning_rate": 0.0001366270057954286, "loss": 3.027043342590332, "step": 4922, "token_acc": 0.29774420004676777 }, { "epoch": 2.8859571973028437, "grad_norm": 0.28623631160017565, "learning_rate": 0.00013662492485986638, "loss": 3.061530113220215, "step": 4923, "token_acc": 0.29166378618111427 }, { "epoch": 2.886543535620053, "grad_norm": 0.2524699999124869, "learning_rate": 0.00013662284329845275, "loss": 3.049622058868408, "step": 4924, "token_acc": 0.2933500525022683 }, { "epoch": 2.887129873937262, "grad_norm": 0.3062928589230968, "learning_rate": 0.00013662076111120732, "loss": 3.0496773719787598, "step": 4925, "token_acc": 0.29277194163689085 }, { "epoch": 2.887716212254471, "grad_norm": 0.3372451273507253, "learning_rate": 0.00013661867829814958, "loss": 3.069695472717285, "step": 4926, "token_acc": 0.29136025995678005 }, { "epoch": 2.8883025505716797, "grad_norm": 0.25958844368925366, "learning_rate": 0.00013661659485929913, "loss": 3.067404270172119, "step": 4927, "token_acc": 0.29189625155630494 }, { "epoch": 2.888888888888889, "grad_norm": 0.30297281398669845, "learning_rate": 0.00013661451079467556, "loss": 3.052337646484375, "step": 4928, "token_acc": 0.2939375636715236 }, { "epoch": 2.889475227206098, "grad_norm": 0.43104540607303826, "learning_rate": 0.00013661242610429842, "loss": 3.0546793937683105, "step": 4929, "token_acc": 0.2921234251580915 }, { "epoch": 2.890061565523307, "grad_norm": 0.3374516330558775, "learning_rate": 0.00013661034078818728, "loss": 3.0432088375091553, "step": 4930, "token_acc": 0.2947013234333822 }, { "epoch": 2.8906479038405157, "grad_norm": 0.3974203183202355, "learning_rate": 0.00013660825484636176, "loss": 3.078315258026123, "step": 4931, "token_acc": 0.2899718287293555 }, { "epoch": 2.891234242157725, "grad_norm": 0.3371193606717164, "learning_rate": 0.00013660616827884146, "loss": 3.076338768005371, "step": 4932, "token_acc": 0.28867767377845943 }, { "epoch": 2.891820580474934, "grad_norm": 0.2919640194703603, "learning_rate": 0.00013660408108564592, "loss": 3.037797212600708, "step": 4933, "token_acc": 0.2957846662642064 }, { "epoch": 2.892406918792143, "grad_norm": 0.3843348945411666, "learning_rate": 0.00013660199326679482, "loss": 3.093290328979492, "step": 4934, "token_acc": 0.28917575983668564 }, { "epoch": 2.892993257109352, "grad_norm": 0.2896491666684457, "learning_rate": 0.00013659990482230773, "loss": 3.0653223991394043, "step": 4935, "token_acc": 0.29117822052840625 }, { "epoch": 2.8935795954265613, "grad_norm": 0.3491344872250609, "learning_rate": 0.0001365978157522043, "loss": 3.0582046508789062, "step": 4936, "token_acc": 0.2936305850619878 }, { "epoch": 2.8941659337437704, "grad_norm": 0.3452016073242082, "learning_rate": 0.00013659572605650408, "loss": 3.097259998321533, "step": 4937, "token_acc": 0.28782538172403577 }, { "epoch": 2.894752272060979, "grad_norm": 0.30267139094015116, "learning_rate": 0.00013659363573522682, "loss": 3.0524775981903076, "step": 4938, "token_acc": 0.29223276083055627 }, { "epoch": 2.895338610378188, "grad_norm": 0.34179637618910697, "learning_rate": 0.00013659154478839203, "loss": 3.082977056503296, "step": 4939, "token_acc": 0.2890645446975004 }, { "epoch": 2.8959249486953973, "grad_norm": 0.33721198576623446, "learning_rate": 0.00013658945321601943, "loss": 3.032006025314331, "step": 4940, "token_acc": 0.2984387953184456 }, { "epoch": 2.8965112870126064, "grad_norm": 0.32688468461162323, "learning_rate": 0.00013658736101812867, "loss": 3.044804096221924, "step": 4941, "token_acc": 0.29463513276825665 }, { "epoch": 2.897097625329815, "grad_norm": 0.35081210327030254, "learning_rate": 0.00013658526819473936, "loss": 3.0757012367248535, "step": 4942, "token_acc": 0.2906472048738915 }, { "epoch": 2.897683963647024, "grad_norm": 0.3444259396545603, "learning_rate": 0.00013658317474587116, "loss": 3.0137124061584473, "step": 4943, "token_acc": 0.2986298941464974 }, { "epoch": 2.8982703019642333, "grad_norm": 0.29109573841778436, "learning_rate": 0.00013658108067154378, "loss": 3.073519706726074, "step": 4944, "token_acc": 0.2929093126344929 }, { "epoch": 2.8988566402814424, "grad_norm": 0.3204679143190589, "learning_rate": 0.00013657898597177687, "loss": 3.0761518478393555, "step": 4945, "token_acc": 0.29018853425407387 }, { "epoch": 2.8994429785986515, "grad_norm": 0.29703454911732785, "learning_rate": 0.0001365768906465901, "loss": 3.067534923553467, "step": 4946, "token_acc": 0.29181703217050675 }, { "epoch": 2.9000293169158606, "grad_norm": 0.30361983711839424, "learning_rate": 0.00013657479469600316, "loss": 3.060234546661377, "step": 4947, "token_acc": 0.29071343143468587 }, { "epoch": 2.9006156552330697, "grad_norm": 0.29778000566172885, "learning_rate": 0.00013657269812003572, "loss": 3.0399765968322754, "step": 4948, "token_acc": 0.2951117630970437 }, { "epoch": 2.9012019935502784, "grad_norm": 0.35153781734569994, "learning_rate": 0.0001365706009187075, "loss": 3.113093376159668, "step": 4949, "token_acc": 0.28482262007002707 }, { "epoch": 2.9017883318674875, "grad_norm": 0.36123649498069826, "learning_rate": 0.0001365685030920382, "loss": 3.114863634109497, "step": 4950, "token_acc": 0.2857342711053567 }, { "epoch": 2.9023746701846966, "grad_norm": 0.30646521125901743, "learning_rate": 0.00013656640464004748, "loss": 3.091856002807617, "step": 4951, "token_acc": 0.2882375903526077 }, { "epoch": 2.9029610085019057, "grad_norm": 0.28533678018414266, "learning_rate": 0.0001365643055627551, "loss": 3.056678295135498, "step": 4952, "token_acc": 0.29238449593012955 }, { "epoch": 2.9035473468191144, "grad_norm": 0.2689144862927939, "learning_rate": 0.0001365622058601808, "loss": 3.095412492752075, "step": 4953, "token_acc": 0.28751764448347267 }, { "epoch": 2.9041336851363235, "grad_norm": 0.3458216999362823, "learning_rate": 0.00013656010553234424, "loss": 3.029428005218506, "step": 4954, "token_acc": 0.29690173815128423 }, { "epoch": 2.9047200234535326, "grad_norm": 0.30200490007804476, "learning_rate": 0.0001365580045792652, "loss": 3.0824837684631348, "step": 4955, "token_acc": 0.28756752565328647 }, { "epoch": 2.9053063617707418, "grad_norm": 0.31702497963085324, "learning_rate": 0.00013655590300096335, "loss": 3.088308334350586, "step": 4956, "token_acc": 0.28978314365209634 }, { "epoch": 2.905892700087951, "grad_norm": 0.3233925598151169, "learning_rate": 0.00013655380079745851, "loss": 3.075321674346924, "step": 4957, "token_acc": 0.28840160050542274 }, { "epoch": 2.90647903840516, "grad_norm": 0.31153609092363393, "learning_rate": 0.0001365516979687704, "loss": 3.082887649536133, "step": 4958, "token_acc": 0.2884111481657932 }, { "epoch": 2.9070653767223686, "grad_norm": 0.2999460371432194, "learning_rate": 0.00013654959451491874, "loss": 3.0600123405456543, "step": 4959, "token_acc": 0.2920953090333645 }, { "epoch": 2.9076517150395778, "grad_norm": 0.28269850363610893, "learning_rate": 0.00013654749043592334, "loss": 3.0931074619293213, "step": 4960, "token_acc": 0.28744616164175324 }, { "epoch": 2.908238053356787, "grad_norm": 0.3097826456803799, "learning_rate": 0.00013654538573180393, "loss": 3.076824426651001, "step": 4961, "token_acc": 0.2896529039299678 }, { "epoch": 2.908824391673996, "grad_norm": 0.3006400490309168, "learning_rate": 0.00013654328040258032, "loss": 3.0987045764923096, "step": 4962, "token_acc": 0.28655580379944107 }, { "epoch": 2.909410729991205, "grad_norm": 0.35238033973078, "learning_rate": 0.0001365411744482722, "loss": 3.0468530654907227, "step": 4963, "token_acc": 0.29413446908699176 }, { "epoch": 2.9099970683084138, "grad_norm": 0.33259029517116534, "learning_rate": 0.00013653906786889947, "loss": 3.056656837463379, "step": 4964, "token_acc": 0.2929587859155906 }, { "epoch": 2.910583406625623, "grad_norm": 0.30081725641777596, "learning_rate": 0.0001365369606644818, "loss": 3.06235408782959, "step": 4965, "token_acc": 0.2926857627136161 }, { "epoch": 2.911169744942832, "grad_norm": 0.3031980699462104, "learning_rate": 0.0001365348528350391, "loss": 3.0619630813598633, "step": 4966, "token_acc": 0.29106779669878646 }, { "epoch": 2.911756083260041, "grad_norm": 0.27279503839745356, "learning_rate": 0.00013653274438059108, "loss": 3.0360772609710693, "step": 4967, "token_acc": 0.29285142145306337 }, { "epoch": 2.91234242157725, "grad_norm": 0.29152359546345913, "learning_rate": 0.0001365306353011576, "loss": 3.0718564987182617, "step": 4968, "token_acc": 0.289821742560575 }, { "epoch": 2.9129287598944593, "grad_norm": 0.26080717533141556, "learning_rate": 0.00013652852559675846, "loss": 3.084115505218506, "step": 4969, "token_acc": 0.28900555570323505 }, { "epoch": 2.913515098211668, "grad_norm": 0.2595279454232396, "learning_rate": 0.00013652641526741346, "loss": 3.028672933578491, "step": 4970, "token_acc": 0.2960706272456734 }, { "epoch": 2.914101436528877, "grad_norm": 0.3197042289023389, "learning_rate": 0.00013652430431314243, "loss": 3.0395450592041016, "step": 4971, "token_acc": 0.29597407296697764 }, { "epoch": 2.914687774846086, "grad_norm": 0.3550817028947399, "learning_rate": 0.0001365221927339652, "loss": 3.0052781105041504, "step": 4972, "token_acc": 0.30047953261726573 }, { "epoch": 2.9152741131632953, "grad_norm": 0.2979948974986824, "learning_rate": 0.00013652008052990162, "loss": 3.0458290576934814, "step": 4973, "token_acc": 0.294677615009131 }, { "epoch": 2.915860451480504, "grad_norm": 0.2959829023131944, "learning_rate": 0.00013651796770097153, "loss": 3.0879921913146973, "step": 4974, "token_acc": 0.2892638061152421 }, { "epoch": 2.916446789797713, "grad_norm": 0.33111155613564697, "learning_rate": 0.00013651585424719474, "loss": 3.0816097259521484, "step": 4975, "token_acc": 0.2889419149018947 }, { "epoch": 2.917033128114922, "grad_norm": 0.29407124018271347, "learning_rate": 0.00013651374016859113, "loss": 3.0749902725219727, "step": 4976, "token_acc": 0.2887900985097247 }, { "epoch": 2.9176194664321313, "grad_norm": 0.3175756157058336, "learning_rate": 0.00013651162546518057, "loss": 3.0616040229797363, "step": 4977, "token_acc": 0.2918483890674387 }, { "epoch": 2.9182058047493404, "grad_norm": 0.3031476481144282, "learning_rate": 0.0001365095101369829, "loss": 3.000711679458618, "step": 4978, "token_acc": 0.3012770632123404 }, { "epoch": 2.9187921430665495, "grad_norm": 0.27422554873944843, "learning_rate": 0.00013650739418401804, "loss": 3.043954610824585, "step": 4979, "token_acc": 0.29291409416817044 }, { "epoch": 2.9193784813837587, "grad_norm": 0.31289846560174794, "learning_rate": 0.00013650527760630582, "loss": 3.033825159072876, "step": 4980, "token_acc": 0.2956345008915349 }, { "epoch": 2.9199648197009673, "grad_norm": 0.29490525963999986, "learning_rate": 0.00013650316040386614, "loss": 3.0716962814331055, "step": 4981, "token_acc": 0.2919611036008357 }, { "epoch": 2.9205511580181764, "grad_norm": 0.30447082446066964, "learning_rate": 0.00013650104257671887, "loss": 3.053532838821411, "step": 4982, "token_acc": 0.29195695845443664 }, { "epoch": 2.9211374963353856, "grad_norm": 0.3487472529583517, "learning_rate": 0.0001364989241248839, "loss": 3.0250887870788574, "step": 4983, "token_acc": 0.296724289846936 }, { "epoch": 2.9217238346525947, "grad_norm": 0.32301375801029886, "learning_rate": 0.00013649680504838118, "loss": 3.0632290840148926, "step": 4984, "token_acc": 0.29146726771950976 }, { "epoch": 2.9223101729698033, "grad_norm": 0.30617853219810576, "learning_rate": 0.00013649468534723054, "loss": 3.06593656539917, "step": 4985, "token_acc": 0.29225025787095454 }, { "epoch": 2.9228965112870124, "grad_norm": 0.27267094658589364, "learning_rate": 0.00013649256502145198, "loss": 3.078068971633911, "step": 4986, "token_acc": 0.28980316817802093 }, { "epoch": 2.9234828496042216, "grad_norm": 0.31227488685094434, "learning_rate": 0.00013649044407106534, "loss": 3.0980372428894043, "step": 4987, "token_acc": 0.2860151905038599 }, { "epoch": 2.9240691879214307, "grad_norm": 0.32659256671658804, "learning_rate": 0.00013648832249609058, "loss": 3.0618433952331543, "step": 4988, "token_acc": 0.2912424005570035 }, { "epoch": 2.92465552623864, "grad_norm": 0.3047222930474552, "learning_rate": 0.00013648620029654764, "loss": 3.0806467533111572, "step": 4989, "token_acc": 0.2891033299474219 }, { "epoch": 2.925241864555849, "grad_norm": 0.2525980342334652, "learning_rate": 0.00013648407747245643, "loss": 3.07081937789917, "step": 4990, "token_acc": 0.28869326121479266 }, { "epoch": 2.925828202873058, "grad_norm": 0.27457147916556734, "learning_rate": 0.00013648195402383688, "loss": 3.0556640625, "step": 4991, "token_acc": 0.2948093588434121 }, { "epoch": 2.9264145411902667, "grad_norm": 0.27625396222602666, "learning_rate": 0.000136479829950709, "loss": 3.088117837905884, "step": 4992, "token_acc": 0.28773491620033925 }, { "epoch": 2.927000879507476, "grad_norm": 0.2676467427143431, "learning_rate": 0.00013647770525309266, "loss": 3.0141892433166504, "step": 4993, "token_acc": 0.2977599080987938 }, { "epoch": 2.927587217824685, "grad_norm": 0.30903482405417276, "learning_rate": 0.00013647557993100786, "loss": 3.0457448959350586, "step": 4994, "token_acc": 0.2927699411056864 }, { "epoch": 2.928173556141894, "grad_norm": 0.3985688906389621, "learning_rate": 0.0001364734539844746, "loss": 3.074188709259033, "step": 4995, "token_acc": 0.2897756101993987 }, { "epoch": 2.9287598944591027, "grad_norm": 0.42465521830582476, "learning_rate": 0.00013647132741351277, "loss": 3.061387777328491, "step": 4996, "token_acc": 0.2928401556920112 }, { "epoch": 2.929346232776312, "grad_norm": 0.32635835412107805, "learning_rate": 0.00013646920021814242, "loss": 3.0074830055236816, "step": 4997, "token_acc": 0.29969890187743536 }, { "epoch": 2.929932571093521, "grad_norm": 0.3503013173099579, "learning_rate": 0.0001364670723983835, "loss": 3.0533993244171143, "step": 4998, "token_acc": 0.2937325353436734 }, { "epoch": 2.93051890941073, "grad_norm": 0.36676561424188475, "learning_rate": 0.00013646494395425597, "loss": 3.068729877471924, "step": 4999, "token_acc": 0.2899127128476501 }, { "epoch": 2.931105247727939, "grad_norm": 0.29829901745341364, "learning_rate": 0.00013646281488577993, "loss": 3.048556327819824, "step": 5000, "token_acc": 0.2922724285919629 }, { "epoch": 2.9316915860451482, "grad_norm": 0.4084962256189079, "learning_rate": 0.00013646068519297523, "loss": 3.0652198791503906, "step": 5001, "token_acc": 0.29210373984971355 }, { "epoch": 2.9322779243623573, "grad_norm": 0.2683981941247538, "learning_rate": 0.00013645855487586197, "loss": 3.0624914169311523, "step": 5002, "token_acc": 0.2918852875216858 }, { "epoch": 2.932864262679566, "grad_norm": 0.35479440872004936, "learning_rate": 0.00013645642393446015, "loss": 3.0860254764556885, "step": 5003, "token_acc": 0.2869742572074307 }, { "epoch": 2.933450600996775, "grad_norm": 0.2812546149525364, "learning_rate": 0.00013645429236878976, "loss": 3.062725782394409, "step": 5004, "token_acc": 0.2911306561499056 }, { "epoch": 2.9340369393139842, "grad_norm": 0.32737679595888164, "learning_rate": 0.00013645216017887086, "loss": 3.062804698944092, "step": 5005, "token_acc": 0.29160920814205554 }, { "epoch": 2.9346232776311933, "grad_norm": 0.2765999510941062, "learning_rate": 0.00013645002736472348, "loss": 3.048184394836426, "step": 5006, "token_acc": 0.2951470122873116 }, { "epoch": 2.935209615948402, "grad_norm": 0.3236615863564798, "learning_rate": 0.0001364478939263676, "loss": 3.106018543243408, "step": 5007, "token_acc": 0.2862875321613709 }, { "epoch": 2.935795954265611, "grad_norm": 0.28419752540242177, "learning_rate": 0.0001364457598638233, "loss": 3.0738160610198975, "step": 5008, "token_acc": 0.2903925437104565 }, { "epoch": 2.9363822925828202, "grad_norm": 0.27126176401977553, "learning_rate": 0.00013644362517711064, "loss": 3.1003026962280273, "step": 5009, "token_acc": 0.2854216246390863 }, { "epoch": 2.9369686309000294, "grad_norm": 0.286879285616511, "learning_rate": 0.00013644148986624965, "loss": 3.063784599304199, "step": 5010, "token_acc": 0.2907265043481954 }, { "epoch": 2.9375549692172385, "grad_norm": 0.2753165379548571, "learning_rate": 0.00013643935393126036, "loss": 3.0766971111297607, "step": 5011, "token_acc": 0.2903539789969973 }, { "epoch": 2.9381413075344476, "grad_norm": 0.28601517825402545, "learning_rate": 0.0001364372173721629, "loss": 3.0568909645080566, "step": 5012, "token_acc": 0.2920344557068563 }, { "epoch": 2.9387276458516562, "grad_norm": 0.2716559850233657, "learning_rate": 0.0001364350801889773, "loss": 3.065601348876953, "step": 5013, "token_acc": 0.291685332388761 }, { "epoch": 2.9393139841688654, "grad_norm": 0.27357823747528187, "learning_rate": 0.00013643294238172365, "loss": 3.070812702178955, "step": 5014, "token_acc": 0.2908424085733901 }, { "epoch": 2.9399003224860745, "grad_norm": 0.2818159504292812, "learning_rate": 0.00013643080395042204, "loss": 3.071913242340088, "step": 5015, "token_acc": 0.2898937223579114 }, { "epoch": 2.9404866608032836, "grad_norm": 0.26146195808747574, "learning_rate": 0.0001364286648950925, "loss": 3.098595142364502, "step": 5016, "token_acc": 0.28553850386720064 }, { "epoch": 2.9410729991204922, "grad_norm": 0.2946662753083827, "learning_rate": 0.0001364265252157552, "loss": 3.0257067680358887, "step": 5017, "token_acc": 0.29813498705276065 }, { "epoch": 2.9416593374377014, "grad_norm": 0.27885021576765145, "learning_rate": 0.0001364243849124302, "loss": 3.04622483253479, "step": 5018, "token_acc": 0.29280674282245567 }, { "epoch": 2.9422456757549105, "grad_norm": 0.29761512837386006, "learning_rate": 0.00013642224398513762, "loss": 3.053049087524414, "step": 5019, "token_acc": 0.2926123010177535 }, { "epoch": 2.9428320140721196, "grad_norm": 0.3021773274975337, "learning_rate": 0.00013642010243389754, "loss": 3.0786664485931396, "step": 5020, "token_acc": 0.28989505891194073 }, { "epoch": 2.9434183523893287, "grad_norm": 0.2705874057321925, "learning_rate": 0.00013641796025873012, "loss": 3.0685856342315674, "step": 5021, "token_acc": 0.29262287688168903 }, { "epoch": 2.944004690706538, "grad_norm": 0.3070103263924474, "learning_rate": 0.00013641581745965547, "loss": 3.0478734970092773, "step": 5022, "token_acc": 0.29365975446854525 }, { "epoch": 2.944591029023747, "grad_norm": 0.37875277964159665, "learning_rate": 0.0001364136740366937, "loss": 3.0837907791137695, "step": 5023, "token_acc": 0.2899619508462835 }, { "epoch": 2.9451773673409556, "grad_norm": 0.36665526894275835, "learning_rate": 0.00013641152998986498, "loss": 3.0615291595458984, "step": 5024, "token_acc": 0.2937001998667555 }, { "epoch": 2.9457637056581647, "grad_norm": 0.3017481957160068, "learning_rate": 0.00013640938531918938, "loss": 3.064027786254883, "step": 5025, "token_acc": 0.2900226635972661 }, { "epoch": 2.946350043975374, "grad_norm": 0.30776079721288263, "learning_rate": 0.00013640724002468712, "loss": 3.084296226501465, "step": 5026, "token_acc": 0.28989610589298825 }, { "epoch": 2.946936382292583, "grad_norm": 0.3426569558191926, "learning_rate": 0.00013640509410637832, "loss": 3.14373779296875, "step": 5027, "token_acc": 0.2802171615657183 }, { "epoch": 2.9475227206097916, "grad_norm": 0.3497024310859073, "learning_rate": 0.00013640294756428315, "loss": 3.0802712440490723, "step": 5028, "token_acc": 0.28970475603497853 }, { "epoch": 2.9481090589270007, "grad_norm": 0.2544327623191903, "learning_rate": 0.00013640080039842173, "loss": 3.036038875579834, "step": 5029, "token_acc": 0.2946394998145829 }, { "epoch": 2.94869539724421, "grad_norm": 0.2908310395349893, "learning_rate": 0.00013639865260881432, "loss": 3.0567779541015625, "step": 5030, "token_acc": 0.2916891470161964 }, { "epoch": 2.949281735561419, "grad_norm": 0.2815486021729765, "learning_rate": 0.00013639650419548102, "loss": 3.0425474643707275, "step": 5031, "token_acc": 0.2939626458094532 }, { "epoch": 2.949868073878628, "grad_norm": 0.2721678776391117, "learning_rate": 0.000136394355158442, "loss": 3.0598974227905273, "step": 5032, "token_acc": 0.2910339072242214 }, { "epoch": 2.950454412195837, "grad_norm": 0.3416587668451502, "learning_rate": 0.00013639220549771752, "loss": 3.1210145950317383, "step": 5033, "token_acc": 0.2849699528506206 }, { "epoch": 2.9510407505130463, "grad_norm": 0.2936797079149699, "learning_rate": 0.00013639005521332774, "loss": 3.038461446762085, "step": 5034, "token_acc": 0.2947490679211061 }, { "epoch": 2.951627088830255, "grad_norm": 0.30741288119880117, "learning_rate": 0.00013638790430529283, "loss": 3.07912278175354, "step": 5035, "token_acc": 0.29012272992458504 }, { "epoch": 2.952213427147464, "grad_norm": 0.3109164145975344, "learning_rate": 0.00013638575277363302, "loss": 3.063770294189453, "step": 5036, "token_acc": 0.2908101915167029 }, { "epoch": 2.952799765464673, "grad_norm": 0.28501500180686534, "learning_rate": 0.00013638360061836853, "loss": 3.073878526687622, "step": 5037, "token_acc": 0.28878080879641055 }, { "epoch": 2.9533861037818823, "grad_norm": 0.2901779524179545, "learning_rate": 0.00013638144783951957, "loss": 3.036140203475952, "step": 5038, "token_acc": 0.29560738035028566 }, { "epoch": 2.953972442099091, "grad_norm": 0.3060192026031789, "learning_rate": 0.00013637929443710635, "loss": 3.062993049621582, "step": 5039, "token_acc": 0.2928577222264377 }, { "epoch": 2.9545587804163, "grad_norm": 0.357701489648701, "learning_rate": 0.0001363771404111491, "loss": 3.075578212738037, "step": 5040, "token_acc": 0.29103592188044775 }, { "epoch": 2.955145118733509, "grad_norm": 0.3167473843322589, "learning_rate": 0.00013637498576166805, "loss": 3.1182470321655273, "step": 5041, "token_acc": 0.2842987764663673 }, { "epoch": 2.9557314570507183, "grad_norm": 0.3354989515501741, "learning_rate": 0.00013637283048868347, "loss": 3.088789939880371, "step": 5042, "token_acc": 0.28890104894057816 }, { "epoch": 2.9563177953679274, "grad_norm": 0.3013429000464083, "learning_rate": 0.00013637067459221558, "loss": 3.06396222114563, "step": 5043, "token_acc": 0.2912091597696642 }, { "epoch": 2.9569041336851365, "grad_norm": 0.2816527102572718, "learning_rate": 0.00013636851807228466, "loss": 3.0484938621520996, "step": 5044, "token_acc": 0.2933572271378747 }, { "epoch": 2.9574904720023456, "grad_norm": 0.2814280883159842, "learning_rate": 0.0001363663609289109, "loss": 3.0736541748046875, "step": 5045, "token_acc": 0.2888682942329079 }, { "epoch": 2.9580768103195543, "grad_norm": 0.32474160527248996, "learning_rate": 0.00013636420316211464, "loss": 3.0716938972473145, "step": 5046, "token_acc": 0.29186426776306795 }, { "epoch": 2.9586631486367634, "grad_norm": 0.31426110042074556, "learning_rate": 0.0001363620447719161, "loss": 3.050072193145752, "step": 5047, "token_acc": 0.29182473719266744 }, { "epoch": 2.9592494869539725, "grad_norm": 0.3249605605291814, "learning_rate": 0.0001363598857583356, "loss": 3.100602149963379, "step": 5048, "token_acc": 0.28758258237972356 }, { "epoch": 2.9598358252711816, "grad_norm": 0.366975913555725, "learning_rate": 0.00013635772612139338, "loss": 3.0870258808135986, "step": 5049, "token_acc": 0.2885171062527213 }, { "epoch": 2.9604221635883903, "grad_norm": 0.41727694137482224, "learning_rate": 0.00013635556586110974, "loss": 3.0418734550476074, "step": 5050, "token_acc": 0.29484417904081467 }, { "epoch": 2.9610085019055994, "grad_norm": 0.4144930196425431, "learning_rate": 0.00013635340497750495, "loss": 3.069295644760132, "step": 5051, "token_acc": 0.2916479224190181 }, { "epoch": 2.9615948402228085, "grad_norm": 0.3271879148712174, "learning_rate": 0.0001363512434705994, "loss": 3.080421209335327, "step": 5052, "token_acc": 0.2898569297846176 }, { "epoch": 2.9621811785400176, "grad_norm": 0.29324398731142676, "learning_rate": 0.00013634908134041326, "loss": 3.0160815715789795, "step": 5053, "token_acc": 0.2982172231123813 }, { "epoch": 2.9627675168572267, "grad_norm": 0.3258940986720766, "learning_rate": 0.00013634691858696693, "loss": 3.056901454925537, "step": 5054, "token_acc": 0.29254112998895687 }, { "epoch": 2.963353855174436, "grad_norm": 0.2907213764140246, "learning_rate": 0.0001363447552102807, "loss": 3.0317893028259277, "step": 5055, "token_acc": 0.2964130852770742 }, { "epoch": 2.963940193491645, "grad_norm": 0.3062349911409993, "learning_rate": 0.0001363425912103749, "loss": 3.0666451454162598, "step": 5056, "token_acc": 0.2907197114429381 }, { "epoch": 2.9645265318088536, "grad_norm": 0.32636203550387194, "learning_rate": 0.00013634042658726983, "loss": 3.042525053024292, "step": 5057, "token_acc": 0.29569820860985097 }, { "epoch": 2.9651128701260627, "grad_norm": 0.24322285236328897, "learning_rate": 0.0001363382613409859, "loss": 3.020813226699829, "step": 5058, "token_acc": 0.2984867833293795 }, { "epoch": 2.965699208443272, "grad_norm": 0.36182778387954334, "learning_rate": 0.00013633609547154335, "loss": 3.032301425933838, "step": 5059, "token_acc": 0.29459446299566133 }, { "epoch": 2.966285546760481, "grad_norm": 0.2777151310650473, "learning_rate": 0.00013633392897896261, "loss": 3.117042064666748, "step": 5060, "token_acc": 0.2839379492522548 }, { "epoch": 2.9668718850776896, "grad_norm": 0.34344179944575226, "learning_rate": 0.00013633176186326394, "loss": 3.066194534301758, "step": 5061, "token_acc": 0.29041129910067953 }, { "epoch": 2.9674582233948987, "grad_norm": 0.32159209078980083, "learning_rate": 0.0001363295941244678, "loss": 3.0764331817626953, "step": 5062, "token_acc": 0.2882871605498486 }, { "epoch": 2.968044561712108, "grad_norm": 0.3116400205960304, "learning_rate": 0.0001363274257625945, "loss": 3.028153896331787, "step": 5063, "token_acc": 0.29643538311459333 }, { "epoch": 2.968630900029317, "grad_norm": 0.34566690995484034, "learning_rate": 0.0001363252567776644, "loss": 3.0133235454559326, "step": 5064, "token_acc": 0.2977909531796492 }, { "epoch": 2.969217238346526, "grad_norm": 0.32993049613609116, "learning_rate": 0.00013632308716969785, "loss": 3.0487701892852783, "step": 5065, "token_acc": 0.29457125885697316 }, { "epoch": 2.969803576663735, "grad_norm": 0.3331137804136902, "learning_rate": 0.00013632091693871533, "loss": 3.0613999366760254, "step": 5066, "token_acc": 0.2929213423389885 }, { "epoch": 2.970389914980944, "grad_norm": 0.32530746947130224, "learning_rate": 0.00013631874608473711, "loss": 3.0511653423309326, "step": 5067, "token_acc": 0.2927549490483088 }, { "epoch": 2.970976253298153, "grad_norm": 0.32281330716448636, "learning_rate": 0.00013631657460778368, "loss": 3.002755641937256, "step": 5068, "token_acc": 0.2995957872581301 }, { "epoch": 2.971562591615362, "grad_norm": 0.3094091551811668, "learning_rate": 0.00013631440250787537, "loss": 3.0356221199035645, "step": 5069, "token_acc": 0.2964823986294157 }, { "epoch": 2.972148929932571, "grad_norm": 0.3695372309072402, "learning_rate": 0.00013631222978503261, "loss": 3.095733880996704, "step": 5070, "token_acc": 0.28617151665337537 }, { "epoch": 2.97273526824978, "grad_norm": 0.3602856961512266, "learning_rate": 0.00013631005643927583, "loss": 3.0674257278442383, "step": 5071, "token_acc": 0.290475487102362 }, { "epoch": 2.973321606566989, "grad_norm": 0.2851985176519805, "learning_rate": 0.0001363078824706254, "loss": 3.0630340576171875, "step": 5072, "token_acc": 0.29165830335407433 }, { "epoch": 2.973907944884198, "grad_norm": 0.3852149031223918, "learning_rate": 0.00013630570787910177, "loss": 3.0783982276916504, "step": 5073, "token_acc": 0.28978509373571104 }, { "epoch": 2.974494283201407, "grad_norm": 0.350012734993071, "learning_rate": 0.00013630353266472537, "loss": 3.0373401641845703, "step": 5074, "token_acc": 0.29495274345058065 }, { "epoch": 2.9750806215186163, "grad_norm": 0.3762222832975856, "learning_rate": 0.0001363013568275166, "loss": 3.0902204513549805, "step": 5075, "token_acc": 0.2880220289568883 }, { "epoch": 2.9756669598358254, "grad_norm": 0.3618663933291691, "learning_rate": 0.00013629918036749597, "loss": 3.0512664318084717, "step": 5076, "token_acc": 0.29483844468784226 }, { "epoch": 2.9762532981530345, "grad_norm": 0.3208166237796758, "learning_rate": 0.00013629700328468384, "loss": 3.0551252365112305, "step": 5077, "token_acc": 0.2912604052022136 }, { "epoch": 2.976839636470243, "grad_norm": 0.43641435860404487, "learning_rate": 0.0001362948255791007, "loss": 3.026806116104126, "step": 5078, "token_acc": 0.29833368797959336 }, { "epoch": 2.9774259747874523, "grad_norm": 0.3033146370799069, "learning_rate": 0.00013629264725076705, "loss": 3.018017053604126, "step": 5079, "token_acc": 0.2990045077902382 }, { "epoch": 2.9780123131046614, "grad_norm": 0.3334356384644916, "learning_rate": 0.00013629046829970328, "loss": 3.057142734527588, "step": 5080, "token_acc": 0.2933613716570025 }, { "epoch": 2.9785986514218705, "grad_norm": 0.29101575068654045, "learning_rate": 0.0001362882887259299, "loss": 3.073469638824463, "step": 5081, "token_acc": 0.28909883489688015 }, { "epoch": 2.979184989739079, "grad_norm": 0.3522886872369336, "learning_rate": 0.00013628610852946734, "loss": 3.0624747276306152, "step": 5082, "token_acc": 0.2904996813052025 }, { "epoch": 2.9797713280562883, "grad_norm": 0.2732145934840157, "learning_rate": 0.00013628392771033616, "loss": 3.112614393234253, "step": 5083, "token_acc": 0.2859487957398735 }, { "epoch": 2.9803576663734974, "grad_norm": 0.3415988055820798, "learning_rate": 0.00013628174626855675, "loss": 3.0622897148132324, "step": 5084, "token_acc": 0.2929668926354114 }, { "epoch": 2.9809440046907065, "grad_norm": 0.2716813689985832, "learning_rate": 0.00013627956420414968, "loss": 3.061203956604004, "step": 5085, "token_acc": 0.2932223227110709 }, { "epoch": 2.9815303430079156, "grad_norm": 0.3284529104383866, "learning_rate": 0.0001362773815171354, "loss": 3.0636706352233887, "step": 5086, "token_acc": 0.2907071972251662 }, { "epoch": 2.9821166813251248, "grad_norm": 0.29128177793991056, "learning_rate": 0.00013627519820753444, "loss": 3.0941479206085205, "step": 5087, "token_acc": 0.2850295259881896 }, { "epoch": 2.982703019642334, "grad_norm": 0.3044754642911704, "learning_rate": 0.0001362730142753673, "loss": 3.104616641998291, "step": 5088, "token_acc": 0.2848116422584508 }, { "epoch": 2.9832893579595425, "grad_norm": 0.28446607656173484, "learning_rate": 0.00013627082972065448, "loss": 3.0720901489257812, "step": 5089, "token_acc": 0.29115595287308804 }, { "epoch": 2.9838756962767516, "grad_norm": 0.27921581035105186, "learning_rate": 0.00013626864454341654, "loss": 3.0732390880584717, "step": 5090, "token_acc": 0.29101294423210927 }, { "epoch": 2.9844620345939608, "grad_norm": 0.3108962166570425, "learning_rate": 0.000136266458743674, "loss": 3.0294482707977295, "step": 5091, "token_acc": 0.2964264430881779 }, { "epoch": 2.98504837291117, "grad_norm": 0.288786195872828, "learning_rate": 0.00013626427232144733, "loss": 3.0725369453430176, "step": 5092, "token_acc": 0.2897948169805191 }, { "epoch": 2.9856347112283785, "grad_norm": 0.2899785032171189, "learning_rate": 0.00013626208527675712, "loss": 3.045457124710083, "step": 5093, "token_acc": 0.293720648570496 }, { "epoch": 2.9862210495455876, "grad_norm": 0.2838222308098357, "learning_rate": 0.00013625989760962393, "loss": 3.057738780975342, "step": 5094, "token_acc": 0.2919088696727345 }, { "epoch": 2.9868073878627968, "grad_norm": 0.26958868885718185, "learning_rate": 0.00013625770932006826, "loss": 3.005828380584717, "step": 5095, "token_acc": 0.2989685741201478 }, { "epoch": 2.987393726180006, "grad_norm": 0.29840575465118574, "learning_rate": 0.0001362555204081107, "loss": 3.038477659225464, "step": 5096, "token_acc": 0.29393617439892966 }, { "epoch": 2.987980064497215, "grad_norm": 0.30503549006573216, "learning_rate": 0.00013625333087377185, "loss": 3.0915145874023438, "step": 5097, "token_acc": 0.28663287457910286 }, { "epoch": 2.988566402814424, "grad_norm": 0.2655160888565439, "learning_rate": 0.00013625114071707218, "loss": 3.0453274250030518, "step": 5098, "token_acc": 0.2936305616858615 }, { "epoch": 2.989152741131633, "grad_norm": 0.3019952983141854, "learning_rate": 0.00013624894993803234, "loss": 3.06643009185791, "step": 5099, "token_acc": 0.29185535210343244 }, { "epoch": 2.989739079448842, "grad_norm": 0.2782010071089016, "learning_rate": 0.00013624675853667292, "loss": 3.02945876121521, "step": 5100, "token_acc": 0.2955740625469531 }, { "epoch": 2.990325417766051, "grad_norm": 0.29022997425637564, "learning_rate": 0.00013624456651301442, "loss": 3.0640146732330322, "step": 5101, "token_acc": 0.2920845729410355 }, { "epoch": 2.99091175608326, "grad_norm": 0.352831237249997, "learning_rate": 0.0001362423738670775, "loss": 3.07149600982666, "step": 5102, "token_acc": 0.2899489746035689 }, { "epoch": 2.991498094400469, "grad_norm": 0.2916538850319864, "learning_rate": 0.00013624018059888276, "loss": 3.060878038406372, "step": 5103, "token_acc": 0.2937006948075831 }, { "epoch": 2.992084432717678, "grad_norm": 0.29713505292256487, "learning_rate": 0.00013623798670845075, "loss": 3.0834810733795166, "step": 5104, "token_acc": 0.28720010307950006 }, { "epoch": 2.992670771034887, "grad_norm": 0.2957345732471271, "learning_rate": 0.00013623579219580213, "loss": 3.081881046295166, "step": 5105, "token_acc": 0.2890877467836801 }, { "epoch": 2.993257109352096, "grad_norm": 0.30576726584363506, "learning_rate": 0.00013623359706095749, "loss": 3.0930802822113037, "step": 5106, "token_acc": 0.2872140427958852 }, { "epoch": 2.993843447669305, "grad_norm": 0.3338994892191529, "learning_rate": 0.00013623140130393746, "loss": 3.0544955730438232, "step": 5107, "token_acc": 0.29395764882517916 }, { "epoch": 2.9944297859865143, "grad_norm": 0.29969365244058976, "learning_rate": 0.00013622920492476265, "loss": 3.064077377319336, "step": 5108, "token_acc": 0.29168774554652604 }, { "epoch": 2.9950161243037234, "grad_norm": 0.4039136757450711, "learning_rate": 0.00013622700792345372, "loss": 3.0626978874206543, "step": 5109, "token_acc": 0.2921200832216812 }, { "epoch": 2.9956024626209325, "grad_norm": 0.3041044296242887, "learning_rate": 0.00013622481030003129, "loss": 3.0699901580810547, "step": 5110, "token_acc": 0.29167546118451604 }, { "epoch": 2.996188800938141, "grad_norm": 0.3376620845892908, "learning_rate": 0.000136222612054516, "loss": 3.0865955352783203, "step": 5111, "token_acc": 0.2881660006770968 }, { "epoch": 2.9967751392553503, "grad_norm": 0.3480892511274235, "learning_rate": 0.00013622041318692854, "loss": 3.0646862983703613, "step": 5112, "token_acc": 0.29179754035597644 }, { "epoch": 2.9973614775725594, "grad_norm": 0.35544446660291185, "learning_rate": 0.00013621821369728948, "loss": 3.059173822402954, "step": 5113, "token_acc": 0.29277986310443804 }, { "epoch": 2.9979478158897686, "grad_norm": 0.33960109007722494, "learning_rate": 0.00013621601358561954, "loss": 3.029160261154175, "step": 5114, "token_acc": 0.2970972777578973 }, { "epoch": 2.998534154206977, "grad_norm": 0.3080845228092365, "learning_rate": 0.00013621381285193942, "loss": 3.107654094696045, "step": 5115, "token_acc": 0.28583785734375583 }, { "epoch": 2.9991204925241863, "grad_norm": 0.3503238901929032, "learning_rate": 0.00013621161149626973, "loss": 2.9973530769348145, "step": 5116, "token_acc": 0.30067839634713667 }, { "epoch": 2.9997068308413954, "grad_norm": 0.3222596217633866, "learning_rate": 0.00013620940951863115, "loss": 3.0677568912506104, "step": 5117, "token_acc": 0.28862210858535414 }, { "epoch": 3.0, "grad_norm": 0.3686385325390872, "learning_rate": 0.0001362072069190444, "loss": 3.057602882385254, "step": 5118, "token_acc": 0.2944489963129865 }, { "epoch": 3.0, "eval_loss": 3.0860543251037598, "eval_runtime": 22.0388, "eval_samples_per_second": 11.616, "eval_steps_per_second": 1.452, "eval_token_acc": 0.2885165749617454, "step": 5118 } ], "logging_steps": 1, "max_steps": 34120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": -34120, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5760517546672128.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }