{ "best_global_step": 6824, "best_metric": 3.07427096, "best_model_checkpoint": "/inspire/hdd/project/deepanalysis/guitao-25013/Muse/workspace/Finals/ckpt/Muse_4b_main_2e-4/v0-20251228-195009/checkpoint-6824", "epoch": 6.0, "eval_steps": 500, "global_step": 10236, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005863383172090296, "grad_norm": 584.2804376471709, "learning_rate": 1.1723329425556858e-07, "loss": 24.62055206298828, "step": 1, "token_acc": 0.007686354255189996 }, { "epoch": 0.0011726766344180592, "grad_norm": 585.5252394109084, "learning_rate": 2.3446658851113715e-07, "loss": 24.677995681762695, "step": 2, "token_acc": 0.008093503709854125 }, { "epoch": 0.001759014951627089, "grad_norm": 585.7065303934635, "learning_rate": 3.516998827667058e-07, "loss": 24.603187561035156, "step": 3, "token_acc": 0.008458956494478706 }, { "epoch": 0.0023453532688361184, "grad_norm": 574.3386500798841, "learning_rate": 4.689331770222743e-07, "loss": 24.61533546447754, "step": 4, "token_acc": 0.008429689439370778 }, { "epoch": 0.002931691586045148, "grad_norm": 586.9828711617042, "learning_rate": 5.861664712778429e-07, "loss": 24.568662643432617, "step": 5, "token_acc": 0.00815803723341387 }, { "epoch": 0.003518029903254178, "grad_norm": 581.1894751304563, "learning_rate": 7.033997655334116e-07, "loss": 24.425533294677734, "step": 6, "token_acc": 0.008231019863606543 }, { "epoch": 0.004104368220463207, "grad_norm": 576.582735702365, "learning_rate": 8.206330597889802e-07, "loss": 24.278076171875, "step": 7, "token_acc": 0.008396241956107345 }, { "epoch": 0.004690706537672237, "grad_norm": 559.7170641683947, "learning_rate": 9.378663540445486e-07, "loss": 23.611698150634766, "step": 8, "token_acc": 0.008459112531510722 }, { "epoch": 0.005277044854881266, "grad_norm": 553.2699738013441, "learning_rate": 1.0550996483001172e-06, "loss": 23.430171966552734, "step": 9, "token_acc": 0.008667536718691499 }, { "epoch": 0.005863383172090296, "grad_norm": 470.6857657644861, "learning_rate": 1.1723329425556858e-06, "loss": 21.311586380004883, "step": 10, "token_acc": 0.008872265692218927 }, { "epoch": 0.006449721489299325, "grad_norm": 465.08434745509754, "learning_rate": 1.2895662368112545e-06, "loss": 21.151081085205078, "step": 11, "token_acc": 0.009220426307274241 }, { "epoch": 0.007036059806508356, "grad_norm": 435.77600914753805, "learning_rate": 1.4067995310668232e-06, "loss": 20.59938621520996, "step": 12, "token_acc": 0.009601930789116425 }, { "epoch": 0.007622398123717385, "grad_norm": 247.10891420254228, "learning_rate": 1.5240328253223916e-06, "loss": 18.007747650146484, "step": 13, "token_acc": 0.007578861663965909 }, { "epoch": 0.008208736440926415, "grad_norm": 237.9853724112765, "learning_rate": 1.6412661195779603e-06, "loss": 17.838977813720703, "step": 14, "token_acc": 0.007391450069974441 }, { "epoch": 0.008795074758135445, "grad_norm": 226.18401166349722, "learning_rate": 1.7584994138335288e-06, "loss": 17.58853530883789, "step": 15, "token_acc": 0.00753833547933981 }, { "epoch": 0.009381413075344474, "grad_norm": 212.04199238263578, "learning_rate": 1.8757327080890972e-06, "loss": 17.134864807128906, "step": 16, "token_acc": 0.00739965274231437 }, { "epoch": 0.009967751392553504, "grad_norm": 203.06989705564112, "learning_rate": 1.992966002344666e-06, "loss": 16.87917709350586, "step": 17, "token_acc": 0.007535626739789771 }, { "epoch": 0.010554089709762533, "grad_norm": 215.18456945822558, "learning_rate": 2.1101992966002344e-06, "loss": 15.041175842285156, "step": 18, "token_acc": 0.007634953613450764 }, { "epoch": 0.011140428026971563, "grad_norm": 227.02143780993362, "learning_rate": 2.2274325908558035e-06, "loss": 14.484148025512695, "step": 19, "token_acc": 0.006797998652741568 }, { "epoch": 0.011726766344180592, "grad_norm": 183.1105892045818, "learning_rate": 2.3446658851113717e-06, "loss": 14.200407028198242, "step": 20, "token_acc": 0.0065096879784594264 }, { "epoch": 0.012313104661389622, "grad_norm": 129.9586725795648, "learning_rate": 2.4618991793669404e-06, "loss": 13.929039001464844, "step": 21, "token_acc": 0.006199476003017114 }, { "epoch": 0.01289944297859865, "grad_norm": 93.69074694202115, "learning_rate": 2.579132473622509e-06, "loss": 13.488332748413086, "step": 22, "token_acc": 0.005730457203949437 }, { "epoch": 0.013485781295807681, "grad_norm": 74.83117664490601, "learning_rate": 2.6963657678780773e-06, "loss": 13.16888427734375, "step": 23, "token_acc": 0.005586667167793307 }, { "epoch": 0.014072119613016711, "grad_norm": 59.01013500162835, "learning_rate": 2.8135990621336464e-06, "loss": 12.899251937866211, "step": 24, "token_acc": 0.005102690654354402 }, { "epoch": 0.01465845793022574, "grad_norm": 27.82824358924883, "learning_rate": 2.9308323563892146e-06, "loss": 12.312295913696289, "step": 25, "token_acc": 0.003928652065929104 }, { "epoch": 0.01524479624743477, "grad_norm": 14.386423261749979, "learning_rate": 3.0480656506447833e-06, "loss": 12.14841079711914, "step": 26, "token_acc": 0.0035273685317681225 }, { "epoch": 0.0158311345646438, "grad_norm": 7.000013089518929, "learning_rate": 3.1652989449003515e-06, "loss": 12.062915802001953, "step": 27, "token_acc": 0.006908471711882418 }, { "epoch": 0.01641747288185283, "grad_norm": 4.7552241501145, "learning_rate": 3.2825322391559206e-06, "loss": 12.0260009765625, "step": 28, "token_acc": 0.008021770475822993 }, { "epoch": 0.017003811199061858, "grad_norm": 2.9608083367079754, "learning_rate": 3.3997655334114893e-06, "loss": 11.992738723754883, "step": 29, "token_acc": 0.00914451688065049 }, { "epoch": 0.01759014951627089, "grad_norm": 2.141838994655946, "learning_rate": 3.5169988276670575e-06, "loss": 11.975879669189453, "step": 30, "token_acc": 0.009006903876792352 }, { "epoch": 0.01817648783347992, "grad_norm": 1.2898203877804213, "learning_rate": 3.6342321219226262e-06, "loss": 11.957919120788574, "step": 31, "token_acc": 0.009075683445647444 }, { "epoch": 0.018762826150688947, "grad_norm": 0.9495857459426789, "learning_rate": 3.7514654161781945e-06, "loss": 11.939529418945312, "step": 32, "token_acc": 0.009957634316987306 }, { "epoch": 0.019349164467897976, "grad_norm": 0.6584091413204045, "learning_rate": 3.8686987104337636e-06, "loss": 11.937095642089844, "step": 33, "token_acc": 0.009662900129048503 }, { "epoch": 0.019935502785107008, "grad_norm": 0.688413907968875, "learning_rate": 3.985932004689332e-06, "loss": 11.92875862121582, "step": 34, "token_acc": 0.009882162229565434 }, { "epoch": 0.020521841102316037, "grad_norm": 0.5648461988789549, "learning_rate": 4.103165298944901e-06, "loss": 11.915830612182617, "step": 35, "token_acc": 0.010540905856617228 }, { "epoch": 0.021108179419525065, "grad_norm": 0.48416007694292196, "learning_rate": 4.220398593200469e-06, "loss": 11.917096138000488, "step": 36, "token_acc": 0.010059371030048482 }, { "epoch": 0.021694517736734097, "grad_norm": 0.46871120331217914, "learning_rate": 4.337631887456037e-06, "loss": 11.916532516479492, "step": 37, "token_acc": 0.009734524520581022 }, { "epoch": 0.022280856053943126, "grad_norm": 0.46509747869489587, "learning_rate": 4.454865181711607e-06, "loss": 11.914100646972656, "step": 38, "token_acc": 0.009585416977414215 }, { "epoch": 0.022867194371152155, "grad_norm": 0.4139732923446741, "learning_rate": 4.572098475967175e-06, "loss": 11.90422248840332, "step": 39, "token_acc": 0.010022894937674892 }, { "epoch": 0.023453532688361183, "grad_norm": 0.43569069749728834, "learning_rate": 4.689331770222743e-06, "loss": 11.899462699890137, "step": 40, "token_acc": 0.00998906306963179 }, { "epoch": 0.024039871005570215, "grad_norm": 0.4427395068446952, "learning_rate": 4.806565064478312e-06, "loss": 11.893888473510742, "step": 41, "token_acc": 0.009981918543302703 }, { "epoch": 0.024626209322779244, "grad_norm": 0.41295279047613054, "learning_rate": 4.923798358733881e-06, "loss": 11.887333869934082, "step": 42, "token_acc": 0.010011369961669247 }, { "epoch": 0.025212547639988273, "grad_norm": 0.44474419506351187, "learning_rate": 5.041031652989449e-06, "loss": 11.893207550048828, "step": 43, "token_acc": 0.008911209871034638 }, { "epoch": 0.0257988859571973, "grad_norm": 0.45640471360812557, "learning_rate": 5.158264947245018e-06, "loss": 11.87860107421875, "step": 44, "token_acc": 0.00932694862883434 }, { "epoch": 0.026385224274406333, "grad_norm": 0.5137403210835171, "learning_rate": 5.275498241500587e-06, "loss": 11.868346214294434, "step": 45, "token_acc": 0.009457271649877254 }, { "epoch": 0.026971562591615362, "grad_norm": 0.48121145685232086, "learning_rate": 5.3927315357561546e-06, "loss": 11.854333877563477, "step": 46, "token_acc": 0.009954296567402753 }, { "epoch": 0.02755790090882439, "grad_norm": 0.5757201494805352, "learning_rate": 5.509964830011723e-06, "loss": 11.848532676696777, "step": 47, "token_acc": 0.00974159406980461 }, { "epoch": 0.028144239226033423, "grad_norm": 0.538540019928557, "learning_rate": 5.627198124267293e-06, "loss": 11.834402084350586, "step": 48, "token_acc": 0.010114053269194533 }, { "epoch": 0.02873057754324245, "grad_norm": 0.6339441614898844, "learning_rate": 5.7444314185228606e-06, "loss": 11.829710006713867, "step": 49, "token_acc": 0.00965653692444547 }, { "epoch": 0.02931691586045148, "grad_norm": 0.6309417881063738, "learning_rate": 5.861664712778429e-06, "loss": 11.810378074645996, "step": 50, "token_acc": 0.010177322843888137 }, { "epoch": 0.02990325417766051, "grad_norm": 0.8037078188876334, "learning_rate": 5.978898007033998e-06, "loss": 11.799762725830078, "step": 51, "token_acc": 0.00994983299427297 }, { "epoch": 0.03048959249486954, "grad_norm": 0.7589222871188954, "learning_rate": 6.096131301289567e-06, "loss": 11.775461196899414, "step": 52, "token_acc": 0.010665230272017236 }, { "epoch": 0.03107593081207857, "grad_norm": 0.891510021568239, "learning_rate": 6.213364595545135e-06, "loss": 11.758807182312012, "step": 53, "token_acc": 0.010381088131673077 }, { "epoch": 0.0316622691292876, "grad_norm": 1.0221106596531047, "learning_rate": 6.330597889800703e-06, "loss": 11.738049507141113, "step": 54, "token_acc": 0.009833599549249058 }, { "epoch": 0.03224860744649663, "grad_norm": 1.2614499319305532, "learning_rate": 6.447831184056272e-06, "loss": 11.710914611816406, "step": 55, "token_acc": 0.009808991012105984 }, { "epoch": 0.03283494576370566, "grad_norm": 1.4989446931123451, "learning_rate": 6.565064478311841e-06, "loss": 11.674249649047852, "step": 56, "token_acc": 0.010150821438190143 }, { "epoch": 0.03342128408091469, "grad_norm": 2.0703781344125622, "learning_rate": 6.68229777256741e-06, "loss": 11.643367767333984, "step": 57, "token_acc": 0.009861996179177854 }, { "epoch": 0.034007622398123716, "grad_norm": 2.7785040239090337, "learning_rate": 6.799531066822979e-06, "loss": 11.613290786743164, "step": 58, "token_acc": 0.009541249531284511 }, { "epoch": 0.034593960715332744, "grad_norm": 2.457524019443084, "learning_rate": 6.916764361078546e-06, "loss": 11.562480926513672, "step": 59, "token_acc": 0.01030941659537149 }, { "epoch": 0.03518029903254178, "grad_norm": 15.595939019506787, "learning_rate": 7.033997655334115e-06, "loss": 11.552815437316895, "step": 60, "token_acc": 0.009987452006606464 }, { "epoch": 0.03576663734975081, "grad_norm": 2.9202319636395058, "learning_rate": 7.151230949589684e-06, "loss": 11.480331420898438, "step": 61, "token_acc": 0.010081490959432815 }, { "epoch": 0.03635297566695984, "grad_norm": 2.4298458485314085, "learning_rate": 7.2684642438452524e-06, "loss": 11.429304122924805, "step": 62, "token_acc": 0.009682069716251167 }, { "epoch": 0.036939313984168866, "grad_norm": 28.665591175799804, "learning_rate": 7.38569753810082e-06, "loss": 11.454853057861328, "step": 63, "token_acc": 0.009593557236477977 }, { "epoch": 0.037525652301377894, "grad_norm": 5.095442403329534, "learning_rate": 7.502930832356389e-06, "loss": 11.323440551757812, "step": 64, "token_acc": 0.009494669682006818 }, { "epoch": 0.03811199061858692, "grad_norm": 5.679296974668429, "learning_rate": 7.620164126611958e-06, "loss": 11.302069664001465, "step": 65, "token_acc": 0.009763125864741436 }, { "epoch": 0.03869832893579595, "grad_norm": 5.267458930049111, "learning_rate": 7.737397420867527e-06, "loss": 11.252420425415039, "step": 66, "token_acc": 0.009664113140836771 }, { "epoch": 0.03928466725300499, "grad_norm": 5.744586630979143, "learning_rate": 7.854630715123097e-06, "loss": 11.17559814453125, "step": 67, "token_acc": 0.009575511602198475 }, { "epoch": 0.039871005570214016, "grad_norm": 12.534206770958226, "learning_rate": 7.971864009378664e-06, "loss": 11.125930786132812, "step": 68, "token_acc": 0.009746186611111684 }, { "epoch": 0.040457343887423045, "grad_norm": 9.747991427787758, "learning_rate": 8.089097303634232e-06, "loss": 11.078652381896973, "step": 69, "token_acc": 0.01034937890285831 }, { "epoch": 0.04104368220463207, "grad_norm": 5.9133151907886115, "learning_rate": 8.206330597889802e-06, "loss": 11.016322135925293, "step": 70, "token_acc": 0.009599121765713144 }, { "epoch": 0.0416300205218411, "grad_norm": 7.909715485811744, "learning_rate": 8.32356389214537e-06, "loss": 10.97691535949707, "step": 71, "token_acc": 0.009460101329955669 }, { "epoch": 0.04221635883905013, "grad_norm": 4.201718910330622, "learning_rate": 8.440797186400937e-06, "loss": 10.892742156982422, "step": 72, "token_acc": 0.010042107779976887 }, { "epoch": 0.04280269715625916, "grad_norm": 5.546328235312145, "learning_rate": 8.558030480656507e-06, "loss": 10.84189224243164, "step": 73, "token_acc": 0.00965858873464549 }, { "epoch": 0.043389035473468195, "grad_norm": 5.738914691308477, "learning_rate": 8.675263774912075e-06, "loss": 10.781645774841309, "step": 74, "token_acc": 0.00988893140261154 }, { "epoch": 0.04397537379067722, "grad_norm": 3.862020542912135, "learning_rate": 8.792497069167643e-06, "loss": 10.690727233886719, "step": 75, "token_acc": 0.010311249857864977 }, { "epoch": 0.04456171210788625, "grad_norm": 4.329753834367066, "learning_rate": 8.909730363423214e-06, "loss": 10.62625503540039, "step": 76, "token_acc": 0.009845417740154582 }, { "epoch": 0.04514805042509528, "grad_norm": 5.141225655320356, "learning_rate": 9.026963657678782e-06, "loss": 10.559036254882812, "step": 77, "token_acc": 0.010230417527201116 }, { "epoch": 0.04573438874230431, "grad_norm": 7.79799248442612, "learning_rate": 9.14419695193435e-06, "loss": 10.475767135620117, "step": 78, "token_acc": 0.009848835496833646 }, { "epoch": 0.04632072705951334, "grad_norm": 5.76678187359056, "learning_rate": 9.261430246189919e-06, "loss": 10.391319274902344, "step": 79, "token_acc": 0.010374260292944979 }, { "epoch": 0.046907065376722366, "grad_norm": 4.725496790498841, "learning_rate": 9.378663540445487e-06, "loss": 10.323711395263672, "step": 80, "token_acc": 0.010034155478255395 }, { "epoch": 0.047493403693931395, "grad_norm": 6.699213727205319, "learning_rate": 9.495896834701056e-06, "loss": 10.274932861328125, "step": 81, "token_acc": 0.010143984210553942 }, { "epoch": 0.04807974201114043, "grad_norm": 7.551130505152835, "learning_rate": 9.613130128956624e-06, "loss": 10.208810806274414, "step": 82, "token_acc": 0.009235846796731522 }, { "epoch": 0.04866608032834946, "grad_norm": 4.267879546505137, "learning_rate": 9.730363423212192e-06, "loss": 10.137231826782227, "step": 83, "token_acc": 0.009774705608551664 }, { "epoch": 0.04925241864555849, "grad_norm": 5.926476780253599, "learning_rate": 9.847596717467761e-06, "loss": 10.076465606689453, "step": 84, "token_acc": 0.00937833267980549 }, { "epoch": 0.049838756962767516, "grad_norm": 9.139763924223722, "learning_rate": 9.96483001172333e-06, "loss": 10.046934127807617, "step": 85, "token_acc": 0.009814420553032374 }, { "epoch": 0.050425095279976545, "grad_norm": 3.0602941868449354, "learning_rate": 1.0082063305978899e-05, "loss": 9.955390930175781, "step": 86, "token_acc": 0.008730275995944383 }, { "epoch": 0.051011433597185574, "grad_norm": 4.009236491484756, "learning_rate": 1.0199296600234468e-05, "loss": 9.890663146972656, "step": 87, "token_acc": 0.009424335670251352 }, { "epoch": 0.0515977719143946, "grad_norm": 12.892889552308237, "learning_rate": 1.0316529894490036e-05, "loss": 9.887292861938477, "step": 88, "token_acc": 0.009787613606229357 }, { "epoch": 0.05218411023160364, "grad_norm": 7.248811196026036, "learning_rate": 1.0433763188745604e-05, "loss": 9.79570484161377, "step": 89, "token_acc": 0.009228338721805463 }, { "epoch": 0.052770448548812667, "grad_norm": 11.890879296453985, "learning_rate": 1.0550996483001173e-05, "loss": 9.779861450195312, "step": 90, "token_acc": 0.009727726177726582 }, { "epoch": 0.053356786866021695, "grad_norm": 9.250824864575373, "learning_rate": 1.0668229777256741e-05, "loss": 9.756024360656738, "step": 91, "token_acc": 0.010158740210668524 }, { "epoch": 0.053943125183230724, "grad_norm": 4.638097055819151, "learning_rate": 1.0785463071512309e-05, "loss": 9.630939483642578, "step": 92, "token_acc": 0.009815266901281929 }, { "epoch": 0.05452946350043975, "grad_norm": 16.090620397340597, "learning_rate": 1.0902696365767879e-05, "loss": 9.688409805297852, "step": 93, "token_acc": 0.009961478678110428 }, { "epoch": 0.05511580181764878, "grad_norm": 16.640299084865184, "learning_rate": 1.1019929660023446e-05, "loss": 9.71513557434082, "step": 94, "token_acc": 0.009475612488303746 }, { "epoch": 0.05570214013485781, "grad_norm": 11.309254640285038, "learning_rate": 1.1137162954279014e-05, "loss": 9.633021354675293, "step": 95, "token_acc": 0.01003043354640326 }, { "epoch": 0.056288478452066845, "grad_norm": 2.423524902674893, "learning_rate": 1.1254396248534585e-05, "loss": 9.489806175231934, "step": 96, "token_acc": 0.01216712036927447 }, { "epoch": 0.056874816769275874, "grad_norm": 2.753859300151003, "learning_rate": 1.1371629542790153e-05, "loss": 9.52678108215332, "step": 97, "token_acc": 0.011481548078652043 }, { "epoch": 0.0574611550864849, "grad_norm": 2.045608031903105, "learning_rate": 1.1488862837045721e-05, "loss": 9.473427772521973, "step": 98, "token_acc": 0.011738283236948985 }, { "epoch": 0.05804749340369393, "grad_norm": 1.9804489391067366, "learning_rate": 1.160609613130129e-05, "loss": 9.454517364501953, "step": 99, "token_acc": 0.010973224189932379 }, { "epoch": 0.05863383172090296, "grad_norm": 1.8514809324227361, "learning_rate": 1.1723329425556858e-05, "loss": 9.397886276245117, "step": 100, "token_acc": 0.011507198891024228 }, { "epoch": 0.05922017003811199, "grad_norm": 1.764706822609502, "learning_rate": 1.1840562719812428e-05, "loss": 9.400192260742188, "step": 101, "token_acc": 0.011799695525480124 }, { "epoch": 0.05980650835532102, "grad_norm": 1.9003077492099714, "learning_rate": 1.1957796014067996e-05, "loss": 9.331049919128418, "step": 102, "token_acc": 0.011042258066142622 }, { "epoch": 0.06039284667253005, "grad_norm": 1.4989488763131005, "learning_rate": 1.2075029308323564e-05, "loss": 9.37513542175293, "step": 103, "token_acc": 0.011396305491181799 }, { "epoch": 0.06097918498973908, "grad_norm": 1.5982239206402578, "learning_rate": 1.2192262602579133e-05, "loss": 9.262404441833496, "step": 104, "token_acc": 0.011589492193744338 }, { "epoch": 0.06156552330694811, "grad_norm": 1.340719061509751, "learning_rate": 1.2309495896834701e-05, "loss": 9.31516170501709, "step": 105, "token_acc": 0.011250097929125428 }, { "epoch": 0.06215186162415714, "grad_norm": 1.2080849609378068, "learning_rate": 1.242672919109027e-05, "loss": 9.287046432495117, "step": 106, "token_acc": 0.012050421500488889 }, { "epoch": 0.06273819994136617, "grad_norm": 1.3475762654660048, "learning_rate": 1.254396248534584e-05, "loss": 9.218914031982422, "step": 107, "token_acc": 0.01137484366861183 }, { "epoch": 0.0633245382585752, "grad_norm": 1.264803927924952, "learning_rate": 1.2661195779601406e-05, "loss": 9.20506477355957, "step": 108, "token_acc": 0.011012659841656362 }, { "epoch": 0.06391087657578423, "grad_norm": 1.0501627514033995, "learning_rate": 1.2778429073856976e-05, "loss": 9.221490859985352, "step": 109, "token_acc": 0.011815973159538777 }, { "epoch": 0.06449721489299326, "grad_norm": 1.1351548669273637, "learning_rate": 1.2895662368112543e-05, "loss": 9.166486740112305, "step": 110, "token_acc": 0.011135588616953858 }, { "epoch": 0.06508355321020229, "grad_norm": 1.151934615978955, "learning_rate": 1.3012895662368113e-05, "loss": 9.145608901977539, "step": 111, "token_acc": 0.011828176856410338 }, { "epoch": 0.06566989152741132, "grad_norm": 1.0585231459700444, "learning_rate": 1.3130128956623683e-05, "loss": 9.142675399780273, "step": 112, "token_acc": 0.011521914309518282 }, { "epoch": 0.06625622984462035, "grad_norm": 0.9875605344566676, "learning_rate": 1.324736225087925e-05, "loss": 9.238873481750488, "step": 113, "token_acc": 0.011856729851139261 }, { "epoch": 0.06684256816182937, "grad_norm": 1.0325355052786365, "learning_rate": 1.336459554513482e-05, "loss": 9.105939865112305, "step": 114, "token_acc": 0.011576966306819666 }, { "epoch": 0.0674289064790384, "grad_norm": 1.1325458448605066, "learning_rate": 1.3481828839390388e-05, "loss": 9.110050201416016, "step": 115, "token_acc": 0.011211430980672428 }, { "epoch": 0.06801524479624743, "grad_norm": 1.090821686429762, "learning_rate": 1.3599062133645957e-05, "loss": 9.112598419189453, "step": 116, "token_acc": 0.011903571597720325 }, { "epoch": 0.06860158311345646, "grad_norm": 0.952304136534885, "learning_rate": 1.3716295427901523e-05, "loss": 9.133591651916504, "step": 117, "token_acc": 0.012701033123868357 }, { "epoch": 0.06918792143066549, "grad_norm": 1.1331670203367967, "learning_rate": 1.3833528722157093e-05, "loss": 9.020330429077148, "step": 118, "token_acc": 0.012102275793135877 }, { "epoch": 0.06977425974787452, "grad_norm": 0.9634373184547826, "learning_rate": 1.395076201641266e-05, "loss": 9.114398956298828, "step": 119, "token_acc": 0.012448431162093276 }, { "epoch": 0.07036059806508356, "grad_norm": 0.8265779678478534, "learning_rate": 1.406799531066823e-05, "loss": 9.048168182373047, "step": 120, "token_acc": 0.011561001740279884 }, { "epoch": 0.07094693638229259, "grad_norm": 0.833447735319011, "learning_rate": 1.41852286049238e-05, "loss": 9.062210083007812, "step": 121, "token_acc": 0.01157997128167122 }, { "epoch": 0.07153327469950162, "grad_norm": 0.7656553702940753, "learning_rate": 1.4302461899179368e-05, "loss": 9.11124038696289, "step": 122, "token_acc": 0.012586028094654473 }, { "epoch": 0.07211961301671065, "grad_norm": 0.7852937585282732, "learning_rate": 1.4419695193434937e-05, "loss": 9.070611000061035, "step": 123, "token_acc": 0.011125447142537066 }, { "epoch": 0.07270595133391967, "grad_norm": 0.7642616437853497, "learning_rate": 1.4536928487690505e-05, "loss": 9.045153617858887, "step": 124, "token_acc": 0.011685694001289344 }, { "epoch": 0.0732922896511287, "grad_norm": 0.6978454932933204, "learning_rate": 1.4654161781946074e-05, "loss": 9.031429290771484, "step": 125, "token_acc": 0.011616803743544022 }, { "epoch": 0.07387862796833773, "grad_norm": 0.7597479616255512, "learning_rate": 1.477139507620164e-05, "loss": 9.09515380859375, "step": 126, "token_acc": 0.012234666235394008 }, { "epoch": 0.07446496628554676, "grad_norm": 0.748173937002817, "learning_rate": 1.4888628370457212e-05, "loss": 9.073539733886719, "step": 127, "token_acc": 0.01131718594020911 }, { "epoch": 0.07505130460275579, "grad_norm": 0.8594747492046234, "learning_rate": 1.5005861664712778e-05, "loss": 9.14208984375, "step": 128, "token_acc": 0.012112918486366284 }, { "epoch": 0.07563764291996482, "grad_norm": 0.7994670855063187, "learning_rate": 1.5123094958968347e-05, "loss": 9.087879180908203, "step": 129, "token_acc": 0.012230560517448794 }, { "epoch": 0.07622398123717385, "grad_norm": 0.8570311648892158, "learning_rate": 1.5240328253223915e-05, "loss": 9.015796661376953, "step": 130, "token_acc": 0.011548020087755196 }, { "epoch": 0.07681031955438287, "grad_norm": 0.7219671720650426, "learning_rate": 1.5357561547479485e-05, "loss": 9.11224365234375, "step": 131, "token_acc": 0.011591454742862385 }, { "epoch": 0.0773966578715919, "grad_norm": 0.7638687168526499, "learning_rate": 1.5474794841735054e-05, "loss": 9.054033279418945, "step": 132, "token_acc": 0.012042958660582874 }, { "epoch": 0.07798299618880093, "grad_norm": 0.97440464015292, "learning_rate": 1.559202813599062e-05, "loss": 9.11070442199707, "step": 133, "token_acc": 0.011402734220381816 }, { "epoch": 0.07856933450600997, "grad_norm": 0.9001307159126438, "learning_rate": 1.5709261430246193e-05, "loss": 9.088046073913574, "step": 134, "token_acc": 0.012035650089874176 }, { "epoch": 0.079155672823219, "grad_norm": 0.8229793059931592, "learning_rate": 1.582649472450176e-05, "loss": 9.12911319732666, "step": 135, "token_acc": 0.012511244824788131 }, { "epoch": 0.07974201114042803, "grad_norm": 0.8517253815463744, "learning_rate": 1.594372801875733e-05, "loss": 8.992467880249023, "step": 136, "token_acc": 0.012404457301708667 }, { "epoch": 0.08032834945763706, "grad_norm": 0.6550721875663478, "learning_rate": 1.6060961313012895e-05, "loss": 9.066499710083008, "step": 137, "token_acc": 0.011989589001714632 }, { "epoch": 0.08091468777484609, "grad_norm": 0.7043109470028683, "learning_rate": 1.6178194607268465e-05, "loss": 9.069589614868164, "step": 138, "token_acc": 0.012002772152274216 }, { "epoch": 0.08150102609205512, "grad_norm": 0.7186750105655103, "learning_rate": 1.6295427901524034e-05, "loss": 9.092554092407227, "step": 139, "token_acc": 0.01172333546141648 }, { "epoch": 0.08208736440926415, "grad_norm": 0.7578414288822541, "learning_rate": 1.6412661195779604e-05, "loss": 9.005353927612305, "step": 140, "token_acc": 0.012403433476394849 }, { "epoch": 0.08267370272647317, "grad_norm": 0.6863964235343788, "learning_rate": 1.6529894490035173e-05, "loss": 9.08322525024414, "step": 141, "token_acc": 0.012483895800871413 }, { "epoch": 0.0832600410436822, "grad_norm": 0.7115942082178114, "learning_rate": 1.664712778429074e-05, "loss": 9.083134651184082, "step": 142, "token_acc": 0.011353050913913107 }, { "epoch": 0.08384637936089123, "grad_norm": 0.6761155619240857, "learning_rate": 1.676436107854631e-05, "loss": 9.069738388061523, "step": 143, "token_acc": 0.011400751517881825 }, { "epoch": 0.08443271767810026, "grad_norm": 0.7425388051750792, "learning_rate": 1.6881594372801875e-05, "loss": 8.993913650512695, "step": 144, "token_acc": 0.012067434266875038 }, { "epoch": 0.08501905599530929, "grad_norm": 0.9021054749102797, "learning_rate": 1.6998827667057444e-05, "loss": 9.140275955200195, "step": 145, "token_acc": 0.012101386584342878 }, { "epoch": 0.08560539431251832, "grad_norm": 0.8504217653550193, "learning_rate": 1.7116060961313014e-05, "loss": 9.014819145202637, "step": 146, "token_acc": 0.012131624988214094 }, { "epoch": 0.08619173262972735, "grad_norm": 0.8358068078543422, "learning_rate": 1.7233294255568583e-05, "loss": 9.017570495605469, "step": 147, "token_acc": 0.011914827195983298 }, { "epoch": 0.08677807094693639, "grad_norm": 0.7227265093302682, "learning_rate": 1.735052754982415e-05, "loss": 9.058671951293945, "step": 148, "token_acc": 0.014255455800161248 }, { "epoch": 0.08736440926414542, "grad_norm": 0.7834286440676679, "learning_rate": 1.746776084407972e-05, "loss": 9.038613319396973, "step": 149, "token_acc": 0.014479944047774593 }, { "epoch": 0.08795074758135445, "grad_norm": 0.696907119834478, "learning_rate": 1.7584994138335285e-05, "loss": 9.044143676757812, "step": 150, "token_acc": 0.013753129701304278 }, { "epoch": 0.08853708589856348, "grad_norm": 0.6962648070847909, "learning_rate": 1.7702227432590858e-05, "loss": 9.083137512207031, "step": 151, "token_acc": 0.014561923822757769 }, { "epoch": 0.0891234242157725, "grad_norm": 0.7437592420092028, "learning_rate": 1.7819460726846428e-05, "loss": 9.040020942687988, "step": 152, "token_acc": 0.014798572838693756 }, { "epoch": 0.08970976253298153, "grad_norm": 0.707776049021692, "learning_rate": 1.7936694021101994e-05, "loss": 9.024340629577637, "step": 153, "token_acc": 0.014677986703254204 }, { "epoch": 0.09029610085019056, "grad_norm": 0.7257843762539763, "learning_rate": 1.8053927315357563e-05, "loss": 9.047564506530762, "step": 154, "token_acc": 0.014282354619899112 }, { "epoch": 0.09088243916739959, "grad_norm": 0.641385919441577, "learning_rate": 1.817116060961313e-05, "loss": 9.074129104614258, "step": 155, "token_acc": 0.013060438286499033 }, { "epoch": 0.09146877748460862, "grad_norm": 0.7037882352890727, "learning_rate": 1.82883939038687e-05, "loss": 9.017114639282227, "step": 156, "token_acc": 0.01331883095490841 }, { "epoch": 0.09205511580181765, "grad_norm": 0.72330399374591, "learning_rate": 1.840562719812427e-05, "loss": 9.049703598022461, "step": 157, "token_acc": 0.013519621187834705 }, { "epoch": 0.09264145411902668, "grad_norm": 0.8153885218738263, "learning_rate": 1.8522860492379838e-05, "loss": 9.003667831420898, "step": 158, "token_acc": 0.012964697709666554 }, { "epoch": 0.0932277924362357, "grad_norm": 9.31449519764132, "learning_rate": 1.8640093786635404e-05, "loss": 8.997857093811035, "step": 159, "token_acc": 0.013419320710095996 }, { "epoch": 0.09381413075344473, "grad_norm": 1.005337534873343, "learning_rate": 1.8757327080890974e-05, "loss": 9.076769828796387, "step": 160, "token_acc": 0.013001575410405827 }, { "epoch": 0.09440046907065376, "grad_norm": 0.7684279289802697, "learning_rate": 1.8874560375146543e-05, "loss": 9.057096481323242, "step": 161, "token_acc": 0.01347530621106096 }, { "epoch": 0.09498680738786279, "grad_norm": 0.9409533191933808, "learning_rate": 1.8991793669402113e-05, "loss": 9.03506088256836, "step": 162, "token_acc": 0.014003734329154442 }, { "epoch": 0.09557314570507183, "grad_norm": 0.8645441983935221, "learning_rate": 1.9109026963657682e-05, "loss": 9.019186973571777, "step": 163, "token_acc": 0.01292129111088484 }, { "epoch": 0.09615948402228086, "grad_norm": 0.9393253312821077, "learning_rate": 1.9226260257913248e-05, "loss": 9.03659439086914, "step": 164, "token_acc": 0.012735673011338043 }, { "epoch": 0.09674582233948989, "grad_norm": 0.7811423991520909, "learning_rate": 1.9343493552168818e-05, "loss": 9.040750503540039, "step": 165, "token_acc": 0.014251560049570644 }, { "epoch": 0.09733216065669892, "grad_norm": 0.6971025306360749, "learning_rate": 1.9460726846424384e-05, "loss": 9.061229705810547, "step": 166, "token_acc": 0.013292242560079692 }, { "epoch": 0.09791849897390795, "grad_norm": 0.8212909189612504, "learning_rate": 1.9577960140679953e-05, "loss": 8.96902847290039, "step": 167, "token_acc": 0.014282893606546027 }, { "epoch": 0.09850483729111698, "grad_norm": 1.200594312016131, "learning_rate": 1.9695193434935523e-05, "loss": 8.952869415283203, "step": 168, "token_acc": 0.014163986808955163 }, { "epoch": 0.099091175608326, "grad_norm": 5.824729012820525, "learning_rate": 1.9812426729191092e-05, "loss": 8.955404281616211, "step": 169, "token_acc": 0.01576004892542661 }, { "epoch": 0.09967751392553503, "grad_norm": 1.4242937668335696, "learning_rate": 1.992966002344666e-05, "loss": 8.959024429321289, "step": 170, "token_acc": 0.014124234872105026 }, { "epoch": 0.10026385224274406, "grad_norm": 1.3609270715797308, "learning_rate": 2.0046893317702228e-05, "loss": 8.948944091796875, "step": 171, "token_acc": 0.016941815646295533 }, { "epoch": 0.10085019055995309, "grad_norm": 2.566289899575198, "learning_rate": 2.0164126611957798e-05, "loss": 8.961725234985352, "step": 172, "token_acc": 0.015024575656908973 }, { "epoch": 0.10143652887716212, "grad_norm": 1.375139853508446, "learning_rate": 2.0281359906213364e-05, "loss": 8.978058815002441, "step": 173, "token_acc": 0.015213520107315722 }, { "epoch": 0.10202286719437115, "grad_norm": 1.0423960317679877, "learning_rate": 2.0398593200468937e-05, "loss": 8.9154052734375, "step": 174, "token_acc": 0.0152281594750071 }, { "epoch": 0.10260920551158018, "grad_norm": 2.301740162866994, "learning_rate": 2.0515826494724503e-05, "loss": 8.96900749206543, "step": 175, "token_acc": 0.015455946544198266 }, { "epoch": 0.1031955438287892, "grad_norm": 1.5482325621563005, "learning_rate": 2.0633059788980072e-05, "loss": 8.960334777832031, "step": 176, "token_acc": 0.017298787210584345 }, { "epoch": 0.10378188214599825, "grad_norm": 0.8556971596952577, "learning_rate": 2.075029308323564e-05, "loss": 8.910372734069824, "step": 177, "token_acc": 0.016071924195301374 }, { "epoch": 0.10436822046320728, "grad_norm": 5.295799730411908, "learning_rate": 2.0867526377491208e-05, "loss": 8.86441707611084, "step": 178, "token_acc": 0.015786640844907197 }, { "epoch": 0.1049545587804163, "grad_norm": 3.5152963718692534, "learning_rate": 2.0984759671746777e-05, "loss": 8.927906036376953, "step": 179, "token_acc": 0.01683879018052864 }, { "epoch": 0.10554089709762533, "grad_norm": 3.138251926137507, "learning_rate": 2.1101992966002347e-05, "loss": 8.926370620727539, "step": 180, "token_acc": 0.015805829122476772 }, { "epoch": 0.10612723541483436, "grad_norm": 9.333767190674024, "learning_rate": 2.1219226260257916e-05, "loss": 8.921234130859375, "step": 181, "token_acc": 0.016851821229386207 }, { "epoch": 0.10671357373204339, "grad_norm": 3.323326480985714, "learning_rate": 2.1336459554513483e-05, "loss": 8.781801223754883, "step": 182, "token_acc": 0.01659174478355853 }, { "epoch": 0.10729991204925242, "grad_norm": 14.405946436689645, "learning_rate": 2.1453692848769052e-05, "loss": 8.9700927734375, "step": 183, "token_acc": 0.015191283957919641 }, { "epoch": 0.10788625036646145, "grad_norm": 13.346227364046948, "learning_rate": 2.1570926143024618e-05, "loss": 8.917417526245117, "step": 184, "token_acc": 0.01641444240406353 }, { "epoch": 0.10847258868367048, "grad_norm": 6.895339235739814, "learning_rate": 2.1688159437280188e-05, "loss": 8.860979080200195, "step": 185, "token_acc": 0.016249619733633078 }, { "epoch": 0.1090589270008795, "grad_norm": 12.261686386665007, "learning_rate": 2.1805392731535757e-05, "loss": 8.921403884887695, "step": 186, "token_acc": 0.015335129246208268 }, { "epoch": 0.10964526531808853, "grad_norm": 11.243118734511503, "learning_rate": 2.1922626025791327e-05, "loss": 8.925213813781738, "step": 187, "token_acc": 0.015112607008083487 }, { "epoch": 0.11023160363529756, "grad_norm": 6.1716959372824505, "learning_rate": 2.2039859320046893e-05, "loss": 8.8018798828125, "step": 188, "token_acc": 0.016534557011216457 }, { "epoch": 0.11081794195250659, "grad_norm": 3.101446594108936, "learning_rate": 2.2157092614302462e-05, "loss": 8.832223892211914, "step": 189, "token_acc": 0.01590452254985572 }, { "epoch": 0.11140428026971562, "grad_norm": 5.06177739064044, "learning_rate": 2.227432590855803e-05, "loss": 8.88629150390625, "step": 190, "token_acc": 0.01556583153825492 }, { "epoch": 0.11199061858692466, "grad_norm": 1.7667216614786856, "learning_rate": 2.23915592028136e-05, "loss": 8.804533958435059, "step": 191, "token_acc": 0.015173052296839346 }, { "epoch": 0.11257695690413369, "grad_norm": 3.658817747813563, "learning_rate": 2.250879249706917e-05, "loss": 8.903039932250977, "step": 192, "token_acc": 0.01608010181553726 }, { "epoch": 0.11316329522134272, "grad_norm": 3.961014838557213, "learning_rate": 2.2626025791324737e-05, "loss": 8.841934204101562, "step": 193, "token_acc": 0.01553905562836573 }, { "epoch": 0.11374963353855175, "grad_norm": 4.719743908208459, "learning_rate": 2.2743259085580307e-05, "loss": 8.834268569946289, "step": 194, "token_acc": 0.014658590076126611 }, { "epoch": 0.11433597185576078, "grad_norm": 2.4698910205375064, "learning_rate": 2.2860492379835873e-05, "loss": 8.811420440673828, "step": 195, "token_acc": 0.015618440936949038 }, { "epoch": 0.1149223101729698, "grad_norm": 7.209373338178073, "learning_rate": 2.2977725674091442e-05, "loss": 8.798686981201172, "step": 196, "token_acc": 0.01697163252817443 }, { "epoch": 0.11550864849017883, "grad_norm": 5.336832747788366, "learning_rate": 2.3094958968347012e-05, "loss": 8.872821807861328, "step": 197, "token_acc": 0.016580901911036543 }, { "epoch": 0.11609498680738786, "grad_norm": 1.0947098512600604, "learning_rate": 2.321219226260258e-05, "loss": 8.73609733581543, "step": 198, "token_acc": 0.017363586019040856 }, { "epoch": 0.11668132512459689, "grad_norm": 5.9811775760993, "learning_rate": 2.3329425556858147e-05, "loss": 8.715837478637695, "step": 199, "token_acc": 0.016491450941757517 }, { "epoch": 0.11726766344180592, "grad_norm": 2.0633491724212685, "learning_rate": 2.3446658851113717e-05, "loss": 8.695581436157227, "step": 200, "token_acc": 0.015138196091258485 }, { "epoch": 0.11785400175901495, "grad_norm": 10.591654191799721, "learning_rate": 2.3563892145369286e-05, "loss": 8.832849502563477, "step": 201, "token_acc": 0.015996674908346834 }, { "epoch": 0.11844034007622398, "grad_norm": 13.136811922185172, "learning_rate": 2.3681125439624856e-05, "loss": 8.710956573486328, "step": 202, "token_acc": 0.01733241936261253 }, { "epoch": 0.119026678393433, "grad_norm": 3.2711713145232273, "learning_rate": 2.3798358733880426e-05, "loss": 8.791112899780273, "step": 203, "token_acc": 0.01658490333767251 }, { "epoch": 0.11961301671064203, "grad_norm": 10.597238560834104, "learning_rate": 2.391559202813599e-05, "loss": 8.680379867553711, "step": 204, "token_acc": 0.016328462403123706 }, { "epoch": 0.12019935502785108, "grad_norm": 13.504735178378652, "learning_rate": 2.403282532239156e-05, "loss": 8.875692367553711, "step": 205, "token_acc": 0.015691883679761105 }, { "epoch": 0.1207856933450601, "grad_norm": 8.13171450433769, "learning_rate": 2.4150058616647127e-05, "loss": 8.767316818237305, "step": 206, "token_acc": 0.016828419899942028 }, { "epoch": 0.12137203166226913, "grad_norm": 1.9173355946554083, "learning_rate": 2.4267291910902697e-05, "loss": 8.74384593963623, "step": 207, "token_acc": 0.016572997407502156 }, { "epoch": 0.12195836997947816, "grad_norm": 3.4276935341479318, "learning_rate": 2.4384525205158266e-05, "loss": 8.787282943725586, "step": 208, "token_acc": 0.0161269036872806 }, { "epoch": 0.12254470829668719, "grad_norm": 1.2552424841660939, "learning_rate": 2.4501758499413836e-05, "loss": 8.732902526855469, "step": 209, "token_acc": 0.015460392390613068 }, { "epoch": 0.12313104661389622, "grad_norm": 2.8609025149950833, "learning_rate": 2.4618991793669402e-05, "loss": 8.645153045654297, "step": 210, "token_acc": 0.016576407000640725 }, { "epoch": 0.12371738493110525, "grad_norm": 1.8799254088976998, "learning_rate": 2.473622508792497e-05, "loss": 8.766607284545898, "step": 211, "token_acc": 0.017658121765497414 }, { "epoch": 0.12430372324831428, "grad_norm": 1.0712384945420417, "learning_rate": 2.485345838218054e-05, "loss": 8.612556457519531, "step": 212, "token_acc": 0.01722776504600158 }, { "epoch": 0.1248900615655233, "grad_norm": 2.7720756099267305, "learning_rate": 2.4970691676436107e-05, "loss": 8.535324096679688, "step": 213, "token_acc": 0.017316714193726428 }, { "epoch": 0.12547639988273235, "grad_norm": 8.642709154598062, "learning_rate": 2.508792497069168e-05, "loss": 8.709345817565918, "step": 214, "token_acc": 0.016801567304730695 }, { "epoch": 0.12606273819994138, "grad_norm": 5.202525132830785, "learning_rate": 2.520515826494725e-05, "loss": 8.681105613708496, "step": 215, "token_acc": 0.015975298354672072 }, { "epoch": 0.1266490765171504, "grad_norm": 8.168177418130778, "learning_rate": 2.5322391559202812e-05, "loss": 8.639896392822266, "step": 216, "token_acc": 0.016853400577382514 }, { "epoch": 0.12723541483435943, "grad_norm": 5.221265842440695, "learning_rate": 2.5439624853458382e-05, "loss": 8.569507598876953, "step": 217, "token_acc": 0.01684437386569873 }, { "epoch": 0.12782175315156846, "grad_norm": 6.303391587658705, "learning_rate": 2.555685814771395e-05, "loss": 8.726802825927734, "step": 218, "token_acc": 0.016575941281428277 }, { "epoch": 0.1284080914687775, "grad_norm": 8.912004600500023, "learning_rate": 2.567409144196952e-05, "loss": 8.578927993774414, "step": 219, "token_acc": 0.01656643387633698 }, { "epoch": 0.12899442978598652, "grad_norm": 3.7996099592616632, "learning_rate": 2.5791324736225087e-05, "loss": 8.55063247680664, "step": 220, "token_acc": 0.01638220035436132 }, { "epoch": 0.12958076810319555, "grad_norm": 3.6968260263310673, "learning_rate": 2.5908558030480656e-05, "loss": 8.614810943603516, "step": 221, "token_acc": 0.017415957877683273 }, { "epoch": 0.13016710642040458, "grad_norm": 17.05818270793626, "learning_rate": 2.6025791324736226e-05, "loss": 8.583649635314941, "step": 222, "token_acc": 0.017697264823808075 }, { "epoch": 0.1307534447376136, "grad_norm": 16.282298485283953, "learning_rate": 2.6143024618991796e-05, "loss": 8.66443920135498, "step": 223, "token_acc": 0.016362078958571627 }, { "epoch": 0.13133978305482263, "grad_norm": 2.0644641560828476, "learning_rate": 2.6260257913247365e-05, "loss": 8.622711181640625, "step": 224, "token_acc": 0.01609407258292875 }, { "epoch": 0.13192612137203166, "grad_norm": 2.9579060756891993, "learning_rate": 2.637749120750293e-05, "loss": 8.510727882385254, "step": 225, "token_acc": 0.01712861956339319 }, { "epoch": 0.1325124596892407, "grad_norm": 2.2077382104085386, "learning_rate": 2.64947245017585e-05, "loss": 8.509180068969727, "step": 226, "token_acc": 0.017173266755904484 }, { "epoch": 0.13309879800644972, "grad_norm": 9.986535544055963, "learning_rate": 2.661195779601407e-05, "loss": 8.598289489746094, "step": 227, "token_acc": 0.016466763159575147 }, { "epoch": 0.13368513632365875, "grad_norm": 6.5569825066103835, "learning_rate": 2.672919109026964e-05, "loss": 8.589065551757812, "step": 228, "token_acc": 0.016002592717545207 }, { "epoch": 0.13427147464086778, "grad_norm": 13.528946435191246, "learning_rate": 2.6846424384525202e-05, "loss": 8.567931175231934, "step": 229, "token_acc": 0.016549324269644734 }, { "epoch": 0.1348578129580768, "grad_norm": 14.673761218136983, "learning_rate": 2.6963657678780775e-05, "loss": 8.519822120666504, "step": 230, "token_acc": 0.017126661208036784 }, { "epoch": 0.13544415127528583, "grad_norm": 5.382544931895434, "learning_rate": 2.7080890973036345e-05, "loss": 8.577202796936035, "step": 231, "token_acc": 0.015222634267783511 }, { "epoch": 0.13603048959249486, "grad_norm": 4.550736155538764, "learning_rate": 2.7198124267291914e-05, "loss": 8.50474739074707, "step": 232, "token_acc": 0.016359756787404724 }, { "epoch": 0.1366168279097039, "grad_norm": 9.984215509657442, "learning_rate": 2.7315357561547484e-05, "loss": 8.471184730529785, "step": 233, "token_acc": 0.018521768477894027 }, { "epoch": 0.13720316622691292, "grad_norm": 5.524981881058105, "learning_rate": 2.7432590855803047e-05, "loss": 8.468493461608887, "step": 234, "token_acc": 0.017912650035211357 }, { "epoch": 0.13778950454412195, "grad_norm": 11.699236774247959, "learning_rate": 2.7549824150058616e-05, "loss": 8.444255828857422, "step": 235, "token_acc": 0.01791036236821437 }, { "epoch": 0.13837584286133098, "grad_norm": 10.123664924444109, "learning_rate": 2.7667057444314186e-05, "loss": 8.387200355529785, "step": 236, "token_acc": 0.01708580705129367 }, { "epoch": 0.13896218117854, "grad_norm": 3.9487437276302484, "learning_rate": 2.778429073856976e-05, "loss": 8.458383560180664, "step": 237, "token_acc": 0.017343458903028196 }, { "epoch": 0.13954851949574903, "grad_norm": 3.963875412289304, "learning_rate": 2.790152403282532e-05, "loss": 8.412214279174805, "step": 238, "token_acc": 0.019628865768343048 }, { "epoch": 0.14013485781295806, "grad_norm": 12.211930197030025, "learning_rate": 2.801875732708089e-05, "loss": 8.418054580688477, "step": 239, "token_acc": 0.017701699611281117 }, { "epoch": 0.14072119613016712, "grad_norm": 10.909038604133126, "learning_rate": 2.813599062133646e-05, "loss": 8.36636734008789, "step": 240, "token_acc": 0.017211151251472892 }, { "epoch": 0.14130753444737615, "grad_norm": 8.28542484631731, "learning_rate": 2.825322391559203e-05, "loss": 8.370955467224121, "step": 241, "token_acc": 0.019585555889333625 }, { "epoch": 0.14189387276458518, "grad_norm": 7.33590152292038, "learning_rate": 2.83704572098476e-05, "loss": 8.39145278930664, "step": 242, "token_acc": 0.018641399386104405 }, { "epoch": 0.1424802110817942, "grad_norm": 7.749166741267496, "learning_rate": 2.8487690504103166e-05, "loss": 8.324542999267578, "step": 243, "token_acc": 0.018362774783092776 }, { "epoch": 0.14306654939900323, "grad_norm": 5.324767302634211, "learning_rate": 2.8604923798358735e-05, "loss": 8.325507164001465, "step": 244, "token_acc": 0.01934745508491453 }, { "epoch": 0.14365288771621226, "grad_norm": 9.781482280787388, "learning_rate": 2.8722157092614305e-05, "loss": 8.440896987915039, "step": 245, "token_acc": 0.01824733374953695 }, { "epoch": 0.1442392260334213, "grad_norm": 10.380814201783155, "learning_rate": 2.8839390386869874e-05, "loss": 8.244488716125488, "step": 246, "token_acc": 0.020205417955481815 }, { "epoch": 0.14482556435063032, "grad_norm": 5.154728848909781, "learning_rate": 2.895662368112544e-05, "loss": 8.224533081054688, "step": 247, "token_acc": 0.01909172108635162 }, { "epoch": 0.14541190266783935, "grad_norm": 5.502263745507572, "learning_rate": 2.907385697538101e-05, "loss": 8.275815963745117, "step": 248, "token_acc": 0.019192503889150177 }, { "epoch": 0.14599824098504838, "grad_norm": 7.585891046065305, "learning_rate": 2.919109026963658e-05, "loss": 8.325769424438477, "step": 249, "token_acc": 0.020240734874881216 }, { "epoch": 0.1465845793022574, "grad_norm": 3.7468291672525176, "learning_rate": 2.930832356389215e-05, "loss": 8.226460456848145, "step": 250, "token_acc": 0.021003967820610828 }, { "epoch": 0.14717091761946643, "grad_norm": 11.951610769329216, "learning_rate": 2.9425556858147718e-05, "loss": 8.30726432800293, "step": 251, "token_acc": 0.01735603104933756 }, { "epoch": 0.14775725593667546, "grad_norm": 10.49913542957289, "learning_rate": 2.954279015240328e-05, "loss": 8.246274948120117, "step": 252, "token_acc": 0.01941996525233919 }, { "epoch": 0.1483435942538845, "grad_norm": 4.437843733752013, "learning_rate": 2.9660023446658854e-05, "loss": 8.106080055236816, "step": 253, "token_acc": 0.021376591873862948 }, { "epoch": 0.14892993257109352, "grad_norm": 5.150232708191367, "learning_rate": 2.9777256740914423e-05, "loss": 8.203289031982422, "step": 254, "token_acc": 0.0213694924389258 }, { "epoch": 0.14951627088830255, "grad_norm": 6.97369549993389, "learning_rate": 2.9894490035169993e-05, "loss": 8.237259864807129, "step": 255, "token_acc": 0.02066425045949239 }, { "epoch": 0.15010260920551158, "grad_norm": 4.124770989228856, "learning_rate": 3.0011723329425556e-05, "loss": 8.218684196472168, "step": 256, "token_acc": 0.02030398362175691 }, { "epoch": 0.1506889475227206, "grad_norm": 4.511899369399747, "learning_rate": 3.0128956623681125e-05, "loss": 8.083969116210938, "step": 257, "token_acc": 0.02240091461469559 }, { "epoch": 0.15127528583992964, "grad_norm": 6.980317013092456, "learning_rate": 3.0246189917936695e-05, "loss": 8.092212677001953, "step": 258, "token_acc": 0.022515734420707385 }, { "epoch": 0.15186162415713866, "grad_norm": 3.317675318677222, "learning_rate": 3.0363423212192264e-05, "loss": 8.109350204467773, "step": 259, "token_acc": 0.021867027916403905 }, { "epoch": 0.1524479624743477, "grad_norm": 11.65603149025743, "learning_rate": 3.048065650644783e-05, "loss": 8.081318855285645, "step": 260, "token_acc": 0.023007410084944876 }, { "epoch": 0.15303430079155672, "grad_norm": 8.447222871175889, "learning_rate": 3.05978898007034e-05, "loss": 8.224822998046875, "step": 261, "token_acc": 0.020889620909093368 }, { "epoch": 0.15362063910876575, "grad_norm": 8.744415179863386, "learning_rate": 3.071512309495897e-05, "loss": 8.100028038024902, "step": 262, "token_acc": 0.021298408978786383 }, { "epoch": 0.15420697742597478, "grad_norm": 4.970656588146134, "learning_rate": 3.083235638921454e-05, "loss": 8.10700798034668, "step": 263, "token_acc": 0.023156668608037275 }, { "epoch": 0.1547933157431838, "grad_norm": 12.728606559643172, "learning_rate": 3.094958968347011e-05, "loss": 8.07313060760498, "step": 264, "token_acc": 0.023119693382585425 }, { "epoch": 0.15537965406039284, "grad_norm": 11.2290241235744, "learning_rate": 3.106682297772567e-05, "loss": 8.088520050048828, "step": 265, "token_acc": 0.023887609373042917 }, { "epoch": 0.15596599237760186, "grad_norm": 5.6683871927044525, "learning_rate": 3.118405627198124e-05, "loss": 7.989017486572266, "step": 266, "token_acc": 0.023079469594480113 }, { "epoch": 0.1565523306948109, "grad_norm": 4.550302568228976, "learning_rate": 3.130128956623681e-05, "loss": 8.032395362854004, "step": 267, "token_acc": 0.024050520610699337 }, { "epoch": 0.15713866901201995, "grad_norm": 6.713584702227589, "learning_rate": 3.1418522860492386e-05, "loss": 8.073318481445312, "step": 268, "token_acc": 0.02364868286457964 }, { "epoch": 0.15772500732922898, "grad_norm": 3.1993517533470652, "learning_rate": 3.153575615474795e-05, "loss": 7.910236358642578, "step": 269, "token_acc": 0.02569669008944696 }, { "epoch": 0.158311345646438, "grad_norm": 12.713592150068552, "learning_rate": 3.165298944900352e-05, "loss": 8.088440895080566, "step": 270, "token_acc": 0.02105357438504401 }, { "epoch": 0.15889768396364704, "grad_norm": 11.677275340003138, "learning_rate": 3.177022274325909e-05, "loss": 7.953788757324219, "step": 271, "token_acc": 0.022620417049463826 }, { "epoch": 0.15948402228085606, "grad_norm": 4.557457205967349, "learning_rate": 3.188745603751466e-05, "loss": 7.925534725189209, "step": 272, "token_acc": 0.02491990124847167 }, { "epoch": 0.1600703605980651, "grad_norm": 8.31131892366096, "learning_rate": 3.200468933177023e-05, "loss": 7.940683841705322, "step": 273, "token_acc": 0.024467306800732145 }, { "epoch": 0.16065669891527412, "grad_norm": 5.278990594903741, "learning_rate": 3.212192262602579e-05, "loss": 8.003097534179688, "step": 274, "token_acc": 0.023447019707503708 }, { "epoch": 0.16124303723248315, "grad_norm": 6.065647567055946, "learning_rate": 3.223915592028136e-05, "loss": 7.935528755187988, "step": 275, "token_acc": 0.024805437322650642 }, { "epoch": 0.16182937554969218, "grad_norm": 2.5249131219658145, "learning_rate": 3.235638921453693e-05, "loss": 7.942893028259277, "step": 276, "token_acc": 0.0249650614526599 }, { "epoch": 0.1624157138669012, "grad_norm": 11.358404063997227, "learning_rate": 3.24736225087925e-05, "loss": 7.845043182373047, "step": 277, "token_acc": 0.027566706532497525 }, { "epoch": 0.16300205218411024, "grad_norm": 6.6310532158063245, "learning_rate": 3.259085580304807e-05, "loss": 7.773580551147461, "step": 278, "token_acc": 0.02728788508370985 }, { "epoch": 0.16358839050131926, "grad_norm": 11.793723136999827, "learning_rate": 3.270808909730364e-05, "loss": 7.852505207061768, "step": 279, "token_acc": 0.02475860361475613 }, { "epoch": 0.1641747288185283, "grad_norm": 10.028946127084307, "learning_rate": 3.282532239155921e-05, "loss": 7.7964186668396, "step": 280, "token_acc": 0.0256860655973738 }, { "epoch": 0.16476106713573732, "grad_norm": 6.988201898998311, "learning_rate": 3.294255568581478e-05, "loss": 7.760739326477051, "step": 281, "token_acc": 0.029985963318567 }, { "epoch": 0.16534740545294635, "grad_norm": 7.0124436464722, "learning_rate": 3.3059788980070346e-05, "loss": 7.711109161376953, "step": 282, "token_acc": 0.029875808366755168 }, { "epoch": 0.16593374377015538, "grad_norm": 7.6616465604716995, "learning_rate": 3.317702227432591e-05, "loss": 7.811481952667236, "step": 283, "token_acc": 0.024813128320286103 }, { "epoch": 0.1665200820873644, "grad_norm": 7.060172207640459, "learning_rate": 3.329425556858148e-05, "loss": 7.762804985046387, "step": 284, "token_acc": 0.027798685207458658 }, { "epoch": 0.16710642040457344, "grad_norm": 4.784394588924861, "learning_rate": 3.341148886283705e-05, "loss": 7.639404773712158, "step": 285, "token_acc": 0.03056520248815575 }, { "epoch": 0.16769275872178246, "grad_norm": 1.5110618201600867, "learning_rate": 3.352872215709262e-05, "loss": 7.698606491088867, "step": 286, "token_acc": 0.029386059615246204 }, { "epoch": 0.1682790970389915, "grad_norm": 9.132929671450656, "learning_rate": 3.364595545134818e-05, "loss": 7.63512659072876, "step": 287, "token_acc": 0.029604158172619553 }, { "epoch": 0.16886543535620052, "grad_norm": 5.870506476761636, "learning_rate": 3.376318874560375e-05, "loss": 7.667540550231934, "step": 288, "token_acc": 0.02818635598471411 }, { "epoch": 0.16945177367340955, "grad_norm": 11.39258145479053, "learning_rate": 3.388042203985932e-05, "loss": 7.682027816772461, "step": 289, "token_acc": 0.03005629348996723 }, { "epoch": 0.17003811199061858, "grad_norm": 9.26264010364802, "learning_rate": 3.399765533411489e-05, "loss": 7.638578414916992, "step": 290, "token_acc": 0.03236761528777258 }, { "epoch": 0.1706244503078276, "grad_norm": 5.576872460057638, "learning_rate": 3.411488862837046e-05, "loss": 7.578747749328613, "step": 291, "token_acc": 0.031305617085568685 }, { "epoch": 0.17121078862503664, "grad_norm": 4.854351933904024, "learning_rate": 3.423212192262603e-05, "loss": 7.461580753326416, "step": 292, "token_acc": 0.034631231017537345 }, { "epoch": 0.17179712694224566, "grad_norm": 7.95537002735846, "learning_rate": 3.43493552168816e-05, "loss": 7.587418079376221, "step": 293, "token_acc": 0.03271779374030124 }, { "epoch": 0.1723834652594547, "grad_norm": 3.8841427282950365, "learning_rate": 3.446658851113717e-05, "loss": 7.523349285125732, "step": 294, "token_acc": 0.033108201384131716 }, { "epoch": 0.17296980357666372, "grad_norm": 3.271674798272289, "learning_rate": 3.4583821805392736e-05, "loss": 7.487792015075684, "step": 295, "token_acc": 0.03521963536444989 }, { "epoch": 0.17355614189387278, "grad_norm": 4.937817206942403, "learning_rate": 3.47010550996483e-05, "loss": 7.451592445373535, "step": 296, "token_acc": 0.0363185712795222 }, { "epoch": 0.1741424802110818, "grad_norm": 2.2811487818554443, "learning_rate": 3.481828839390387e-05, "loss": 7.391551494598389, "step": 297, "token_acc": 0.036459766394216504 }, { "epoch": 0.17472881852829084, "grad_norm": 4.656159086752642, "learning_rate": 3.493552168815944e-05, "loss": 7.45686149597168, "step": 298, "token_acc": 0.034385240265990706 }, { "epoch": 0.17531515684549986, "grad_norm": 6.681774346496673, "learning_rate": 3.505275498241501e-05, "loss": 7.279962062835693, "step": 299, "token_acc": 0.036752212223622564 }, { "epoch": 0.1759014951627089, "grad_norm": 3.006122301061248, "learning_rate": 3.516998827667057e-05, "loss": 7.289892196655273, "step": 300, "token_acc": 0.03830300798789032 }, { "epoch": 0.17648783347991792, "grad_norm": 7.749870951875948, "learning_rate": 3.528722157092615e-05, "loss": 7.46074104309082, "step": 301, "token_acc": 0.03685454597347739 }, { "epoch": 0.17707417179712695, "grad_norm": 4.507101515014864, "learning_rate": 3.5404454865181716e-05, "loss": 7.326682090759277, "step": 302, "token_acc": 0.03886698788144255 }, { "epoch": 0.17766051011433598, "grad_norm": 6.117967333469634, "learning_rate": 3.5521688159437286e-05, "loss": 7.266670227050781, "step": 303, "token_acc": 0.03918212128075341 }, { "epoch": 0.178246848431545, "grad_norm": 4.15836396325537, "learning_rate": 3.5638921453692855e-05, "loss": 7.240307807922363, "step": 304, "token_acc": 0.03883024628167284 }, { "epoch": 0.17883318674875404, "grad_norm": 4.675615474675205, "learning_rate": 3.575615474794842e-05, "loss": 7.203600883483887, "step": 305, "token_acc": 0.03955861084206741 }, { "epoch": 0.17941952506596306, "grad_norm": 4.909434257054275, "learning_rate": 3.587338804220399e-05, "loss": 7.126448631286621, "step": 306, "token_acc": 0.041463668269855716 }, { "epoch": 0.1800058633831721, "grad_norm": 5.506219891330509, "learning_rate": 3.599062133645956e-05, "loss": 7.274214744567871, "step": 307, "token_acc": 0.04332027850304613 }, { "epoch": 0.18059220170038112, "grad_norm": 3.781047379953383, "learning_rate": 3.6107854630715126e-05, "loss": 7.200839996337891, "step": 308, "token_acc": 0.04252858212020492 }, { "epoch": 0.18117854001759015, "grad_norm": 5.139988025106219, "learning_rate": 3.622508792497069e-05, "loss": 7.19188117980957, "step": 309, "token_acc": 0.04257972609502846 }, { "epoch": 0.18176487833479918, "grad_norm": 4.531100392177371, "learning_rate": 3.634232121922626e-05, "loss": 7.082777500152588, "step": 310, "token_acc": 0.045122115215524825 }, { "epoch": 0.1823512166520082, "grad_norm": 7.489128865005872, "learning_rate": 3.645955451348183e-05, "loss": 7.129194259643555, "step": 311, "token_acc": 0.043057136052081404 }, { "epoch": 0.18293755496921724, "grad_norm": 2.659038273550176, "learning_rate": 3.65767878077374e-05, "loss": 6.948609828948975, "step": 312, "token_acc": 0.0508242877217278 }, { "epoch": 0.18352389328642627, "grad_norm": 7.652578801222082, "learning_rate": 3.669402110199297e-05, "loss": 7.0207414627075195, "step": 313, "token_acc": 0.05026367704743116 }, { "epoch": 0.1841102316036353, "grad_norm": 5.807980961527149, "learning_rate": 3.681125439624854e-05, "loss": 7.00704288482666, "step": 314, "token_acc": 0.04856493435284969 }, { "epoch": 0.18469656992084432, "grad_norm": 3.0899571116273266, "learning_rate": 3.6928487690504106e-05, "loss": 6.859892845153809, "step": 315, "token_acc": 0.05295855992685723 }, { "epoch": 0.18528290823805335, "grad_norm": 9.622625931652337, "learning_rate": 3.7045720984759676e-05, "loss": 6.883492469787598, "step": 316, "token_acc": 0.051444997690056976 }, { "epoch": 0.18586924655526238, "grad_norm": 4.960630303186147, "learning_rate": 3.7162954279015245e-05, "loss": 6.878410339355469, "step": 317, "token_acc": 0.05236103850845095 }, { "epoch": 0.1864555848724714, "grad_norm": 8.175282079681656, "learning_rate": 3.728018757327081e-05, "loss": 6.859060287475586, "step": 318, "token_acc": 0.0509749130328842 }, { "epoch": 0.18704192318968044, "grad_norm": 6.115616224953417, "learning_rate": 3.739742086752638e-05, "loss": 6.931354999542236, "step": 319, "token_acc": 0.050930681099246186 }, { "epoch": 0.18762826150688947, "grad_norm": 6.459522185580921, "learning_rate": 3.751465416178195e-05, "loss": 6.845908164978027, "step": 320, "token_acc": 0.054294137119413215 }, { "epoch": 0.1882145998240985, "grad_norm": 4.692766055687629, "learning_rate": 3.763188745603752e-05, "loss": 6.842883586883545, "step": 321, "token_acc": 0.05154300972725539 }, { "epoch": 0.18880093814130752, "grad_norm": 7.042664582653199, "learning_rate": 3.7749120750293086e-05, "loss": 6.786920547485352, "step": 322, "token_acc": 0.05923182977914182 }, { "epoch": 0.18938727645851655, "grad_norm": 5.431578393082523, "learning_rate": 3.786635404454865e-05, "loss": 6.746435165405273, "step": 323, "token_acc": 0.05848481009985855 }, { "epoch": 0.18997361477572558, "grad_norm": 4.075530250856771, "learning_rate": 3.7983587338804225e-05, "loss": 6.725821495056152, "step": 324, "token_acc": 0.057761391070179406 }, { "epoch": 0.19055995309293464, "grad_norm": 6.003719707092143, "learning_rate": 3.8100820633059795e-05, "loss": 6.6018218994140625, "step": 325, "token_acc": 0.06687685381913036 }, { "epoch": 0.19114629141014366, "grad_norm": 4.201184398536375, "learning_rate": 3.8218053927315364e-05, "loss": 6.591614246368408, "step": 326, "token_acc": 0.06588746070688217 }, { "epoch": 0.1917326297273527, "grad_norm": 3.6096920754357935, "learning_rate": 3.833528722157093e-05, "loss": 6.626930236816406, "step": 327, "token_acc": 0.0645815578218781 }, { "epoch": 0.19231896804456172, "grad_norm": 6.394680532706906, "learning_rate": 3.8452520515826496e-05, "loss": 6.544139385223389, "step": 328, "token_acc": 0.0649723083069822 }, { "epoch": 0.19290530636177075, "grad_norm": 4.488174882421174, "learning_rate": 3.8569753810082066e-05, "loss": 6.556469440460205, "step": 329, "token_acc": 0.06285310734463277 }, { "epoch": 0.19349164467897978, "grad_norm": 5.898014370678826, "learning_rate": 3.8686987104337636e-05, "loss": 6.525766372680664, "step": 330, "token_acc": 0.07114072078716843 }, { "epoch": 0.1940779829961888, "grad_norm": 4.575229544093522, "learning_rate": 3.88042203985932e-05, "loss": 6.511011600494385, "step": 331, "token_acc": 0.07230858509008153 }, { "epoch": 0.19466432131339784, "grad_norm": 4.944474437604798, "learning_rate": 3.892145369284877e-05, "loss": 6.596277236938477, "step": 332, "token_acc": 0.06643491598196785 }, { "epoch": 0.19525065963060687, "grad_norm": 6.28951471147423, "learning_rate": 3.903868698710434e-05, "loss": 6.444920539855957, "step": 333, "token_acc": 0.07534779227512313 }, { "epoch": 0.1958369979478159, "grad_norm": 5.054363457676562, "learning_rate": 3.915592028135991e-05, "loss": 6.509368419647217, "step": 334, "token_acc": 0.06765982279145505 }, { "epoch": 0.19642333626502492, "grad_norm": 5.824608881894826, "learning_rate": 3.9273153575615476e-05, "loss": 6.386015892028809, "step": 335, "token_acc": 0.07861068389479499 }, { "epoch": 0.19700967458223395, "grad_norm": 4.696158819548083, "learning_rate": 3.9390386869871046e-05, "loss": 6.4276933670043945, "step": 336, "token_acc": 0.07102308817045741 }, { "epoch": 0.19759601289944298, "grad_norm": 4.966826764596138, "learning_rate": 3.9507620164126615e-05, "loss": 6.30764627456665, "step": 337, "token_acc": 0.08435175928724391 }, { "epoch": 0.198182351216652, "grad_norm": 3.5096544889134917, "learning_rate": 3.9624853458382185e-05, "loss": 6.336080551147461, "step": 338, "token_acc": 0.07917522227729282 }, { "epoch": 0.19876868953386104, "grad_norm": 4.475172724159575, "learning_rate": 3.9742086752637754e-05, "loss": 6.37416934967041, "step": 339, "token_acc": 0.07943514235455079 }, { "epoch": 0.19935502785107007, "grad_norm": 3.6635807270155736, "learning_rate": 3.985932004689332e-05, "loss": 6.239297866821289, "step": 340, "token_acc": 0.08446195973949082 }, { "epoch": 0.1999413661682791, "grad_norm": 7.001244208216398, "learning_rate": 3.997655334114889e-05, "loss": 6.343474388122559, "step": 341, "token_acc": 0.081755765294107 }, { "epoch": 0.20052770448548812, "grad_norm": 4.380565562063101, "learning_rate": 4.0093786635404456e-05, "loss": 6.209048271179199, "step": 342, "token_acc": 0.08533622570830124 }, { "epoch": 0.20111404280269715, "grad_norm": 5.347243976155213, "learning_rate": 4.0211019929660026e-05, "loss": 6.310052394866943, "step": 343, "token_acc": 0.0825726972771285 }, { "epoch": 0.20170038111990618, "grad_norm": 4.812075288202936, "learning_rate": 4.0328253223915595e-05, "loss": 6.210603713989258, "step": 344, "token_acc": 0.08699436953622759 }, { "epoch": 0.2022867194371152, "grad_norm": 7.151960616505751, "learning_rate": 4.044548651817116e-05, "loss": 6.280752182006836, "step": 345, "token_acc": 0.08506893140265447 }, { "epoch": 0.20287305775432424, "grad_norm": 3.8099430441961077, "learning_rate": 4.056271981242673e-05, "loss": 6.210249900817871, "step": 346, "token_acc": 0.08739278807262285 }, { "epoch": 0.20345939607153327, "grad_norm": 9.325598699943097, "learning_rate": 4.06799531066823e-05, "loss": 6.164924621582031, "step": 347, "token_acc": 0.08574209856965663 }, { "epoch": 0.2040457343887423, "grad_norm": 6.221575328878346, "learning_rate": 4.079718640093787e-05, "loss": 6.174298286437988, "step": 348, "token_acc": 0.08908505885098864 }, { "epoch": 0.20463207270595132, "grad_norm": 6.149406958395271, "learning_rate": 4.0914419695193436e-05, "loss": 6.135800361633301, "step": 349, "token_acc": 0.09458441428215142 }, { "epoch": 0.20521841102316035, "grad_norm": 5.832286508856503, "learning_rate": 4.1031652989449006e-05, "loss": 6.150198459625244, "step": 350, "token_acc": 0.09132320819287217 }, { "epoch": 0.20580474934036938, "grad_norm": 4.692689603551784, "learning_rate": 4.1148886283704575e-05, "loss": 6.036190986633301, "step": 351, "token_acc": 0.09885695917711347 }, { "epoch": 0.2063910876575784, "grad_norm": 4.601553977320466, "learning_rate": 4.1266119577960145e-05, "loss": 6.0102338790893555, "step": 352, "token_acc": 0.09844902060736734 }, { "epoch": 0.20697742597478747, "grad_norm": 4.7297483925632715, "learning_rate": 4.1383352872215714e-05, "loss": 6.127054214477539, "step": 353, "token_acc": 0.08951850197088224 }, { "epoch": 0.2075637642919965, "grad_norm": 5.755085063329411, "learning_rate": 4.150058616647128e-05, "loss": 6.058117389678955, "step": 354, "token_acc": 0.09324453412615784 }, { "epoch": 0.20815010260920552, "grad_norm": 5.341676855616793, "learning_rate": 4.1617819460726846e-05, "loss": 5.9347381591796875, "step": 355, "token_acc": 0.10432863113897596 }, { "epoch": 0.20873644092641455, "grad_norm": 5.395934916865773, "learning_rate": 4.1735052754982416e-05, "loss": 6.054256439208984, "step": 356, "token_acc": 0.09926763192347078 }, { "epoch": 0.20932277924362358, "grad_norm": 5.715870640293422, "learning_rate": 4.1852286049237985e-05, "loss": 5.997771739959717, "step": 357, "token_acc": 0.09700314021161956 }, { "epoch": 0.2099091175608326, "grad_norm": 3.5615944041553607, "learning_rate": 4.1969519343493555e-05, "loss": 5.895035743713379, "step": 358, "token_acc": 0.10641487918318648 }, { "epoch": 0.21049545587804164, "grad_norm": 5.845631819196494, "learning_rate": 4.2086752637749124e-05, "loss": 5.883098602294922, "step": 359, "token_acc": 0.10628197675940909 }, { "epoch": 0.21108179419525067, "grad_norm": 3.5360548107743766, "learning_rate": 4.2203985932004694e-05, "loss": 5.856866836547852, "step": 360, "token_acc": 0.10811878248745563 }, { "epoch": 0.2116681325124597, "grad_norm": 5.967455575364195, "learning_rate": 4.2321219226260263e-05, "loss": 5.933638572692871, "step": 361, "token_acc": 0.10281906474726657 }, { "epoch": 0.21225447082966872, "grad_norm": 4.229025443485074, "learning_rate": 4.243845252051583e-05, "loss": 5.913669109344482, "step": 362, "token_acc": 0.1050239128567077 }, { "epoch": 0.21284080914687775, "grad_norm": 4.529543652120965, "learning_rate": 4.2555685814771396e-05, "loss": 5.800420761108398, "step": 363, "token_acc": 0.11265826075653192 }, { "epoch": 0.21342714746408678, "grad_norm": 5.686450704195899, "learning_rate": 4.2672919109026965e-05, "loss": 5.866429328918457, "step": 364, "token_acc": 0.1057487212408623 }, { "epoch": 0.2140134857812958, "grad_norm": 4.077313786677668, "learning_rate": 4.2790152403282535e-05, "loss": 5.747271537780762, "step": 365, "token_acc": 0.11442014276304292 }, { "epoch": 0.21459982409850484, "grad_norm": 6.672966648822387, "learning_rate": 4.2907385697538104e-05, "loss": 5.730378150939941, "step": 366, "token_acc": 0.11439315034922191 }, { "epoch": 0.21518616241571387, "grad_norm": 2.7752563636797394, "learning_rate": 4.302461899179367e-05, "loss": 5.722157955169678, "step": 367, "token_acc": 0.11698321017378943 }, { "epoch": 0.2157725007329229, "grad_norm": 8.241006700989637, "learning_rate": 4.3141852286049237e-05, "loss": 5.77549934387207, "step": 368, "token_acc": 0.11451595990905626 }, { "epoch": 0.21635883905013192, "grad_norm": 5.269185513436011, "learning_rate": 4.3259085580304806e-05, "loss": 5.682419776916504, "step": 369, "token_acc": 0.12174918787329506 }, { "epoch": 0.21694517736734095, "grad_norm": 5.705536472809987, "learning_rate": 4.3376318874560376e-05, "loss": 5.659437656402588, "step": 370, "token_acc": 0.11823349019193148 }, { "epoch": 0.21753151568454998, "grad_norm": 5.457771189060109, "learning_rate": 4.3493552168815945e-05, "loss": 5.651723861694336, "step": 371, "token_acc": 0.12209207554199754 }, { "epoch": 0.218117854001759, "grad_norm": 3.1384572810976428, "learning_rate": 4.3610785463071515e-05, "loss": 5.68341064453125, "step": 372, "token_acc": 0.1172328908535052 }, { "epoch": 0.21870419231896804, "grad_norm": 6.773099153647383, "learning_rate": 4.3728018757327084e-05, "loss": 5.680211067199707, "step": 373, "token_acc": 0.1181388900922757 }, { "epoch": 0.21929053063617707, "grad_norm": 3.9603005154282895, "learning_rate": 4.3845252051582654e-05, "loss": 5.662923812866211, "step": 374, "token_acc": 0.12033459095852522 }, { "epoch": 0.2198768689533861, "grad_norm": 4.891664810590013, "learning_rate": 4.396248534583822e-05, "loss": 5.65479850769043, "step": 375, "token_acc": 0.11968197935379797 }, { "epoch": 0.22046320727059512, "grad_norm": 4.451815920987612, "learning_rate": 4.4079718640093786e-05, "loss": 5.569565773010254, "step": 376, "token_acc": 0.12573834146086366 }, { "epoch": 0.22104954558780415, "grad_norm": 3.6537013107000744, "learning_rate": 4.4196951934349355e-05, "loss": 5.645937919616699, "step": 377, "token_acc": 0.1212388384973406 }, { "epoch": 0.22163588390501318, "grad_norm": 3.8855429079124764, "learning_rate": 4.4314185228604925e-05, "loss": 5.52020263671875, "step": 378, "token_acc": 0.12892019050975298 }, { "epoch": 0.2222222222222222, "grad_norm": 5.587337565697956, "learning_rate": 4.4431418522860494e-05, "loss": 5.542252540588379, "step": 379, "token_acc": 0.13029775523505793 }, { "epoch": 0.22280856053943124, "grad_norm": 2.542651347601647, "learning_rate": 4.454865181711606e-05, "loss": 5.49782657623291, "step": 380, "token_acc": 0.12988392743879235 }, { "epoch": 0.2233948988566403, "grad_norm": 7.035213414596234, "learning_rate": 4.4665885111371633e-05, "loss": 5.471724987030029, "step": 381, "token_acc": 0.13173882035975593 }, { "epoch": 0.22398123717384932, "grad_norm": 4.007597974750773, "learning_rate": 4.47831184056272e-05, "loss": 5.421903610229492, "step": 382, "token_acc": 0.1353257786024259 }, { "epoch": 0.22456757549105835, "grad_norm": 6.123846328762211, "learning_rate": 4.490035169988277e-05, "loss": 5.4561638832092285, "step": 383, "token_acc": 0.13245959184311193 }, { "epoch": 0.22515391380826738, "grad_norm": 3.915813309836124, "learning_rate": 4.501758499413834e-05, "loss": 5.427371978759766, "step": 384, "token_acc": 0.13743677567931242 }, { "epoch": 0.2257402521254764, "grad_norm": 5.051389456308207, "learning_rate": 4.5134818288393905e-05, "loss": 5.4404191970825195, "step": 385, "token_acc": 0.13028250727540924 }, { "epoch": 0.22632659044268544, "grad_norm": 3.7147143922772434, "learning_rate": 4.5252051582649474e-05, "loss": 5.433538436889648, "step": 386, "token_acc": 0.1338136581432912 }, { "epoch": 0.22691292875989447, "grad_norm": 3.842044243055899, "learning_rate": 4.5369284876905044e-05, "loss": 5.426792144775391, "step": 387, "token_acc": 0.13355265071679231 }, { "epoch": 0.2274992670771035, "grad_norm": 4.494266776847402, "learning_rate": 4.548651817116061e-05, "loss": 5.399279594421387, "step": 388, "token_acc": 0.13653473341978656 }, { "epoch": 0.22808560539431252, "grad_norm": 4.0793197936546415, "learning_rate": 4.5603751465416176e-05, "loss": 5.424678325653076, "step": 389, "token_acc": 0.13530543056793912 }, { "epoch": 0.22867194371152155, "grad_norm": 4.937034919559551, "learning_rate": 4.5720984759671746e-05, "loss": 5.320995330810547, "step": 390, "token_acc": 0.1419976913509804 }, { "epoch": 0.22925828202873058, "grad_norm": 5.49299598158518, "learning_rate": 4.5838218053927315e-05, "loss": 5.3018798828125, "step": 391, "token_acc": 0.14444821839035063 }, { "epoch": 0.2298446203459396, "grad_norm": 3.8325353825306303, "learning_rate": 4.5955451348182885e-05, "loss": 5.3399224281311035, "step": 392, "token_acc": 0.14038277289548506 }, { "epoch": 0.23043095866314864, "grad_norm": 5.20437433350758, "learning_rate": 4.6072684642438454e-05, "loss": 5.305937767028809, "step": 393, "token_acc": 0.14236234978409376 }, { "epoch": 0.23101729698035767, "grad_norm": 3.728657536120107, "learning_rate": 4.6189917936694024e-05, "loss": 5.201369285583496, "step": 394, "token_acc": 0.1493447073297205 }, { "epoch": 0.2316036352975667, "grad_norm": 2.9344606334239405, "learning_rate": 4.630715123094959e-05, "loss": 5.297689437866211, "step": 395, "token_acc": 0.14088288217292513 }, { "epoch": 0.23218997361477572, "grad_norm": 5.086839632347553, "learning_rate": 4.642438452520516e-05, "loss": 5.243603706359863, "step": 396, "token_acc": 0.14902199223803364 }, { "epoch": 0.23277631193198475, "grad_norm": 3.210427720308006, "learning_rate": 4.654161781946073e-05, "loss": 5.270680904388428, "step": 397, "token_acc": 0.14516630888662863 }, { "epoch": 0.23336265024919378, "grad_norm": 4.778326085569365, "learning_rate": 4.6658851113716295e-05, "loss": 5.325577735900879, "step": 398, "token_acc": 0.13558950291864524 }, { "epoch": 0.2339489885664028, "grad_norm": 3.6557315264797308, "learning_rate": 4.6776084407971864e-05, "loss": 5.205186367034912, "step": 399, "token_acc": 0.14969502934116122 }, { "epoch": 0.23453532688361184, "grad_norm": 5.398471352403463, "learning_rate": 4.6893317702227434e-05, "loss": 5.219882011413574, "step": 400, "token_acc": 0.14776963416945726 }, { "epoch": 0.23512166520082087, "grad_norm": 3.5376565697782043, "learning_rate": 4.7010550996483003e-05, "loss": 5.163187026977539, "step": 401, "token_acc": 0.15280870318055817 }, { "epoch": 0.2357080035180299, "grad_norm": 6.476770641315054, "learning_rate": 4.712778429073857e-05, "loss": 5.174548149108887, "step": 402, "token_acc": 0.14979405187777747 }, { "epoch": 0.23629434183523892, "grad_norm": 4.423286572106772, "learning_rate": 4.7245017584994136e-05, "loss": 5.136268138885498, "step": 403, "token_acc": 0.15266173311052758 }, { "epoch": 0.23688068015244795, "grad_norm": 4.778090928904879, "learning_rate": 4.736225087924971e-05, "loss": 5.147398471832275, "step": 404, "token_acc": 0.15271420087556561 }, { "epoch": 0.23746701846965698, "grad_norm": 3.827407600350183, "learning_rate": 4.747948417350528e-05, "loss": 5.075559616088867, "step": 405, "token_acc": 0.15962479565608195 }, { "epoch": 0.238053356786866, "grad_norm": 3.0031702278636634, "learning_rate": 4.759671746776085e-05, "loss": 5.111988067626953, "step": 406, "token_acc": 0.15723925391241372 }, { "epoch": 0.23863969510407504, "grad_norm": 5.420472709231712, "learning_rate": 4.7713950762016414e-05, "loss": 5.096987724304199, "step": 407, "token_acc": 0.1567969515346462 }, { "epoch": 0.23922603342128407, "grad_norm": 5.449500272978671, "learning_rate": 4.783118405627198e-05, "loss": 5.149758338928223, "step": 408, "token_acc": 0.1505394283172061 }, { "epoch": 0.23981237173849312, "grad_norm": 4.608375821577883, "learning_rate": 4.794841735052755e-05, "loss": 5.046197891235352, "step": 409, "token_acc": 0.15915106813062027 }, { "epoch": 0.24039871005570215, "grad_norm": 3.4576392609912467, "learning_rate": 4.806565064478312e-05, "loss": 5.0519208908081055, "step": 410, "token_acc": 0.16108123017514825 }, { "epoch": 0.24098504837291118, "grad_norm": 4.179272701853546, "learning_rate": 4.8182883939038685e-05, "loss": 4.946943283081055, "step": 411, "token_acc": 0.16871528649976275 }, { "epoch": 0.2415713866901202, "grad_norm": 3.675872246847288, "learning_rate": 4.8300117233294255e-05, "loss": 5.007920265197754, "step": 412, "token_acc": 0.16317211767851533 }, { "epoch": 0.24215772500732924, "grad_norm": 4.9142371455491975, "learning_rate": 4.8417350527549824e-05, "loss": 5.013101577758789, "step": 413, "token_acc": 0.15803190248750165 }, { "epoch": 0.24274406332453827, "grad_norm": 3.1770908201582193, "learning_rate": 4.8534583821805394e-05, "loss": 4.974532604217529, "step": 414, "token_acc": 0.16620475884176283 }, { "epoch": 0.2433304016417473, "grad_norm": 5.089356478874053, "learning_rate": 4.865181711606096e-05, "loss": 5.017539978027344, "step": 415, "token_acc": 0.15953471249268353 }, { "epoch": 0.24391673995895632, "grad_norm": 3.0222424743635856, "learning_rate": 4.876905041031653e-05, "loss": 5.045172691345215, "step": 416, "token_acc": 0.1563271753528109 }, { "epoch": 0.24450307827616535, "grad_norm": 4.265443053126318, "learning_rate": 4.88862837045721e-05, "loss": 4.937203407287598, "step": 417, "token_acc": 0.16491518885726497 }, { "epoch": 0.24508941659337438, "grad_norm": 3.310746147527371, "learning_rate": 4.900351699882767e-05, "loss": 4.929368019104004, "step": 418, "token_acc": 0.16673037442441518 }, { "epoch": 0.2456757549105834, "grad_norm": 3.9085209966297474, "learning_rate": 4.912075029308324e-05, "loss": 4.954566955566406, "step": 419, "token_acc": 0.163182649935057 }, { "epoch": 0.24626209322779244, "grad_norm": 5.4866721056533585, "learning_rate": 4.9237983587338804e-05, "loss": 4.969094276428223, "step": 420, "token_acc": 0.15997041320646874 }, { "epoch": 0.24684843154500147, "grad_norm": 3.3752213125571915, "learning_rate": 4.9355216881594373e-05, "loss": 4.968939304351807, "step": 421, "token_acc": 0.1638807205676675 }, { "epoch": 0.2474347698622105, "grad_norm": 4.646425434911336, "learning_rate": 4.947245017584994e-05, "loss": 4.950305938720703, "step": 422, "token_acc": 0.16355955125645616 }, { "epoch": 0.24802110817941952, "grad_norm": 3.8480374814367377, "learning_rate": 4.958968347010551e-05, "loss": 4.9508562088012695, "step": 423, "token_acc": 0.16265855959744208 }, { "epoch": 0.24860744649662855, "grad_norm": 2.7654982744712124, "learning_rate": 4.970691676436108e-05, "loss": 4.805039405822754, "step": 424, "token_acc": 0.1760382252263728 }, { "epoch": 0.24919378481383758, "grad_norm": 4.423193541228044, "learning_rate": 4.9824150058616645e-05, "loss": 4.82535457611084, "step": 425, "token_acc": 0.17515355442282757 }, { "epoch": 0.2497801231310466, "grad_norm": 4.61103093354574, "learning_rate": 4.9941383352872214e-05, "loss": 4.852334499359131, "step": 426, "token_acc": 0.1716508990275111 }, { "epoch": 0.25036646144825564, "grad_norm": 3.5509009977431463, "learning_rate": 5.005861664712779e-05, "loss": 4.856379508972168, "step": 427, "token_acc": 0.17005668037198926 }, { "epoch": 0.2509527997654647, "grad_norm": 3.2601719431377774, "learning_rate": 5.017584994138336e-05, "loss": 4.8676605224609375, "step": 428, "token_acc": 0.16826130775559006 }, { "epoch": 0.2515391380826737, "grad_norm": 4.232915465436352, "learning_rate": 5.029308323563893e-05, "loss": 4.851711273193359, "step": 429, "token_acc": 0.17420452430952443 }, { "epoch": 0.25212547639988275, "grad_norm": 3.884975874428379, "learning_rate": 5.04103165298945e-05, "loss": 4.870162487030029, "step": 430, "token_acc": 0.1702800798191851 }, { "epoch": 0.25271181471709175, "grad_norm": 4.682919672338841, "learning_rate": 5.0527549824150055e-05, "loss": 4.756889343261719, "step": 431, "token_acc": 0.18132139178353895 }, { "epoch": 0.2532981530343008, "grad_norm": 3.3294638413452087, "learning_rate": 5.0644783118405625e-05, "loss": 4.807443618774414, "step": 432, "token_acc": 0.1746914565177199 }, { "epoch": 0.2538844913515098, "grad_norm": 4.029265109002957, "learning_rate": 5.0762016412661194e-05, "loss": 4.8091630935668945, "step": 433, "token_acc": 0.17244274906875232 }, { "epoch": 0.25447082966871887, "grad_norm": 4.566084594423671, "learning_rate": 5.0879249706916764e-05, "loss": 4.794814586639404, "step": 434, "token_acc": 0.17268670627378901 }, { "epoch": 0.25505716798592787, "grad_norm": 2.671062649057688, "learning_rate": 5.099648300117233e-05, "loss": 4.72795295715332, "step": 435, "token_acc": 0.17818418092444874 }, { "epoch": 0.2556435063031369, "grad_norm": 4.478555987155282, "learning_rate": 5.11137162954279e-05, "loss": 4.773293495178223, "step": 436, "token_acc": 0.1773602397430093 }, { "epoch": 0.2562298446203459, "grad_norm": 2.657925898560469, "learning_rate": 5.123094958968347e-05, "loss": 4.810330867767334, "step": 437, "token_acc": 0.1714175287751882 }, { "epoch": 0.256816182937555, "grad_norm": 6.023820501185185, "learning_rate": 5.134818288393904e-05, "loss": 4.823758125305176, "step": 438, "token_acc": 0.17141139311959966 }, { "epoch": 0.257402521254764, "grad_norm": 3.3973305175625237, "learning_rate": 5.146541617819461e-05, "loss": 4.7239603996276855, "step": 439, "token_acc": 0.18132137159824996 }, { "epoch": 0.25798885957197304, "grad_norm": 5.365086437028126, "learning_rate": 5.1582649472450174e-05, "loss": 4.755255222320557, "step": 440, "token_acc": 0.17837833469374562 }, { "epoch": 0.25857519788918204, "grad_norm": 4.070008151234846, "learning_rate": 5.1699882766705743e-05, "loss": 4.7630934715271, "step": 441, "token_acc": 0.17507118100162025 }, { "epoch": 0.2591615362063911, "grad_norm": 4.015443808366917, "learning_rate": 5.181711606096131e-05, "loss": 4.7503342628479, "step": 442, "token_acc": 0.1794966939295083 }, { "epoch": 0.2597478745236001, "grad_norm": 3.7405364322711496, "learning_rate": 5.193434935521688e-05, "loss": 4.753812313079834, "step": 443, "token_acc": 0.17403056114921983 }, { "epoch": 0.26033421284080915, "grad_norm": 4.255768332015965, "learning_rate": 5.205158264947245e-05, "loss": 4.750164031982422, "step": 444, "token_acc": 0.17661911771639313 }, { "epoch": 0.26092055115801815, "grad_norm": 3.344182570884632, "learning_rate": 5.216881594372802e-05, "loss": 4.712779998779297, "step": 445, "token_acc": 0.17827867186589672 }, { "epoch": 0.2615068894752272, "grad_norm": 3.2942120060425526, "learning_rate": 5.228604923798359e-05, "loss": 4.713029861450195, "step": 446, "token_acc": 0.17849801394105191 }, { "epoch": 0.2620932277924362, "grad_norm": 3.631913296407001, "learning_rate": 5.240328253223916e-05, "loss": 4.672717094421387, "step": 447, "token_acc": 0.1823320681065526 }, { "epoch": 0.26267956610964527, "grad_norm": 3.5917764041423896, "learning_rate": 5.252051582649473e-05, "loss": 4.751844882965088, "step": 448, "token_acc": 0.17539330716575505 }, { "epoch": 0.26326590442685427, "grad_norm": 4.66199453953803, "learning_rate": 5.263774912075029e-05, "loss": 4.6465606689453125, "step": 449, "token_acc": 0.18647601638893072 }, { "epoch": 0.2638522427440633, "grad_norm": 3.110033847968872, "learning_rate": 5.275498241500586e-05, "loss": 4.732669830322266, "step": 450, "token_acc": 0.17615747478164193 }, { "epoch": 0.2644385810612723, "grad_norm": 6.136425267770306, "learning_rate": 5.287221570926143e-05, "loss": 4.752108573913574, "step": 451, "token_acc": 0.17556507392085308 }, { "epoch": 0.2650249193784814, "grad_norm": 2.454061213324948, "learning_rate": 5.2989449003517e-05, "loss": 4.7041215896606445, "step": 452, "token_acc": 0.17846731011200684 }, { "epoch": 0.26561125769569044, "grad_norm": 5.095585691439736, "learning_rate": 5.310668229777257e-05, "loss": 4.647067070007324, "step": 453, "token_acc": 0.18367408715393832 }, { "epoch": 0.26619759601289944, "grad_norm": 3.912351219840878, "learning_rate": 5.322391559202814e-05, "loss": 4.601974010467529, "step": 454, "token_acc": 0.18942634666886832 }, { "epoch": 0.2667839343301085, "grad_norm": 3.1909408395324905, "learning_rate": 5.334114888628371e-05, "loss": 4.691964149475098, "step": 455, "token_acc": 0.17834420158649184 }, { "epoch": 0.2673702726473175, "grad_norm": 3.6206275155656824, "learning_rate": 5.345838218053928e-05, "loss": 4.594367027282715, "step": 456, "token_acc": 0.18779055573401862 }, { "epoch": 0.26795661096452655, "grad_norm": 3.0073995915152962, "learning_rate": 5.357561547479485e-05, "loss": 4.679717063903809, "step": 457, "token_acc": 0.18085086832674813 }, { "epoch": 0.26854294928173555, "grad_norm": 3.7129917382547335, "learning_rate": 5.3692848769050405e-05, "loss": 4.6553473472595215, "step": 458, "token_acc": 0.1805312309849467 }, { "epoch": 0.2691292875989446, "grad_norm": 2.7666424999717423, "learning_rate": 5.3810082063305974e-05, "loss": 4.669938087463379, "step": 459, "token_acc": 0.1797411519543521 }, { "epoch": 0.2697156259161536, "grad_norm": 3.4278824474403264, "learning_rate": 5.392731535756155e-05, "loss": 4.65788459777832, "step": 460, "token_acc": 0.17988725461707755 }, { "epoch": 0.27030196423336267, "grad_norm": 3.8472485397278873, "learning_rate": 5.404454865181712e-05, "loss": 4.689602851867676, "step": 461, "token_acc": 0.17704811200316223 }, { "epoch": 0.27088830255057167, "grad_norm": 2.823584758680837, "learning_rate": 5.416178194607269e-05, "loss": 4.56749153137207, "step": 462, "token_acc": 0.1920817936233539 }, { "epoch": 0.2714746408677807, "grad_norm": 3.6490140941426192, "learning_rate": 5.427901524032826e-05, "loss": 4.528097152709961, "step": 463, "token_acc": 0.19415251296067332 }, { "epoch": 0.2720609791849897, "grad_norm": 4.7132989375231755, "learning_rate": 5.439624853458383e-05, "loss": 4.581571578979492, "step": 464, "token_acc": 0.18678572372070906 }, { "epoch": 0.2726473175021988, "grad_norm": 2.755636297750266, "learning_rate": 5.45134818288394e-05, "loss": 4.650444984436035, "step": 465, "token_acc": 0.17997130267645373 }, { "epoch": 0.2732336558194078, "grad_norm": 4.397395293628881, "learning_rate": 5.463071512309497e-05, "loss": 4.568827152252197, "step": 466, "token_acc": 0.1866361042097733 }, { "epoch": 0.27381999413661684, "grad_norm": 2.986730148647632, "learning_rate": 5.4747948417350524e-05, "loss": 4.6009202003479, "step": 467, "token_acc": 0.18425193276548105 }, { "epoch": 0.27440633245382584, "grad_norm": 4.699682073573299, "learning_rate": 5.486518171160609e-05, "loss": 4.602838516235352, "step": 468, "token_acc": 0.1868637541790316 }, { "epoch": 0.2749926707710349, "grad_norm": 2.8796390464984034, "learning_rate": 5.498241500586166e-05, "loss": 4.545544624328613, "step": 469, "token_acc": 0.18741544055470996 }, { "epoch": 0.2755790090882439, "grad_norm": 5.3082079793418115, "learning_rate": 5.509964830011723e-05, "loss": 4.601395130157471, "step": 470, "token_acc": 0.18333350748140098 }, { "epoch": 0.27616534740545295, "grad_norm": 3.121777272541811, "learning_rate": 5.52168815943728e-05, "loss": 4.559847354888916, "step": 471, "token_acc": 0.19044785427520886 }, { "epoch": 0.27675168572266196, "grad_norm": 3.5974998778561282, "learning_rate": 5.533411488862837e-05, "loss": 4.589479446411133, "step": 472, "token_acc": 0.18454644037680137 }, { "epoch": 0.277338024039871, "grad_norm": 3.7081488343461184, "learning_rate": 5.545134818288395e-05, "loss": 4.587972640991211, "step": 473, "token_acc": 0.1865380705053056 }, { "epoch": 0.27792436235708, "grad_norm": 3.132876670561137, "learning_rate": 5.556858147713952e-05, "loss": 4.512928485870361, "step": 474, "token_acc": 0.19116964313349222 }, { "epoch": 0.27851070067428907, "grad_norm": 4.563254403654539, "learning_rate": 5.568581477139509e-05, "loss": 4.464240074157715, "step": 475, "token_acc": 0.1967592958287914 }, { "epoch": 0.27909703899149807, "grad_norm": 2.531606706540315, "learning_rate": 5.580304806565064e-05, "loss": 4.568399429321289, "step": 476, "token_acc": 0.18655219884448201 }, { "epoch": 0.2796833773087071, "grad_norm": 3.246063240066807, "learning_rate": 5.592028135990621e-05, "loss": 4.511410236358643, "step": 477, "token_acc": 0.1905886843449062 }, { "epoch": 0.2802697156259161, "grad_norm": 2.9008077674361146, "learning_rate": 5.603751465416178e-05, "loss": 4.5342512130737305, "step": 478, "token_acc": 0.1906546070687063 }, { "epoch": 0.2808560539431252, "grad_norm": 4.482368078854062, "learning_rate": 5.615474794841735e-05, "loss": 4.595100402832031, "step": 479, "token_acc": 0.18227198923855414 }, { "epoch": 0.28144239226033424, "grad_norm": 2.8591624906446964, "learning_rate": 5.627198124267292e-05, "loss": 4.464855194091797, "step": 480, "token_acc": 0.19437381590122166 }, { "epoch": 0.28202873057754324, "grad_norm": 3.732394683730308, "learning_rate": 5.638921453692849e-05, "loss": 4.487558364868164, "step": 481, "token_acc": 0.19142626325801995 }, { "epoch": 0.2826150688947523, "grad_norm": 3.0738507311070222, "learning_rate": 5.650644783118406e-05, "loss": 4.506575107574463, "step": 482, "token_acc": 0.18991745469496188 }, { "epoch": 0.2832014072119613, "grad_norm": 2.640228879884843, "learning_rate": 5.662368112543963e-05, "loss": 4.508360385894775, "step": 483, "token_acc": 0.1901190896678247 }, { "epoch": 0.28378774552917035, "grad_norm": 4.1319681597755284, "learning_rate": 5.67409144196952e-05, "loss": 4.513545036315918, "step": 484, "token_acc": 0.1895675356064771 }, { "epoch": 0.28437408384637936, "grad_norm": 1.9575377377133154, "learning_rate": 5.685814771395076e-05, "loss": 4.48956298828125, "step": 485, "token_acc": 0.19050407965148464 }, { "epoch": 0.2849604221635884, "grad_norm": 4.50932084457957, "learning_rate": 5.697538100820633e-05, "loss": 4.452964782714844, "step": 486, "token_acc": 0.1954853463892806 }, { "epoch": 0.2855467604807974, "grad_norm": 2.9427026965492, "learning_rate": 5.70926143024619e-05, "loss": 4.47849178314209, "step": 487, "token_acc": 0.19249258797281396 }, { "epoch": 0.28613309879800647, "grad_norm": 3.480974505558243, "learning_rate": 5.720984759671747e-05, "loss": 4.5085368156433105, "step": 488, "token_acc": 0.19087664413766048 }, { "epoch": 0.28671943711521547, "grad_norm": 3.7232006890841616, "learning_rate": 5.732708089097304e-05, "loss": 4.407660484313965, "step": 489, "token_acc": 0.1992382192647413 }, { "epoch": 0.2873057754324245, "grad_norm": 2.7136169159354, "learning_rate": 5.744431418522861e-05, "loss": 4.480723857879639, "step": 490, "token_acc": 0.19082514871551567 }, { "epoch": 0.2878921137496335, "grad_norm": 2.9036343074174873, "learning_rate": 5.756154747948418e-05, "loss": 4.38192892074585, "step": 491, "token_acc": 0.20023494677312043 }, { "epoch": 0.2884784520668426, "grad_norm": 3.5612553025697067, "learning_rate": 5.767878077373975e-05, "loss": 4.463859558105469, "step": 492, "token_acc": 0.19478429944158673 }, { "epoch": 0.2890647903840516, "grad_norm": 4.205181719826563, "learning_rate": 5.779601406799532e-05, "loss": 4.446661472320557, "step": 493, "token_acc": 0.19458880257165817 }, { "epoch": 0.28965112870126064, "grad_norm": 3.5094414677268015, "learning_rate": 5.791324736225088e-05, "loss": 4.450451850891113, "step": 494, "token_acc": 0.19391485323613777 }, { "epoch": 0.29023746701846964, "grad_norm": 3.34698573124997, "learning_rate": 5.803048065650645e-05, "loss": 4.456144332885742, "step": 495, "token_acc": 0.19304447214220327 }, { "epoch": 0.2908238053356787, "grad_norm": 2.753792317524598, "learning_rate": 5.814771395076202e-05, "loss": 4.411314010620117, "step": 496, "token_acc": 0.19554556279769367 }, { "epoch": 0.2914101436528877, "grad_norm": 3.8606605865456904, "learning_rate": 5.826494724501759e-05, "loss": 4.470166206359863, "step": 497, "token_acc": 0.1917171439892062 }, { "epoch": 0.29199648197009676, "grad_norm": 2.455948618551917, "learning_rate": 5.838218053927316e-05, "loss": 4.377389430999756, "step": 498, "token_acc": 0.19982438861704732 }, { "epoch": 0.29258282028730576, "grad_norm": 5.343417119635586, "learning_rate": 5.849941383352873e-05, "loss": 4.406900882720947, "step": 499, "token_acc": 0.1980844031385122 }, { "epoch": 0.2931691586045148, "grad_norm": 2.9239887603882244, "learning_rate": 5.86166471277843e-05, "loss": 4.51575231552124, "step": 500, "token_acc": 0.1866204138401301 }, { "epoch": 0.2937554969217238, "grad_norm": 2.800433513644758, "learning_rate": 5.873388042203987e-05, "loss": 4.3633198738098145, "step": 501, "token_acc": 0.20142863367361846 }, { "epoch": 0.29434183523893287, "grad_norm": 3.828413682092989, "learning_rate": 5.8851113716295437e-05, "loss": 4.386670112609863, "step": 502, "token_acc": 0.20040446044356172 }, { "epoch": 0.29492817355614187, "grad_norm": 2.8804133009076853, "learning_rate": 5.896834701055099e-05, "loss": 4.323336601257324, "step": 503, "token_acc": 0.20636026312722425 }, { "epoch": 0.2955145118733509, "grad_norm": 2.643471056930714, "learning_rate": 5.908558030480656e-05, "loss": 4.417023658752441, "step": 504, "token_acc": 0.19429314731992475 }, { "epoch": 0.2961008501905599, "grad_norm": 3.369462429534447, "learning_rate": 5.920281359906213e-05, "loss": 4.443114757537842, "step": 505, "token_acc": 0.19048538440746032 }, { "epoch": 0.296687188507769, "grad_norm": 3.7701041467513283, "learning_rate": 5.932004689331771e-05, "loss": 4.392706394195557, "step": 506, "token_acc": 0.19715420061212915 }, { "epoch": 0.297273526824978, "grad_norm": 2.4770220668655187, "learning_rate": 5.943728018757328e-05, "loss": 4.408752918243408, "step": 507, "token_acc": 0.19628239302219208 }, { "epoch": 0.29785986514218704, "grad_norm": 3.7387126188839277, "learning_rate": 5.955451348182885e-05, "loss": 4.347857475280762, "step": 508, "token_acc": 0.20171198176137803 }, { "epoch": 0.2984462034593961, "grad_norm": 3.23206754272277, "learning_rate": 5.9671746776084416e-05, "loss": 4.395596981048584, "step": 509, "token_acc": 0.19687374442765032 }, { "epoch": 0.2990325417766051, "grad_norm": 3.358368391772889, "learning_rate": 5.9788980070339986e-05, "loss": 4.325115203857422, "step": 510, "token_acc": 0.2025297587359724 }, { "epoch": 0.29961888009381415, "grad_norm": 2.6976862248522515, "learning_rate": 5.990621336459554e-05, "loss": 4.369183540344238, "step": 511, "token_acc": 0.19953872324438277 }, { "epoch": 0.30020521841102316, "grad_norm": 2.4095643340089823, "learning_rate": 6.002344665885111e-05, "loss": 4.426271438598633, "step": 512, "token_acc": 0.19135800869278996 }, { "epoch": 0.3007915567282322, "grad_norm": 3.8703607332517675, "learning_rate": 6.014067995310668e-05, "loss": 4.4024763107299805, "step": 513, "token_acc": 0.1927203055160008 }, { "epoch": 0.3013778950454412, "grad_norm": 1.6644479259440794, "learning_rate": 6.025791324736225e-05, "loss": 4.343623161315918, "step": 514, "token_acc": 0.19906187400591813 }, { "epoch": 0.30196423336265027, "grad_norm": 3.480729105453486, "learning_rate": 6.037514654161782e-05, "loss": 4.367980480194092, "step": 515, "token_acc": 0.1987372864490044 }, { "epoch": 0.30255057167985927, "grad_norm": 2.6814707008914342, "learning_rate": 6.049237983587339e-05, "loss": 4.429714679718018, "step": 516, "token_acc": 0.19339287208232242 }, { "epoch": 0.3031369099970683, "grad_norm": 2.785312769386459, "learning_rate": 6.060961313012896e-05, "loss": 4.3388566970825195, "step": 517, "token_acc": 0.20033506676621549 }, { "epoch": 0.3037232483142773, "grad_norm": 2.495024892629932, "learning_rate": 6.072684642438453e-05, "loss": 4.331653594970703, "step": 518, "token_acc": 0.20044875808808182 }, { "epoch": 0.3043095866314864, "grad_norm": 3.4708620091733438, "learning_rate": 6.08440797186401e-05, "loss": 4.390961647033691, "step": 519, "token_acc": 0.1969736034213751 }, { "epoch": 0.3048959249486954, "grad_norm": 3.033005292285393, "learning_rate": 6.096131301289566e-05, "loss": 4.349078178405762, "step": 520, "token_acc": 0.19735757707282164 }, { "epoch": 0.30548226326590444, "grad_norm": 2.5937866371977747, "learning_rate": 6.107854630715122e-05, "loss": 4.394092559814453, "step": 521, "token_acc": 0.1937719465109225 }, { "epoch": 0.30606860158311344, "grad_norm": 3.1840985236302886, "learning_rate": 6.11957796014068e-05, "loss": 4.303842544555664, "step": 522, "token_acc": 0.20364115984287895 }, { "epoch": 0.3066549399003225, "grad_norm": 3.1289547158734514, "learning_rate": 6.131301289566238e-05, "loss": 4.325160503387451, "step": 523, "token_acc": 0.20073300492610838 }, { "epoch": 0.3072412782175315, "grad_norm": 3.0988410176487204, "learning_rate": 6.143024618991794e-05, "loss": 4.369857311248779, "step": 524, "token_acc": 0.1947928981836714 }, { "epoch": 0.30782761653474056, "grad_norm": 2.41363271069774, "learning_rate": 6.154747948417352e-05, "loss": 4.2893781661987305, "step": 525, "token_acc": 0.20386245102430636 }, { "epoch": 0.30841395485194956, "grad_norm": 3.4778544856462688, "learning_rate": 6.166471277842908e-05, "loss": 4.30779504776001, "step": 526, "token_acc": 0.2030679195216372 }, { "epoch": 0.3090002931691586, "grad_norm": 2.251000694494349, "learning_rate": 6.178194607268465e-05, "loss": 4.313277244567871, "step": 527, "token_acc": 0.20145233757064016 }, { "epoch": 0.3095866314863676, "grad_norm": 3.6065247266884075, "learning_rate": 6.189917936694022e-05, "loss": 4.308966159820557, "step": 528, "token_acc": 0.2023708563351735 }, { "epoch": 0.31017296980357667, "grad_norm": 2.961381538778253, "learning_rate": 6.201641266119578e-05, "loss": 4.340670108795166, "step": 529, "token_acc": 0.1994566223499258 }, { "epoch": 0.31075930812078567, "grad_norm": 3.125363991499523, "learning_rate": 6.213364595545134e-05, "loss": 4.285861015319824, "step": 530, "token_acc": 0.20383868029825172 }, { "epoch": 0.3113456464379947, "grad_norm": 2.289866650220009, "learning_rate": 6.225087924970692e-05, "loss": 4.2917022705078125, "step": 531, "token_acc": 0.20222967263608363 }, { "epoch": 0.31193198475520373, "grad_norm": 3.382631909872567, "learning_rate": 6.236811254396248e-05, "loss": 4.259230136871338, "step": 532, "token_acc": 0.2058734019101656 }, { "epoch": 0.3125183230724128, "grad_norm": 3.110635914721146, "learning_rate": 6.248534583821806e-05, "loss": 4.2336039543151855, "step": 533, "token_acc": 0.20849226414753255 }, { "epoch": 0.3131046613896218, "grad_norm": 3.0024036914041328, "learning_rate": 6.260257913247362e-05, "loss": 4.2339277267456055, "step": 534, "token_acc": 0.20773492774766358 }, { "epoch": 0.31369099970683084, "grad_norm": 2.4222094918046535, "learning_rate": 6.27198124267292e-05, "loss": 4.2574992179870605, "step": 535, "token_acc": 0.20562153012503526 }, { "epoch": 0.3142773380240399, "grad_norm": 3.3072388970515414, "learning_rate": 6.283704572098477e-05, "loss": 4.30474853515625, "step": 536, "token_acc": 0.2000345180293251 }, { "epoch": 0.3148636763412489, "grad_norm": 2.4305479203971174, "learning_rate": 6.295427901524034e-05, "loss": 4.312208652496338, "step": 537, "token_acc": 0.1995582754336723 }, { "epoch": 0.31545001465845796, "grad_norm": 2.9078070087946886, "learning_rate": 6.30715123094959e-05, "loss": 4.245410919189453, "step": 538, "token_acc": 0.20610069637343187 }, { "epoch": 0.31603635297566696, "grad_norm": 3.0994651985418256, "learning_rate": 6.318874560375146e-05, "loss": 4.257229804992676, "step": 539, "token_acc": 0.20460338566165456 }, { "epoch": 0.316622691292876, "grad_norm": 2.3054571015684946, "learning_rate": 6.330597889800704e-05, "loss": 4.27479362487793, "step": 540, "token_acc": 0.2029582760249069 }, { "epoch": 0.317209029610085, "grad_norm": 3.8815160826069444, "learning_rate": 6.34232121922626e-05, "loss": 4.2639007568359375, "step": 541, "token_acc": 0.20444951698603656 }, { "epoch": 0.31779536792729407, "grad_norm": 2.1634279733391244, "learning_rate": 6.354044548651818e-05, "loss": 4.313498497009277, "step": 542, "token_acc": 0.20060012214875594 }, { "epoch": 0.31838170624450307, "grad_norm": 3.305437100668334, "learning_rate": 6.365767878077374e-05, "loss": 4.308589935302734, "step": 543, "token_acc": 0.1977927693446581 }, { "epoch": 0.3189680445617121, "grad_norm": 3.025695879328745, "learning_rate": 6.377491207502932e-05, "loss": 4.358048439025879, "step": 544, "token_acc": 0.19489793545686834 }, { "epoch": 0.31955438287892113, "grad_norm": 3.12511711141683, "learning_rate": 6.389214536928488e-05, "loss": 4.206076622009277, "step": 545, "token_acc": 0.21111521694751448 }, { "epoch": 0.3201407211961302, "grad_norm": 2.0898254045131877, "learning_rate": 6.400937866354045e-05, "loss": 4.251198768615723, "step": 546, "token_acc": 0.20295281799774112 }, { "epoch": 0.3207270595133392, "grad_norm": 2.364299326785073, "learning_rate": 6.412661195779602e-05, "loss": 4.267653942108154, "step": 547, "token_acc": 0.20484509545151638 }, { "epoch": 0.32131339783054824, "grad_norm": 2.9441084483992395, "learning_rate": 6.424384525205158e-05, "loss": 4.3124775886535645, "step": 548, "token_acc": 0.19838106032508473 }, { "epoch": 0.32189973614775724, "grad_norm": 3.449394281505019, "learning_rate": 6.436107854630716e-05, "loss": 4.265442371368408, "step": 549, "token_acc": 0.20195000544906644 }, { "epoch": 0.3224860744649663, "grad_norm": 2.5815111328172615, "learning_rate": 6.447831184056272e-05, "loss": 4.201301097869873, "step": 550, "token_acc": 0.20851545146530456 }, { "epoch": 0.3230724127821753, "grad_norm": 3.2272637224586096, "learning_rate": 6.45955451348183e-05, "loss": 4.259978771209717, "step": 551, "token_acc": 0.20282464020967703 }, { "epoch": 0.32365875109938436, "grad_norm": 2.0735665000713617, "learning_rate": 6.471277842907386e-05, "loss": 4.219204902648926, "step": 552, "token_acc": 0.20681528987982986 }, { "epoch": 0.32424508941659336, "grad_norm": 2.9399438736941934, "learning_rate": 6.483001172332943e-05, "loss": 4.272747993469238, "step": 553, "token_acc": 0.20200138337145113 }, { "epoch": 0.3248314277338024, "grad_norm": 2.4112188164337893, "learning_rate": 6.4947245017585e-05, "loss": 4.183073997497559, "step": 554, "token_acc": 0.2122227612683225 }, { "epoch": 0.3254177660510114, "grad_norm": 3.008468569567238, "learning_rate": 6.506447831184057e-05, "loss": 4.204877853393555, "step": 555, "token_acc": 0.21000351304153025 }, { "epoch": 0.32600410436822047, "grad_norm": 3.102461422246427, "learning_rate": 6.518171160609614e-05, "loss": 4.25691032409668, "step": 556, "token_acc": 0.20268615728654027 }, { "epoch": 0.32659044268542947, "grad_norm": 3.669523274302679, "learning_rate": 6.52989449003517e-05, "loss": 4.295387268066406, "step": 557, "token_acc": 0.19698025911524897 }, { "epoch": 0.32717678100263853, "grad_norm": 1.96765863474173, "learning_rate": 6.541617819460728e-05, "loss": 4.196666717529297, "step": 558, "token_acc": 0.20732685099124679 }, { "epoch": 0.32776311931984753, "grad_norm": 2.432340640743014, "learning_rate": 6.553341148886284e-05, "loss": 4.208634853363037, "step": 559, "token_acc": 0.20784644069711897 }, { "epoch": 0.3283494576370566, "grad_norm": 3.0206287169947728, "learning_rate": 6.565064478311841e-05, "loss": 4.251628875732422, "step": 560, "token_acc": 0.20332906600440653 }, { "epoch": 0.3289357959542656, "grad_norm": 2.116632864769787, "learning_rate": 6.576787807737398e-05, "loss": 4.215981483459473, "step": 561, "token_acc": 0.20722201670426776 }, { "epoch": 0.32952213427147464, "grad_norm": 3.2491172523454166, "learning_rate": 6.588511137162955e-05, "loss": 4.176368236541748, "step": 562, "token_acc": 0.2099354680968941 }, { "epoch": 0.33010847258868364, "grad_norm": 1.9983710024669683, "learning_rate": 6.600234466588512e-05, "loss": 4.156754970550537, "step": 563, "token_acc": 0.2149327449049561 }, { "epoch": 0.3306948109058927, "grad_norm": 2.8918187477077786, "learning_rate": 6.611957796014069e-05, "loss": 4.195796012878418, "step": 564, "token_acc": 0.20799567081102985 }, { "epoch": 0.33128114922310176, "grad_norm": 2.405068276555556, "learning_rate": 6.623681125439624e-05, "loss": 4.185660362243652, "step": 565, "token_acc": 0.2096683954273283 }, { "epoch": 0.33186748754031076, "grad_norm": 2.2574345534241917, "learning_rate": 6.635404454865182e-05, "loss": 4.200215816497803, "step": 566, "token_acc": 0.2059081648644413 }, { "epoch": 0.3324538258575198, "grad_norm": 2.8489403260176243, "learning_rate": 6.647127784290738e-05, "loss": 4.239872932434082, "step": 567, "token_acc": 0.20448478931461037 }, { "epoch": 0.3330401641747288, "grad_norm": 2.388464643582114, "learning_rate": 6.658851113716296e-05, "loss": 4.209165573120117, "step": 568, "token_acc": 0.20629989740598587 }, { "epoch": 0.33362650249193787, "grad_norm": 2.7440106681362613, "learning_rate": 6.670574443141853e-05, "loss": 4.1976447105407715, "step": 569, "token_acc": 0.2070436644140365 }, { "epoch": 0.33421284080914687, "grad_norm": 3.399737707685417, "learning_rate": 6.68229777256741e-05, "loss": 4.252682685852051, "step": 570, "token_acc": 0.20181014251032653 }, { "epoch": 0.33479917912635593, "grad_norm": 2.470896500135244, "learning_rate": 6.694021101992967e-05, "loss": 4.138561248779297, "step": 571, "token_acc": 0.21314833832699862 }, { "epoch": 0.33538551744356493, "grad_norm": 2.4140149900254064, "learning_rate": 6.705744431418523e-05, "loss": 4.170652389526367, "step": 572, "token_acc": 0.20969149388007258 }, { "epoch": 0.335971855760774, "grad_norm": 2.822389985905648, "learning_rate": 6.717467760844081e-05, "loss": 4.154342174530029, "step": 573, "token_acc": 0.21218587763162086 }, { "epoch": 0.336558194077983, "grad_norm": 3.222209285487207, "learning_rate": 6.729191090269636e-05, "loss": 4.208691596984863, "step": 574, "token_acc": 0.20521084935746625 }, { "epoch": 0.33714453239519204, "grad_norm": 2.5246955124082144, "learning_rate": 6.740914419695194e-05, "loss": 4.15161657333374, "step": 575, "token_acc": 0.21294184498742869 }, { "epoch": 0.33773087071240104, "grad_norm": 3.066062702228726, "learning_rate": 6.75263774912075e-05, "loss": 4.170627593994141, "step": 576, "token_acc": 0.2093553177326028 }, { "epoch": 0.3383172090296101, "grad_norm": 2.6538439988281746, "learning_rate": 6.764361078546308e-05, "loss": 4.179351806640625, "step": 577, "token_acc": 0.20783089318924616 }, { "epoch": 0.3389035473468191, "grad_norm": 2.4515128383597626, "learning_rate": 6.776084407971864e-05, "loss": 4.2190728187561035, "step": 578, "token_acc": 0.2043195899375451 }, { "epoch": 0.33948988566402816, "grad_norm": 2.419640052287422, "learning_rate": 6.787807737397421e-05, "loss": 4.163544654846191, "step": 579, "token_acc": 0.21081945719155631 }, { "epoch": 0.34007622398123716, "grad_norm": 2.298839773360609, "learning_rate": 6.799531066822978e-05, "loss": 4.110836029052734, "step": 580, "token_acc": 0.21495364823106514 }, { "epoch": 0.3406625622984462, "grad_norm": 3.3550963244004826, "learning_rate": 6.811254396248535e-05, "loss": 4.175626754760742, "step": 581, "token_acc": 0.20792943824519308 }, { "epoch": 0.3412489006156552, "grad_norm": 2.0308058243557907, "learning_rate": 6.822977725674092e-05, "loss": 4.170405864715576, "step": 582, "token_acc": 0.20964526761889812 }, { "epoch": 0.34183523893286427, "grad_norm": 3.650819037132007, "learning_rate": 6.834701055099648e-05, "loss": 4.214597225189209, "step": 583, "token_acc": 0.20386166106908057 }, { "epoch": 0.3424215772500733, "grad_norm": 1.811216058241486, "learning_rate": 6.846424384525206e-05, "loss": 4.193975448608398, "step": 584, "token_acc": 0.20604070531994234 }, { "epoch": 0.34300791556728233, "grad_norm": 3.5601494060460728, "learning_rate": 6.858147713950762e-05, "loss": 4.249112129211426, "step": 585, "token_acc": 0.20160157087321603 }, { "epoch": 0.34359425388449133, "grad_norm": 2.546240696896199, "learning_rate": 6.86987104337632e-05, "loss": 4.188510417938232, "step": 586, "token_acc": 0.20822171119698443 }, { "epoch": 0.3441805922017004, "grad_norm": 2.2424369094428407, "learning_rate": 6.881594372801876e-05, "loss": 4.160363674163818, "step": 587, "token_acc": 0.2108433895806685 }, { "epoch": 0.3447669305189094, "grad_norm": 3.0594478016209186, "learning_rate": 6.893317702227433e-05, "loss": 4.154229164123535, "step": 588, "token_acc": 0.21069161160004213 }, { "epoch": 0.34535326883611844, "grad_norm": 1.9778745335785097, "learning_rate": 6.90504103165299e-05, "loss": 4.158848762512207, "step": 589, "token_acc": 0.21057924164727437 }, { "epoch": 0.34593960715332744, "grad_norm": 2.4539916534467014, "learning_rate": 6.916764361078547e-05, "loss": 4.145071983337402, "step": 590, "token_acc": 0.2105528348693428 }, { "epoch": 0.3465259454705365, "grad_norm": 2.1635838933477642, "learning_rate": 6.928487690504104e-05, "loss": 4.216288089752197, "step": 591, "token_acc": 0.20310441229456674 }, { "epoch": 0.34711228378774556, "grad_norm": 2.532584626642596, "learning_rate": 6.94021101992966e-05, "loss": 4.083550453186035, "step": 592, "token_acc": 0.21601965729738162 }, { "epoch": 0.34769862210495456, "grad_norm": 2.039146815016486, "learning_rate": 6.951934349355217e-05, "loss": 4.123083114624023, "step": 593, "token_acc": 0.21277903624920874 }, { "epoch": 0.3482849604221636, "grad_norm": 2.808290026352396, "learning_rate": 6.963657678780774e-05, "loss": 4.155226707458496, "step": 594, "token_acc": 0.2085782112488905 }, { "epoch": 0.3488712987393726, "grad_norm": 2.4468154808366966, "learning_rate": 6.975381008206331e-05, "loss": 4.1117119789123535, "step": 595, "token_acc": 0.21326740303108144 }, { "epoch": 0.34945763705658167, "grad_norm": 2.5366374410948596, "learning_rate": 6.987104337631888e-05, "loss": 4.112312316894531, "step": 596, "token_acc": 0.21480364630228507 }, { "epoch": 0.3500439753737907, "grad_norm": 2.4227846174641616, "learning_rate": 6.998827667057445e-05, "loss": 4.150864601135254, "step": 597, "token_acc": 0.20841413373046058 }, { "epoch": 0.35063031369099973, "grad_norm": 2.2854830698100397, "learning_rate": 7.010550996483002e-05, "loss": 4.147131443023682, "step": 598, "token_acc": 0.2085973335004138 }, { "epoch": 0.35121665200820873, "grad_norm": 3.0727805184674257, "learning_rate": 7.022274325908559e-05, "loss": 4.153531074523926, "step": 599, "token_acc": 0.2079611058898861 }, { "epoch": 0.3518029903254178, "grad_norm": 2.5258316759349153, "learning_rate": 7.033997655334114e-05, "loss": 4.129731178283691, "step": 600, "token_acc": 0.21162862738303378 }, { "epoch": 0.3523893286426268, "grad_norm": 2.9267199154449934, "learning_rate": 7.045720984759672e-05, "loss": 4.118048191070557, "step": 601, "token_acc": 0.2126418067910963 }, { "epoch": 0.35297566695983584, "grad_norm": 1.8731295565234167, "learning_rate": 7.05744431418523e-05, "loss": 4.1715192794799805, "step": 602, "token_acc": 0.20527919047183255 }, { "epoch": 0.35356200527704484, "grad_norm": 3.3143169992915245, "learning_rate": 7.069167643610786e-05, "loss": 4.100879669189453, "step": 603, "token_acc": 0.21716443857009016 }, { "epoch": 0.3541483435942539, "grad_norm": 2.018374253692198, "learning_rate": 7.080890973036343e-05, "loss": 4.091705322265625, "step": 604, "token_acc": 0.21426541631145213 }, { "epoch": 0.3547346819114629, "grad_norm": 2.885438639183108, "learning_rate": 7.0926143024619e-05, "loss": 4.0958991050720215, "step": 605, "token_acc": 0.21525665386851 }, { "epoch": 0.35532102022867196, "grad_norm": 2.2996061397666074, "learning_rate": 7.104337631887457e-05, "loss": 4.094749927520752, "step": 606, "token_acc": 0.2160558994417651 }, { "epoch": 0.35590735854588096, "grad_norm": 2.511428308734274, "learning_rate": 7.116060961313013e-05, "loss": 4.122867584228516, "step": 607, "token_acc": 0.21326891173011467 }, { "epoch": 0.35649369686309, "grad_norm": 3.19272148743335, "learning_rate": 7.127784290738571e-05, "loss": 4.177186012268066, "step": 608, "token_acc": 0.20434262095356895 }, { "epoch": 0.357080035180299, "grad_norm": 1.5290924558721875, "learning_rate": 7.139507620164126e-05, "loss": 4.097538471221924, "step": 609, "token_acc": 0.21519980028796862 }, { "epoch": 0.35766637349750807, "grad_norm": 3.8177361886489027, "learning_rate": 7.151230949589684e-05, "loss": 4.184349060058594, "step": 610, "token_acc": 0.20378709185527077 }, { "epoch": 0.3582527118147171, "grad_norm": 1.9845041679115725, "learning_rate": 7.16295427901524e-05, "loss": 4.1518073081970215, "step": 611, "token_acc": 0.20982409381663114 }, { "epoch": 0.35883905013192613, "grad_norm": 2.9172907817702947, "learning_rate": 7.174677608440797e-05, "loss": 4.14028263092041, "step": 612, "token_acc": 0.21218426241624838 }, { "epoch": 0.35942538844913513, "grad_norm": 2.2453526658169802, "learning_rate": 7.186400937866354e-05, "loss": 4.142838001251221, "step": 613, "token_acc": 0.21205322308721536 }, { "epoch": 0.3600117267663442, "grad_norm": 2.5834409682522037, "learning_rate": 7.198124267291911e-05, "loss": 4.2115254402160645, "step": 614, "token_acc": 0.2025347590813764 }, { "epoch": 0.3605980650835532, "grad_norm": 2.0144628590268856, "learning_rate": 7.209847596717468e-05, "loss": 4.098343372344971, "step": 615, "token_acc": 0.21502927868335459 }, { "epoch": 0.36118440340076224, "grad_norm": 2.7391891206161736, "learning_rate": 7.221570926143025e-05, "loss": 4.07414436340332, "step": 616, "token_acc": 0.2168717809704527 }, { "epoch": 0.36177074171797124, "grad_norm": 2.149948650310363, "learning_rate": 7.233294255568583e-05, "loss": 4.059930801391602, "step": 617, "token_acc": 0.21909523528854014 }, { "epoch": 0.3623570800351803, "grad_norm": 2.075780985724799, "learning_rate": 7.245017584994138e-05, "loss": 4.108323097229004, "step": 618, "token_acc": 0.2142750568838823 }, { "epoch": 0.3629434183523893, "grad_norm": 2.585159275197605, "learning_rate": 7.256740914419695e-05, "loss": 4.162170886993408, "step": 619, "token_acc": 0.2068520399525986 }, { "epoch": 0.36352975666959836, "grad_norm": 1.6350827403615937, "learning_rate": 7.268464243845252e-05, "loss": 4.104890823364258, "step": 620, "token_acc": 0.21055449430171663 }, { "epoch": 0.3641160949868074, "grad_norm": 2.7039293247821834, "learning_rate": 7.28018757327081e-05, "loss": 4.109732151031494, "step": 621, "token_acc": 0.2145976358849946 }, { "epoch": 0.3647024333040164, "grad_norm": 2.1190350311590067, "learning_rate": 7.291910902696366e-05, "loss": 4.059538841247559, "step": 622, "token_acc": 0.2174160411729703 }, { "epoch": 0.36528877162122547, "grad_norm": 2.902336514881658, "learning_rate": 7.303634232121923e-05, "loss": 4.145930290222168, "step": 623, "token_acc": 0.21163515634174201 }, { "epoch": 0.3658751099384345, "grad_norm": 1.7549445332593754, "learning_rate": 7.31535756154748e-05, "loss": 4.1091227531433105, "step": 624, "token_acc": 0.2112800324090298 }, { "epoch": 0.36646144825564353, "grad_norm": 3.9523416271040603, "learning_rate": 7.327080890973037e-05, "loss": 4.074059963226318, "step": 625, "token_acc": 0.2165876496065328 }, { "epoch": 0.36704778657285253, "grad_norm": 2.3403679467796605, "learning_rate": 7.338804220398593e-05, "loss": 4.123082637786865, "step": 626, "token_acc": 0.21221446641182018 }, { "epoch": 0.3676341248900616, "grad_norm": 2.1452631986183737, "learning_rate": 7.35052754982415e-05, "loss": 4.087530136108398, "step": 627, "token_acc": 0.21535591662386114 }, { "epoch": 0.3682204632072706, "grad_norm": 2.9309804271543367, "learning_rate": 7.362250879249707e-05, "loss": 4.101961612701416, "step": 628, "token_acc": 0.21293058083188984 }, { "epoch": 0.36880680152447964, "grad_norm": 1.7598176871254188, "learning_rate": 7.373974208675264e-05, "loss": 4.1332550048828125, "step": 629, "token_acc": 0.2088044037612862 }, { "epoch": 0.36939313984168864, "grad_norm": 3.0626904431246156, "learning_rate": 7.385697538100821e-05, "loss": 4.096952438354492, "step": 630, "token_acc": 0.2133911631997453 }, { "epoch": 0.3699794781588977, "grad_norm": 1.6434192302561115, "learning_rate": 7.397420867526378e-05, "loss": 4.105629920959473, "step": 631, "token_acc": 0.21183037001693572 }, { "epoch": 0.3705658164761067, "grad_norm": 3.1225308376453125, "learning_rate": 7.409144196951935e-05, "loss": 4.0817790031433105, "step": 632, "token_acc": 0.2125951542719465 }, { "epoch": 0.37115215479331576, "grad_norm": 1.876114939668875, "learning_rate": 7.420867526377491e-05, "loss": 4.103361129760742, "step": 633, "token_acc": 0.2110155437428117 }, { "epoch": 0.37173849311052476, "grad_norm": 2.9456311645822697, "learning_rate": 7.432590855803049e-05, "loss": 4.085481643676758, "step": 634, "token_acc": 0.21446758808643177 }, { "epoch": 0.3723248314277338, "grad_norm": 2.222620899646469, "learning_rate": 7.444314185228605e-05, "loss": 4.087406158447266, "step": 635, "token_acc": 0.21455811377482395 }, { "epoch": 0.3729111697449428, "grad_norm": 1.8617111992948212, "learning_rate": 7.456037514654162e-05, "loss": 4.094298362731934, "step": 636, "token_acc": 0.21440071372902908 }, { "epoch": 0.3734975080621519, "grad_norm": 2.4678181433695707, "learning_rate": 7.467760844079719e-05, "loss": 4.054091930389404, "step": 637, "token_acc": 0.21730131701682898 }, { "epoch": 0.3740838463793609, "grad_norm": 1.8739959514199582, "learning_rate": 7.479484173505276e-05, "loss": 4.097439765930176, "step": 638, "token_acc": 0.21199458815407057 }, { "epoch": 0.37467018469656993, "grad_norm": 1.999997947980484, "learning_rate": 7.491207502930833e-05, "loss": 4.060850143432617, "step": 639, "token_acc": 0.2155914878931368 }, { "epoch": 0.37525652301377893, "grad_norm": 2.473943136769906, "learning_rate": 7.50293083235639e-05, "loss": 4.12377405166626, "step": 640, "token_acc": 0.20844757986509665 }, { "epoch": 0.375842861330988, "grad_norm": 2.1540266521538496, "learning_rate": 7.514654161781947e-05, "loss": 4.0693206787109375, "step": 641, "token_acc": 0.21374037776709007 }, { "epoch": 0.376429199648197, "grad_norm": 1.9555041691960107, "learning_rate": 7.526377491207503e-05, "loss": 4.0703816413879395, "step": 642, "token_acc": 0.21338682410864312 }, { "epoch": 0.37701553796540604, "grad_norm": 2.4746565620809533, "learning_rate": 7.538100820633061e-05, "loss": 3.993250846862793, "step": 643, "token_acc": 0.22307548327388813 }, { "epoch": 0.37760187628261505, "grad_norm": 1.9200736941317331, "learning_rate": 7.549824150058617e-05, "loss": 4.02085018157959, "step": 644, "token_acc": 0.21993525156256702 }, { "epoch": 0.3781882145998241, "grad_norm": 2.4149432761682417, "learning_rate": 7.561547479484174e-05, "loss": 4.071619033813477, "step": 645, "token_acc": 0.21326240469941257 }, { "epoch": 0.3787745529170331, "grad_norm": 2.4871582498174125, "learning_rate": 7.57327080890973e-05, "loss": 4.132684707641602, "step": 646, "token_acc": 0.20779481374768471 }, { "epoch": 0.37936089123424216, "grad_norm": 3.226823534661959, "learning_rate": 7.584994138335287e-05, "loss": 4.0648603439331055, "step": 647, "token_acc": 0.21545968622224757 }, { "epoch": 0.37994722955145116, "grad_norm": 1.6540072681096925, "learning_rate": 7.596717467760845e-05, "loss": 4.031717300415039, "step": 648, "token_acc": 0.2201859905981044 }, { "epoch": 0.3805335678686602, "grad_norm": 3.138959935778616, "learning_rate": 7.608440797186401e-05, "loss": 4.13758659362793, "step": 649, "token_acc": 0.20802588963436297 }, { "epoch": 0.3811199061858693, "grad_norm": 2.1891622390419894, "learning_rate": 7.620164126611959e-05, "loss": 4.064674377441406, "step": 650, "token_acc": 0.2122129264246783 }, { "epoch": 0.3817062445030783, "grad_norm": 2.632488521285851, "learning_rate": 7.631887456037515e-05, "loss": 4.001059055328369, "step": 651, "token_acc": 0.22232432998164614 }, { "epoch": 0.38229258282028733, "grad_norm": 1.5309038291721875, "learning_rate": 7.643610785463073e-05, "loss": 4.113205432891846, "step": 652, "token_acc": 0.20970721296591022 }, { "epoch": 0.38287892113749633, "grad_norm": 3.0572790928646048, "learning_rate": 7.655334114888629e-05, "loss": 4.093487739562988, "step": 653, "token_acc": 0.21182909957443227 }, { "epoch": 0.3834652594547054, "grad_norm": 2.5229273446299123, "learning_rate": 7.667057444314185e-05, "loss": 4.037172794342041, "step": 654, "token_acc": 0.2184483672218355 }, { "epoch": 0.3840515977719144, "grad_norm": 1.9075802163317703, "learning_rate": 7.678780773739742e-05, "loss": 4.034085273742676, "step": 655, "token_acc": 0.2168918608454039 }, { "epoch": 0.38463793608912344, "grad_norm": 2.411474115819284, "learning_rate": 7.690504103165299e-05, "loss": 4.028837203979492, "step": 656, "token_acc": 0.21688751715757676 }, { "epoch": 0.38522427440633245, "grad_norm": 2.1772932845239636, "learning_rate": 7.702227432590856e-05, "loss": 4.046817779541016, "step": 657, "token_acc": 0.21557185728576192 }, { "epoch": 0.3858106127235415, "grad_norm": 2.4533435573107263, "learning_rate": 7.713950762016413e-05, "loss": 4.056399822235107, "step": 658, "token_acc": 0.2157916559510001 }, { "epoch": 0.3863969510407505, "grad_norm": 1.8473469022474964, "learning_rate": 7.72567409144197e-05, "loss": 4.04714298248291, "step": 659, "token_acc": 0.21505665695962742 }, { "epoch": 0.38698328935795956, "grad_norm": 2.3602063123064188, "learning_rate": 7.737397420867527e-05, "loss": 4.014403820037842, "step": 660, "token_acc": 0.21875912798096045 }, { "epoch": 0.38756962767516856, "grad_norm": 1.7020453106770677, "learning_rate": 7.749120750293083e-05, "loss": 4.058602333068848, "step": 661, "token_acc": 0.21461313141590313 }, { "epoch": 0.3881559659923776, "grad_norm": 2.0873313035369665, "learning_rate": 7.76084407971864e-05, "loss": 4.054744720458984, "step": 662, "token_acc": 0.21395478518271455 }, { "epoch": 0.3887423043095866, "grad_norm": 2.395388425986399, "learning_rate": 7.772567409144197e-05, "loss": 3.9661686420440674, "step": 663, "token_acc": 0.22505120987458538 }, { "epoch": 0.3893286426267957, "grad_norm": 1.9608797379106884, "learning_rate": 7.784290738569754e-05, "loss": 3.990251064300537, "step": 664, "token_acc": 0.21994407252760115 }, { "epoch": 0.3899149809440047, "grad_norm": 2.553547491895268, "learning_rate": 7.796014067995311e-05, "loss": 4.010693550109863, "step": 665, "token_acc": 0.22014327655842145 }, { "epoch": 0.39050131926121373, "grad_norm": 2.073981011067883, "learning_rate": 7.807737397420867e-05, "loss": 4.03361701965332, "step": 666, "token_acc": 0.2182426436154111 }, { "epoch": 0.39108765757842273, "grad_norm": 1.918682275933873, "learning_rate": 7.819460726846425e-05, "loss": 4.050731658935547, "step": 667, "token_acc": 0.21442472323545989 }, { "epoch": 0.3916739958956318, "grad_norm": 2.1443675765705796, "learning_rate": 7.831184056271981e-05, "loss": 3.9616920948028564, "step": 668, "token_acc": 0.2242642406901948 }, { "epoch": 0.3922603342128408, "grad_norm": 2.1331902977640156, "learning_rate": 7.842907385697539e-05, "loss": 4.044347286224365, "step": 669, "token_acc": 0.21538822330878069 }, { "epoch": 0.39284667253004985, "grad_norm": 2.243072466940306, "learning_rate": 7.854630715123095e-05, "loss": 4.085768222808838, "step": 670, "token_acc": 0.21124857053747792 }, { "epoch": 0.39343301084725885, "grad_norm": 2.231107447143818, "learning_rate": 7.866354044548652e-05, "loss": 3.9956047534942627, "step": 671, "token_acc": 0.2188850407541879 }, { "epoch": 0.3940193491644679, "grad_norm": 2.108116358043997, "learning_rate": 7.878077373974209e-05, "loss": 3.9872591495513916, "step": 672, "token_acc": 0.22259305938062415 }, { "epoch": 0.3946056874816769, "grad_norm": 2.335134786070096, "learning_rate": 7.889800703399765e-05, "loss": 3.9818410873413086, "step": 673, "token_acc": 0.22226903001881143 }, { "epoch": 0.39519202579888596, "grad_norm": 1.7637353035663494, "learning_rate": 7.901524032825323e-05, "loss": 3.9797415733337402, "step": 674, "token_acc": 0.22384945423246957 }, { "epoch": 0.39577836411609496, "grad_norm": 2.751577869702306, "learning_rate": 7.91324736225088e-05, "loss": 4.02831506729126, "step": 675, "token_acc": 0.21854878487728793 }, { "epoch": 0.396364702433304, "grad_norm": 1.605864066444226, "learning_rate": 7.924970691676437e-05, "loss": 3.9465856552124023, "step": 676, "token_acc": 0.2257202965851656 }, { "epoch": 0.3969510407505131, "grad_norm": 2.8483845164092987, "learning_rate": 7.936694021101993e-05, "loss": 3.9910123348236084, "step": 677, "token_acc": 0.2207799455615958 }, { "epoch": 0.3975373790677221, "grad_norm": 1.4685836708648312, "learning_rate": 7.948417350527551e-05, "loss": 3.9373085498809814, "step": 678, "token_acc": 0.22478049654830656 }, { "epoch": 0.39812371738493113, "grad_norm": 3.1584410509641083, "learning_rate": 7.960140679953107e-05, "loss": 4.052556037902832, "step": 679, "token_acc": 0.21437743014739127 }, { "epoch": 0.39871005570214013, "grad_norm": 2.1134430010740166, "learning_rate": 7.971864009378663e-05, "loss": 3.953364849090576, "step": 680, "token_acc": 0.22410616518009976 }, { "epoch": 0.3992963940193492, "grad_norm": 2.027947057181343, "learning_rate": 7.983587338804221e-05, "loss": 4.093752384185791, "step": 681, "token_acc": 0.20953233488854062 }, { "epoch": 0.3998827323365582, "grad_norm": 2.834836713241114, "learning_rate": 7.995310668229777e-05, "loss": 4.011902809143066, "step": 682, "token_acc": 0.21620308608178657 }, { "epoch": 0.40046907065376725, "grad_norm": 1.9358391805959974, "learning_rate": 8.007033997655335e-05, "loss": 4.031554222106934, "step": 683, "token_acc": 0.21570020881772092 }, { "epoch": 0.40105540897097625, "grad_norm": 1.795843654663749, "learning_rate": 8.018757327080891e-05, "loss": 3.986050605773926, "step": 684, "token_acc": 0.222041572229191 }, { "epoch": 0.4016417472881853, "grad_norm": 2.6068865630161007, "learning_rate": 8.030480656506449e-05, "loss": 3.9698944091796875, "step": 685, "token_acc": 0.22357636607220177 }, { "epoch": 0.4022280856053943, "grad_norm": 1.8456236622891797, "learning_rate": 8.042203985932005e-05, "loss": 4.0039215087890625, "step": 686, "token_acc": 0.21838219936472802 }, { "epoch": 0.40281442392260336, "grad_norm": 2.276514144099755, "learning_rate": 8.053927315357563e-05, "loss": 3.993375062942505, "step": 687, "token_acc": 0.21902647209953063 }, { "epoch": 0.40340076223981236, "grad_norm": 1.9406005967692974, "learning_rate": 8.065650644783119e-05, "loss": 3.9986319541931152, "step": 688, "token_acc": 0.22091693269034843 }, { "epoch": 0.4039871005570214, "grad_norm": 2.6068922881409, "learning_rate": 8.077373974208675e-05, "loss": 3.9992117881774902, "step": 689, "token_acc": 0.21998947532651608 }, { "epoch": 0.4045734388742304, "grad_norm": 2.1736297833674647, "learning_rate": 8.089097303634232e-05, "loss": 3.9667046070098877, "step": 690, "token_acc": 0.220259143520898 }, { "epoch": 0.4051597771914395, "grad_norm": 2.27013025247477, "learning_rate": 8.100820633059789e-05, "loss": 4.015791416168213, "step": 691, "token_acc": 0.2171304206329835 }, { "epoch": 0.4057461155086485, "grad_norm": 1.8133066440047596, "learning_rate": 8.112543962485345e-05, "loss": 4.029618263244629, "step": 692, "token_acc": 0.21445345442750569 }, { "epoch": 0.40633245382585753, "grad_norm": 1.947593396191246, "learning_rate": 8.124267291910903e-05, "loss": 4.012633323669434, "step": 693, "token_acc": 0.2172589234258634 }, { "epoch": 0.40691879214306653, "grad_norm": 2.3347922630746054, "learning_rate": 8.13599062133646e-05, "loss": 4.041747093200684, "step": 694, "token_acc": 0.21478468911923815 }, { "epoch": 0.4075051304602756, "grad_norm": 1.5684402865571183, "learning_rate": 8.147713950762017e-05, "loss": 3.923807382583618, "step": 695, "token_acc": 0.22580291453496612 }, { "epoch": 0.4080914687774846, "grad_norm": 2.130446863345715, "learning_rate": 8.159437280187575e-05, "loss": 3.9689764976501465, "step": 696, "token_acc": 0.2218048385437413 }, { "epoch": 0.40867780709469365, "grad_norm": 2.2117512943963296, "learning_rate": 8.171160609613131e-05, "loss": 3.9482274055480957, "step": 697, "token_acc": 0.2253584146845398 }, { "epoch": 0.40926414541190265, "grad_norm": 2.2471329420627564, "learning_rate": 8.182883939038687e-05, "loss": 4.002993583679199, "step": 698, "token_acc": 0.21702742510308898 }, { "epoch": 0.4098504837291117, "grad_norm": 2.5516320367202474, "learning_rate": 8.194607268464243e-05, "loss": 3.9073610305786133, "step": 699, "token_acc": 0.2278225212855082 }, { "epoch": 0.4104368220463207, "grad_norm": 2.07434391491364, "learning_rate": 8.206330597889801e-05, "loss": 3.996926784515381, "step": 700, "token_acc": 0.21590331255668146 }, { "epoch": 0.41102316036352976, "grad_norm": 2.701039367480318, "learning_rate": 8.218053927315357e-05, "loss": 4.009852409362793, "step": 701, "token_acc": 0.2172141838888435 }, { "epoch": 0.41160949868073876, "grad_norm": 1.4646660186614113, "learning_rate": 8.229777256740915e-05, "loss": 3.9965920448303223, "step": 702, "token_acc": 0.2191698207378447 }, { "epoch": 0.4121958369979478, "grad_norm": 3.1570371849933956, "learning_rate": 8.241500586166471e-05, "loss": 3.992090940475464, "step": 703, "token_acc": 0.2190071815547436 }, { "epoch": 0.4127821753151568, "grad_norm": 1.8870292738736854, "learning_rate": 8.253223915592029e-05, "loss": 4.004321098327637, "step": 704, "token_acc": 0.21755276839848503 }, { "epoch": 0.4133685136323659, "grad_norm": 3.3549004761100933, "learning_rate": 8.264947245017585e-05, "loss": 4.063796043395996, "step": 705, "token_acc": 0.21098198024245524 }, { "epoch": 0.41395485194957493, "grad_norm": 2.16753937155867, "learning_rate": 8.276670574443143e-05, "loss": 3.979288101196289, "step": 706, "token_acc": 0.2197244342267462 }, { "epoch": 0.41454119026678393, "grad_norm": 2.256985192036278, "learning_rate": 8.288393903868699e-05, "loss": 3.9586453437805176, "step": 707, "token_acc": 0.22309999231025557 }, { "epoch": 0.415127528583993, "grad_norm": 1.954939127077894, "learning_rate": 8.300117233294255e-05, "loss": 3.9463043212890625, "step": 708, "token_acc": 0.2236039054298286 }, { "epoch": 0.415713866901202, "grad_norm": 2.4362819756259326, "learning_rate": 8.311840562719813e-05, "loss": 3.9839253425598145, "step": 709, "token_acc": 0.22196783379716523 }, { "epoch": 0.41630020521841105, "grad_norm": 2.062117685664566, "learning_rate": 8.323563892145369e-05, "loss": 3.9828412532806396, "step": 710, "token_acc": 0.22041821921958635 }, { "epoch": 0.41688654353562005, "grad_norm": 2.518798149480698, "learning_rate": 8.335287221570927e-05, "loss": 3.966951608657837, "step": 711, "token_acc": 0.22057594887185403 }, { "epoch": 0.4174728818528291, "grad_norm": 2.33488563735422, "learning_rate": 8.347010550996483e-05, "loss": 4.020444869995117, "step": 712, "token_acc": 0.21567288505328716 }, { "epoch": 0.4180592201700381, "grad_norm": 1.3085221419826674, "learning_rate": 8.358733880422041e-05, "loss": 4.00489616394043, "step": 713, "token_acc": 0.21908290268196823 }, { "epoch": 0.41864555848724716, "grad_norm": 2.4149560137622115, "learning_rate": 8.370457209847597e-05, "loss": 3.9871957302093506, "step": 714, "token_acc": 0.21834809649697443 }, { "epoch": 0.41923189680445616, "grad_norm": 1.848581982176165, "learning_rate": 8.382180539273155e-05, "loss": 3.9747753143310547, "step": 715, "token_acc": 0.22014599739217036 }, { "epoch": 0.4198182351216652, "grad_norm": 1.5880853884188684, "learning_rate": 8.393903868698711e-05, "loss": 3.9152612686157227, "step": 716, "token_acc": 0.2266055384096822 }, { "epoch": 0.4204045734388742, "grad_norm": 2.4278481046131786, "learning_rate": 8.405627198124267e-05, "loss": 3.990471601486206, "step": 717, "token_acc": 0.21986459581955026 }, { "epoch": 0.4209909117560833, "grad_norm": 1.496198977995066, "learning_rate": 8.417350527549825e-05, "loss": 3.996959686279297, "step": 718, "token_acc": 0.21578277516266198 }, { "epoch": 0.4215772500732923, "grad_norm": 2.229378172850658, "learning_rate": 8.429073856975381e-05, "loss": 3.946504592895508, "step": 719, "token_acc": 0.22425191965447533 }, { "epoch": 0.42216358839050133, "grad_norm": 1.7659846802731216, "learning_rate": 8.440797186400939e-05, "loss": 4.021200180053711, "step": 720, "token_acc": 0.21562805573663624 }, { "epoch": 0.42274992670771033, "grad_norm": 2.3525238749731665, "learning_rate": 8.452520515826495e-05, "loss": 3.9007182121276855, "step": 721, "token_acc": 0.2278418843187129 }, { "epoch": 0.4233362650249194, "grad_norm": 2.0762583390587555, "learning_rate": 8.464243845252053e-05, "loss": 3.995725631713867, "step": 722, "token_acc": 0.21636987180662792 }, { "epoch": 0.4239226033421284, "grad_norm": 1.835997138157056, "learning_rate": 8.475967174677609e-05, "loss": 3.9783501625061035, "step": 723, "token_acc": 0.2200947039352703 }, { "epoch": 0.42450894165933745, "grad_norm": 1.998538865801269, "learning_rate": 8.487690504103167e-05, "loss": 3.899714946746826, "step": 724, "token_acc": 0.2277796328981751 }, { "epoch": 0.42509527997654645, "grad_norm": 2.056393049375904, "learning_rate": 8.499413833528722e-05, "loss": 3.9990315437316895, "step": 725, "token_acc": 0.21626505392136294 }, { "epoch": 0.4256816182937555, "grad_norm": 1.5564394332706086, "learning_rate": 8.511137162954279e-05, "loss": 4.009754657745361, "step": 726, "token_acc": 0.2143423783411283 }, { "epoch": 0.4262679566109645, "grad_norm": 2.1011744301973794, "learning_rate": 8.522860492379835e-05, "loss": 3.9437594413757324, "step": 727, "token_acc": 0.22307386260338471 }, { "epoch": 0.42685429492817356, "grad_norm": 2.300110050957418, "learning_rate": 8.534583821805393e-05, "loss": 3.9627723693847656, "step": 728, "token_acc": 0.22014249117395904 }, { "epoch": 0.42744063324538256, "grad_norm": 1.8150002936726186, "learning_rate": 8.54630715123095e-05, "loss": 3.937901258468628, "step": 729, "token_acc": 0.22241908243543698 }, { "epoch": 0.4280269715625916, "grad_norm": 2.70277092710677, "learning_rate": 8.558030480656507e-05, "loss": 3.9719107151031494, "step": 730, "token_acc": 0.22138685801847835 }, { "epoch": 0.4286133098798006, "grad_norm": 1.6532035295187189, "learning_rate": 8.569753810082065e-05, "loss": 3.9723806381225586, "step": 731, "token_acc": 0.21880205655526994 }, { "epoch": 0.4291996481970097, "grad_norm": 2.676508541580671, "learning_rate": 8.581477139507621e-05, "loss": 3.9484610557556152, "step": 732, "token_acc": 0.2227352200484422 }, { "epoch": 0.42978598651421873, "grad_norm": 1.403127241890776, "learning_rate": 8.593200468933178e-05, "loss": 3.9220519065856934, "step": 733, "token_acc": 0.22626335247921198 }, { "epoch": 0.43037232483142773, "grad_norm": 2.6423420760706486, "learning_rate": 8.604923798358733e-05, "loss": 3.9348416328430176, "step": 734, "token_acc": 0.22170235924365955 }, { "epoch": 0.4309586631486368, "grad_norm": 1.9143950897663173, "learning_rate": 8.616647127784291e-05, "loss": 3.951483726501465, "step": 735, "token_acc": 0.2222365654851916 }, { "epoch": 0.4315450014658458, "grad_norm": 1.814817755227424, "learning_rate": 8.628370457209847e-05, "loss": 3.900132179260254, "step": 736, "token_acc": 0.2292070940655613 }, { "epoch": 0.43213133978305485, "grad_norm": 1.8966274546804156, "learning_rate": 8.640093786635405e-05, "loss": 3.9608585834503174, "step": 737, "token_acc": 0.2217940825334792 }, { "epoch": 0.43271767810026385, "grad_norm": 2.2331692203473326, "learning_rate": 8.651817116060961e-05, "loss": 3.9567646980285645, "step": 738, "token_acc": 0.2201034012744563 }, { "epoch": 0.4333040164174729, "grad_norm": 1.6670104427552728, "learning_rate": 8.663540445486519e-05, "loss": 3.947218179702759, "step": 739, "token_acc": 0.22202044884573338 }, { "epoch": 0.4338903547346819, "grad_norm": 2.0507402177009824, "learning_rate": 8.675263774912075e-05, "loss": 3.910508871078491, "step": 740, "token_acc": 0.22730556287898762 }, { "epoch": 0.43447669305189096, "grad_norm": 1.615803099592315, "learning_rate": 8.686987104337633e-05, "loss": 3.9932494163513184, "step": 741, "token_acc": 0.21694113506191537 }, { "epoch": 0.43506303136909996, "grad_norm": 2.5371822313297403, "learning_rate": 8.698710433763189e-05, "loss": 3.8925976753234863, "step": 742, "token_acc": 0.22879213092889494 }, { "epoch": 0.435649369686309, "grad_norm": 1.8626483187207974, "learning_rate": 8.710433763188745e-05, "loss": 3.937966823577881, "step": 743, "token_acc": 0.22448765850708127 }, { "epoch": 0.436235708003518, "grad_norm": 2.250001136779272, "learning_rate": 8.722157092614303e-05, "loss": 3.900315046310425, "step": 744, "token_acc": 0.22572346116801262 }, { "epoch": 0.4368220463207271, "grad_norm": 2.0386335384596954, "learning_rate": 8.733880422039859e-05, "loss": 3.938730239868164, "step": 745, "token_acc": 0.22347982681328787 }, { "epoch": 0.4374083846379361, "grad_norm": 2.0306205239517334, "learning_rate": 8.745603751465417e-05, "loss": 3.966027021408081, "step": 746, "token_acc": 0.2189314855824024 }, { "epoch": 0.43799472295514513, "grad_norm": 1.9042181280416175, "learning_rate": 8.757327080890973e-05, "loss": 3.906756639480591, "step": 747, "token_acc": 0.2251283743647284 }, { "epoch": 0.43858106127235413, "grad_norm": 1.83388543747683, "learning_rate": 8.769050410316531e-05, "loss": 3.9863650798797607, "step": 748, "token_acc": 0.21566247599724647 }, { "epoch": 0.4391673995895632, "grad_norm": 2.4927173945530576, "learning_rate": 8.780773739742087e-05, "loss": 3.9408254623413086, "step": 749, "token_acc": 0.2226859005154706 }, { "epoch": 0.4397537379067722, "grad_norm": 1.878251095328266, "learning_rate": 8.792497069167645e-05, "loss": 3.9462904930114746, "step": 750, "token_acc": 0.22154756755045843 }, { "epoch": 0.44034007622398125, "grad_norm": 2.5811473775151583, "learning_rate": 8.804220398593201e-05, "loss": 3.9655089378356934, "step": 751, "token_acc": 0.21821542515543843 }, { "epoch": 0.44092641454119025, "grad_norm": 1.5868598891613437, "learning_rate": 8.815943728018757e-05, "loss": 3.9307169914245605, "step": 752, "token_acc": 0.22196996782266715 }, { "epoch": 0.4415127528583993, "grad_norm": 2.2626821868614995, "learning_rate": 8.827667057444315e-05, "loss": 3.926063060760498, "step": 753, "token_acc": 0.22557470488856743 }, { "epoch": 0.4420990911756083, "grad_norm": 1.9137229422291535, "learning_rate": 8.839390386869871e-05, "loss": 3.8885974884033203, "step": 754, "token_acc": 0.22743625191613862 }, { "epoch": 0.44268542949281736, "grad_norm": 2.42027527214035, "learning_rate": 8.851113716295429e-05, "loss": 3.951854705810547, "step": 755, "token_acc": 0.21919975367235237 }, { "epoch": 0.44327176781002636, "grad_norm": 1.4274348182652419, "learning_rate": 8.862837045720985e-05, "loss": 3.924478054046631, "step": 756, "token_acc": 0.22586739733819544 }, { "epoch": 0.4438581061272354, "grad_norm": 2.387809799570043, "learning_rate": 8.874560375146543e-05, "loss": 3.9574551582336426, "step": 757, "token_acc": 0.22065239441538465 }, { "epoch": 0.4444444444444444, "grad_norm": 2.066025639346712, "learning_rate": 8.886283704572099e-05, "loss": 3.9644346237182617, "step": 758, "token_acc": 0.21807099281297007 }, { "epoch": 0.4450307827616535, "grad_norm": 2.0071041068521813, "learning_rate": 8.898007033997657e-05, "loss": 3.89326810836792, "step": 759, "token_acc": 0.2262106542215205 }, { "epoch": 0.4456171210788625, "grad_norm": 2.0702333406413, "learning_rate": 8.909730363423211e-05, "loss": 3.949096202850342, "step": 760, "token_acc": 0.21959702504489864 }, { "epoch": 0.44620345939607153, "grad_norm": 1.7844832049053576, "learning_rate": 8.921453692848769e-05, "loss": 3.9036145210266113, "step": 761, "token_acc": 0.22562241895689575 }, { "epoch": 0.4467897977132806, "grad_norm": 2.035225491767218, "learning_rate": 8.933177022274327e-05, "loss": 3.894869327545166, "step": 762, "token_acc": 0.2258279057563059 }, { "epoch": 0.4473761360304896, "grad_norm": 1.8586834290729655, "learning_rate": 8.944900351699883e-05, "loss": 3.9075770378112793, "step": 763, "token_acc": 0.2255222223975676 }, { "epoch": 0.44796247434769865, "grad_norm": 1.7336219261594272, "learning_rate": 8.95662368112544e-05, "loss": 3.8645763397216797, "step": 764, "token_acc": 0.22972833902867018 }, { "epoch": 0.44854881266490765, "grad_norm": 2.2428110263096537, "learning_rate": 8.968347010550997e-05, "loss": 3.932220935821533, "step": 765, "token_acc": 0.22227289432809236 }, { "epoch": 0.4491351509821167, "grad_norm": 1.3581669888821015, "learning_rate": 8.980070339976554e-05, "loss": 3.8672127723693848, "step": 766, "token_acc": 0.23058741996545196 }, { "epoch": 0.4497214892993257, "grad_norm": 2.6220000478513033, "learning_rate": 8.991793669402111e-05, "loss": 3.922210454940796, "step": 767, "token_acc": 0.2243839689135488 }, { "epoch": 0.45030782761653476, "grad_norm": 1.3507666299828758, "learning_rate": 9.003516998827668e-05, "loss": 3.9137024879455566, "step": 768, "token_acc": 0.22226228221795702 }, { "epoch": 0.45089416593374376, "grad_norm": 1.9320738465240743, "learning_rate": 9.015240328253223e-05, "loss": 3.960395336151123, "step": 769, "token_acc": 0.21898678494363363 }, { "epoch": 0.4514805042509528, "grad_norm": 1.6960861932966782, "learning_rate": 9.026963657678781e-05, "loss": 3.899871587753296, "step": 770, "token_acc": 0.2266482478203641 }, { "epoch": 0.4520668425681618, "grad_norm": 2.4099589458695125, "learning_rate": 9.038686987104337e-05, "loss": 3.891418933868408, "step": 771, "token_acc": 0.22540586273314767 }, { "epoch": 0.4526531808853709, "grad_norm": 1.5763104772797327, "learning_rate": 9.050410316529895e-05, "loss": 3.8581748008728027, "step": 772, "token_acc": 0.23018679436762654 }, { "epoch": 0.4532395192025799, "grad_norm": 2.549973842613314, "learning_rate": 9.062133645955451e-05, "loss": 3.918881893157959, "step": 773, "token_acc": 0.22459708741725015 }, { "epoch": 0.45382585751978893, "grad_norm": 1.736984546111328, "learning_rate": 9.073856975381009e-05, "loss": 3.9528517723083496, "step": 774, "token_acc": 0.22035208243881493 }, { "epoch": 0.45441219583699793, "grad_norm": 1.698273811652094, "learning_rate": 9.085580304806566e-05, "loss": 3.9416563510894775, "step": 775, "token_acc": 0.2212251668707295 }, { "epoch": 0.454998534154207, "grad_norm": 2.069628020016678, "learning_rate": 9.097303634232123e-05, "loss": 3.903822898864746, "step": 776, "token_acc": 0.22593174857732728 }, { "epoch": 0.455584872471416, "grad_norm": 1.8985676809172887, "learning_rate": 9.10902696365768e-05, "loss": 3.8916778564453125, "step": 777, "token_acc": 0.22497064181408177 }, { "epoch": 0.45617121078862505, "grad_norm": 2.249942313911619, "learning_rate": 9.120750293083235e-05, "loss": 3.893077850341797, "step": 778, "token_acc": 0.22636775568028203 }, { "epoch": 0.45675754910583405, "grad_norm": 1.7904222203599198, "learning_rate": 9.132473622508793e-05, "loss": 3.8899126052856445, "step": 779, "token_acc": 0.2254609469100343 }, { "epoch": 0.4573438874230431, "grad_norm": 1.7742101311256486, "learning_rate": 9.144196951934349e-05, "loss": 3.8878064155578613, "step": 780, "token_acc": 0.2239327950160751 }, { "epoch": 0.4579302257402521, "grad_norm": 1.6449534718641463, "learning_rate": 9.155920281359907e-05, "loss": 3.867175579071045, "step": 781, "token_acc": 0.22982883813379193 }, { "epoch": 0.45851656405746116, "grad_norm": 1.7486791327143634, "learning_rate": 9.167643610785463e-05, "loss": 3.8803577423095703, "step": 782, "token_acc": 0.22562936079944795 }, { "epoch": 0.45910290237467016, "grad_norm": 1.6895566187152857, "learning_rate": 9.17936694021102e-05, "loss": 3.902367353439331, "step": 783, "token_acc": 0.22516781262920058 }, { "epoch": 0.4596892406918792, "grad_norm": 2.576619700263025, "learning_rate": 9.191090269636577e-05, "loss": 3.921027183532715, "step": 784, "token_acc": 0.22181978404718755 }, { "epoch": 0.4602755790090882, "grad_norm": 1.5998442620731603, "learning_rate": 9.202813599062135e-05, "loss": 3.902172088623047, "step": 785, "token_acc": 0.22429702639920943 }, { "epoch": 0.4608619173262973, "grad_norm": 2.800536871811806, "learning_rate": 9.214536928487691e-05, "loss": 3.870784282684326, "step": 786, "token_acc": 0.22730087367374868 }, { "epoch": 0.4614482556435063, "grad_norm": 1.5157492029140756, "learning_rate": 9.226260257913247e-05, "loss": 3.9423084259033203, "step": 787, "token_acc": 0.21816674971535385 }, { "epoch": 0.46203459396071533, "grad_norm": 2.5524842560316743, "learning_rate": 9.237983587338805e-05, "loss": 3.909968852996826, "step": 788, "token_acc": 0.22396359055214693 }, { "epoch": 0.46262093227792433, "grad_norm": 1.8200972978087542, "learning_rate": 9.249706916764361e-05, "loss": 3.8912699222564697, "step": 789, "token_acc": 0.2261728535641084 }, { "epoch": 0.4632072705951334, "grad_norm": 1.7984710543234683, "learning_rate": 9.261430246189919e-05, "loss": 3.8978404998779297, "step": 790, "token_acc": 0.22575110823458103 }, { "epoch": 0.46379360891234245, "grad_norm": 1.869542470040667, "learning_rate": 9.273153575615475e-05, "loss": 3.928298234939575, "step": 791, "token_acc": 0.22056627989208727 }, { "epoch": 0.46437994722955145, "grad_norm": 1.9655390950080824, "learning_rate": 9.284876905041033e-05, "loss": 3.8563456535339355, "step": 792, "token_acc": 0.22948494983277593 }, { "epoch": 0.4649662855467605, "grad_norm": 1.7826658467215866, "learning_rate": 9.296600234466589e-05, "loss": 3.9013195037841797, "step": 793, "token_acc": 0.22432930503774495 }, { "epoch": 0.4655526238639695, "grad_norm": 1.6155847072960474, "learning_rate": 9.308323563892146e-05, "loss": 3.880685329437256, "step": 794, "token_acc": 0.22561012673616387 }, { "epoch": 0.46613896218117856, "grad_norm": 2.464187024479387, "learning_rate": 9.320046893317703e-05, "loss": 3.8675413131713867, "step": 795, "token_acc": 0.2293023035287835 }, { "epoch": 0.46672530049838756, "grad_norm": 2.117923485251248, "learning_rate": 9.331770222743259e-05, "loss": 3.890979290008545, "step": 796, "token_acc": 0.2274577739281074 }, { "epoch": 0.4673116388155966, "grad_norm": 1.8135485629403558, "learning_rate": 9.343493552168817e-05, "loss": 3.8722591400146484, "step": 797, "token_acc": 0.22754903413092575 }, { "epoch": 0.4678979771328056, "grad_norm": 1.6666757203163662, "learning_rate": 9.355216881594373e-05, "loss": 3.7965779304504395, "step": 798, "token_acc": 0.23548949511358522 }, { "epoch": 0.4684843154500147, "grad_norm": 1.6498589259186138, "learning_rate": 9.36694021101993e-05, "loss": 3.8730015754699707, "step": 799, "token_acc": 0.22462406762145762 }, { "epoch": 0.4690706537672237, "grad_norm": 1.8834256598113779, "learning_rate": 9.378663540445487e-05, "loss": 3.8841490745544434, "step": 800, "token_acc": 0.2255475091680745 }, { "epoch": 0.46965699208443273, "grad_norm": 2.0334556878234915, "learning_rate": 9.390386869871044e-05, "loss": 3.8848514556884766, "step": 801, "token_acc": 0.22690193035731007 }, { "epoch": 0.47024333040164173, "grad_norm": 2.0377081679199613, "learning_rate": 9.402110199296601e-05, "loss": 3.877686023712158, "step": 802, "token_acc": 0.2246462658967874 }, { "epoch": 0.4708296687188508, "grad_norm": 2.0015218844171843, "learning_rate": 9.413833528722158e-05, "loss": 3.8933260440826416, "step": 803, "token_acc": 0.2268061021422694 }, { "epoch": 0.4714160070360598, "grad_norm": 2.0478849793439164, "learning_rate": 9.425556858147715e-05, "loss": 3.9260916709899902, "step": 804, "token_acc": 0.22048878420875148 }, { "epoch": 0.47200234535326885, "grad_norm": 1.8694828328303443, "learning_rate": 9.437280187573271e-05, "loss": 3.889716863632202, "step": 805, "token_acc": 0.22447748263432293 }, { "epoch": 0.47258868367047785, "grad_norm": 1.8970059184469343, "learning_rate": 9.449003516998827e-05, "loss": 3.8182625770568848, "step": 806, "token_acc": 0.2320792620500958 }, { "epoch": 0.4731750219876869, "grad_norm": 1.7310927049039988, "learning_rate": 9.460726846424385e-05, "loss": 3.9098448753356934, "step": 807, "token_acc": 0.22147892786801618 }, { "epoch": 0.4737613603048959, "grad_norm": 2.1400799019598016, "learning_rate": 9.472450175849942e-05, "loss": 3.884195566177368, "step": 808, "token_acc": 0.2241544220289735 }, { "epoch": 0.47434769862210496, "grad_norm": 1.5526087586862078, "learning_rate": 9.484173505275499e-05, "loss": 3.8570384979248047, "step": 809, "token_acc": 0.22767766931192512 }, { "epoch": 0.47493403693931396, "grad_norm": 2.7078440330444646, "learning_rate": 9.495896834701056e-05, "loss": 3.8440260887145996, "step": 810, "token_acc": 0.2282954245620147 }, { "epoch": 0.475520375256523, "grad_norm": 1.1888403967427135, "learning_rate": 9.507620164126613e-05, "loss": 3.896343946456909, "step": 811, "token_acc": 0.22439760246719737 }, { "epoch": 0.476106713573732, "grad_norm": 2.3604336924134524, "learning_rate": 9.51934349355217e-05, "loss": 3.8686041831970215, "step": 812, "token_acc": 0.22957351201370874 }, { "epoch": 0.4766930518909411, "grad_norm": 2.124829798786561, "learning_rate": 9.531066822977726e-05, "loss": 3.8537416458129883, "step": 813, "token_acc": 0.22822571306228304 }, { "epoch": 0.4772793902081501, "grad_norm": 2.119433619207387, "learning_rate": 9.542790152403283e-05, "loss": 3.876708984375, "step": 814, "token_acc": 0.2248204058179777 }, { "epoch": 0.47786572852535913, "grad_norm": 1.9560413764089617, "learning_rate": 9.554513481828839e-05, "loss": 3.894780397415161, "step": 815, "token_acc": 0.2237485651875396 }, { "epoch": 0.47845206684256814, "grad_norm": 2.0383280700068243, "learning_rate": 9.566236811254397e-05, "loss": 3.897106647491455, "step": 816, "token_acc": 0.2221390579942326 }, { "epoch": 0.4790384051597772, "grad_norm": 1.846400839215357, "learning_rate": 9.577960140679953e-05, "loss": 3.8745126724243164, "step": 817, "token_acc": 0.22795407309778476 }, { "epoch": 0.47962474347698625, "grad_norm": 1.7701654805658873, "learning_rate": 9.58968347010551e-05, "loss": 3.862316131591797, "step": 818, "token_acc": 0.22820242274212202 }, { "epoch": 0.48021108179419525, "grad_norm": 1.716739886261576, "learning_rate": 9.601406799531067e-05, "loss": 3.8438267707824707, "step": 819, "token_acc": 0.23013671227251908 }, { "epoch": 0.4807974201114043, "grad_norm": 1.8530692487312084, "learning_rate": 9.613130128956624e-05, "loss": 3.8538131713867188, "step": 820, "token_acc": 0.22755805208956828 }, { "epoch": 0.4813837584286133, "grad_norm": 1.7477164041091169, "learning_rate": 9.624853458382182e-05, "loss": 3.883127212524414, "step": 821, "token_acc": 0.22496032785369838 }, { "epoch": 0.48197009674582236, "grad_norm": 2.0859655635067873, "learning_rate": 9.636576787807737e-05, "loss": 3.873800277709961, "step": 822, "token_acc": 0.22847557625010845 }, { "epoch": 0.48255643506303136, "grad_norm": 1.6743160528208407, "learning_rate": 9.648300117233295e-05, "loss": 3.8300459384918213, "step": 823, "token_acc": 0.2311213819101965 }, { "epoch": 0.4831427733802404, "grad_norm": 2.377277112859616, "learning_rate": 9.660023446658851e-05, "loss": 3.863558769226074, "step": 824, "token_acc": 0.22637062959109683 }, { "epoch": 0.4837291116974494, "grad_norm": 1.3795840428609316, "learning_rate": 9.671746776084409e-05, "loss": 3.9056777954101562, "step": 825, "token_acc": 0.22270229737941022 }, { "epoch": 0.4843154500146585, "grad_norm": 2.5341038548867947, "learning_rate": 9.683470105509965e-05, "loss": 3.840391159057617, "step": 826, "token_acc": 0.23263906782718133 }, { "epoch": 0.4849017883318675, "grad_norm": 1.5028432775377962, "learning_rate": 9.695193434935522e-05, "loss": 3.810879707336426, "step": 827, "token_acc": 0.23285418668243293 }, { "epoch": 0.48548812664907653, "grad_norm": 1.8140075471214623, "learning_rate": 9.706916764361079e-05, "loss": 3.917994260787964, "step": 828, "token_acc": 0.22115723101134763 }, { "epoch": 0.48607446496628554, "grad_norm": 1.9311951666675868, "learning_rate": 9.718640093786636e-05, "loss": 3.866739273071289, "step": 829, "token_acc": 0.22711367867703428 }, { "epoch": 0.4866608032834946, "grad_norm": 1.5652147363547808, "learning_rate": 9.730363423212193e-05, "loss": 3.8571949005126953, "step": 830, "token_acc": 0.22794324157886736 }, { "epoch": 0.4872471416007036, "grad_norm": 1.5963904007804146, "learning_rate": 9.742086752637749e-05, "loss": 3.8184940814971924, "step": 831, "token_acc": 0.23298841173483809 }, { "epoch": 0.48783347991791265, "grad_norm": 1.934804762361602, "learning_rate": 9.753810082063307e-05, "loss": 3.9114720821380615, "step": 832, "token_acc": 0.22209300212607438 }, { "epoch": 0.48841981823512165, "grad_norm": 2.181504126974787, "learning_rate": 9.765533411488863e-05, "loss": 3.825620651245117, "step": 833, "token_acc": 0.23236013563877253 }, { "epoch": 0.4890061565523307, "grad_norm": 1.3501053666987277, "learning_rate": 9.77725674091442e-05, "loss": 3.8362808227539062, "step": 834, "token_acc": 0.22815712722812798 }, { "epoch": 0.4895924948695397, "grad_norm": 2.1709396292068512, "learning_rate": 9.788980070339977e-05, "loss": 3.8445987701416016, "step": 835, "token_acc": 0.22872417662200514 }, { "epoch": 0.49017883318674876, "grad_norm": 1.8524811589318733, "learning_rate": 9.800703399765534e-05, "loss": 3.8186087608337402, "step": 836, "token_acc": 0.22899310066872072 }, { "epoch": 0.49076517150395776, "grad_norm": 1.9013615109708906, "learning_rate": 9.81242672919109e-05, "loss": 3.896653175354004, "step": 837, "token_acc": 0.22417524877975442 }, { "epoch": 0.4913515098211668, "grad_norm": 1.5993231767825782, "learning_rate": 9.824150058616648e-05, "loss": 3.8276803493499756, "step": 838, "token_acc": 0.2329621095750128 }, { "epoch": 0.4919378481383758, "grad_norm": 1.5379382240093624, "learning_rate": 9.835873388042205e-05, "loss": 3.857423782348633, "step": 839, "token_acc": 0.22638995819417768 }, { "epoch": 0.4925241864555849, "grad_norm": 1.9325385810138958, "learning_rate": 9.847596717467761e-05, "loss": 3.8191981315612793, "step": 840, "token_acc": 0.23254505214970866 }, { "epoch": 0.4931105247727939, "grad_norm": 2.0185300492717646, "learning_rate": 9.859320046893318e-05, "loss": 3.8605804443359375, "step": 841, "token_acc": 0.2257750988692554 }, { "epoch": 0.49369686309000294, "grad_norm": 2.137494691892694, "learning_rate": 9.871043376318875e-05, "loss": 3.8438796997070312, "step": 842, "token_acc": 0.22847942050462317 }, { "epoch": 0.49428320140721194, "grad_norm": 1.6701073786712586, "learning_rate": 9.882766705744432e-05, "loss": 3.8718926906585693, "step": 843, "token_acc": 0.22499588537324386 }, { "epoch": 0.494869539724421, "grad_norm": 2.44596872111063, "learning_rate": 9.894490035169989e-05, "loss": 3.8270504474639893, "step": 844, "token_acc": 0.23281811158439822 }, { "epoch": 0.49545587804163, "grad_norm": 1.4396085371604697, "learning_rate": 9.906213364595546e-05, "loss": 3.787402629852295, "step": 845, "token_acc": 0.2341805672679405 }, { "epoch": 0.49604221635883905, "grad_norm": 2.1093892691958294, "learning_rate": 9.917936694021102e-05, "loss": 3.825852632522583, "step": 846, "token_acc": 0.2288999992382442 }, { "epoch": 0.4966285546760481, "grad_norm": 1.3845878898796742, "learning_rate": 9.92966002344666e-05, "loss": 3.8680436611175537, "step": 847, "token_acc": 0.22720113956684423 }, { "epoch": 0.4972148929932571, "grad_norm": 1.7096703494505658, "learning_rate": 9.941383352872216e-05, "loss": 3.8637142181396484, "step": 848, "token_acc": 0.22786956150280868 }, { "epoch": 0.49780123131046616, "grad_norm": 1.9757041679481984, "learning_rate": 9.953106682297773e-05, "loss": 3.9013869762420654, "step": 849, "token_acc": 0.22096929045834154 }, { "epoch": 0.49838756962767516, "grad_norm": 1.7708102881508065, "learning_rate": 9.964830011723329e-05, "loss": 3.8716068267822266, "step": 850, "token_acc": 0.2249626042643815 }, { "epoch": 0.4989739079448842, "grad_norm": 1.7217166317282757, "learning_rate": 9.976553341148887e-05, "loss": 3.804633140563965, "step": 851, "token_acc": 0.2343589863950368 }, { "epoch": 0.4995602462620932, "grad_norm": 1.80491131215425, "learning_rate": 9.988276670574443e-05, "loss": 3.856325626373291, "step": 852, "token_acc": 0.22735655345687358 }, { "epoch": 0.5001465845793023, "grad_norm": 1.7306387003338115, "learning_rate": 0.0001, "loss": 3.8415231704711914, "step": 853, "token_acc": 0.22943609841062487 }, { "epoch": 0.5007329228965113, "grad_norm": 1.4543956068024768, "learning_rate": 0.00010011723329425558, "loss": 3.833892345428467, "step": 854, "token_acc": 0.22797933966267506 }, { "epoch": 0.5013192612137203, "grad_norm": 2.4912687753891865, "learning_rate": 0.00010023446658851114, "loss": 3.8768229484558105, "step": 855, "token_acc": 0.22621239130262039 }, { "epoch": 0.5019055995309294, "grad_norm": 1.6059915633961455, "learning_rate": 0.00010035169988276672, "loss": 3.8482866287231445, "step": 856, "token_acc": 0.22745292368681863 }, { "epoch": 0.5024919378481384, "grad_norm": 1.8684922419831205, "learning_rate": 0.00010046893317702228, "loss": 3.8241138458251953, "step": 857, "token_acc": 0.2299262666576473 }, { "epoch": 0.5030782761653474, "grad_norm": 1.733250431300054, "learning_rate": 0.00010058616647127786, "loss": 3.84586501121521, "step": 858, "token_acc": 0.2268165269153445 }, { "epoch": 0.5036646144825564, "grad_norm": 1.7962357758711094, "learning_rate": 0.00010070339976553342, "loss": 3.859968662261963, "step": 859, "token_acc": 0.22739971672492432 }, { "epoch": 0.5042509527997655, "grad_norm": 1.968146608694918, "learning_rate": 0.000100820633059789, "loss": 3.840559959411621, "step": 860, "token_acc": 0.22714941899052896 }, { "epoch": 0.5048372911169745, "grad_norm": 2.029459209917352, "learning_rate": 0.00010093786635404456, "loss": 3.8006794452667236, "step": 861, "token_acc": 0.23248480901095095 }, { "epoch": 0.5054236294341835, "grad_norm": 1.9457035827435212, "learning_rate": 0.00010105509964830011, "loss": 3.8426709175109863, "step": 862, "token_acc": 0.22913558299736517 }, { "epoch": 0.5060099677513925, "grad_norm": 1.8138451516963194, "learning_rate": 0.00010117233294255569, "loss": 3.8550000190734863, "step": 863, "token_acc": 0.22490673112290474 }, { "epoch": 0.5065963060686016, "grad_norm": 1.7123521094464096, "learning_rate": 0.00010128956623681125, "loss": 3.871476411819458, "step": 864, "token_acc": 0.2238877891227339 }, { "epoch": 0.5071826443858106, "grad_norm": 1.964610991944096, "learning_rate": 0.00010140679953106683, "loss": 3.8169126510620117, "step": 865, "token_acc": 0.23134838785521156 }, { "epoch": 0.5077689827030196, "grad_norm": 1.7420097589471077, "learning_rate": 0.00010152403282532239, "loss": 3.8456084728240967, "step": 866, "token_acc": 0.2285793508997261 }, { "epoch": 0.5083553210202286, "grad_norm": 2.094009505514912, "learning_rate": 0.00010164126611957796, "loss": 3.7970852851867676, "step": 867, "token_acc": 0.23256878923058702 }, { "epoch": 0.5089416593374377, "grad_norm": 1.4531493423760264, "learning_rate": 0.00010175849941383353, "loss": 3.7433066368103027, "step": 868, "token_acc": 0.23901962450089204 }, { "epoch": 0.5095279976546467, "grad_norm": 2.4676101825746053, "learning_rate": 0.0001018757327080891, "loss": 3.8639516830444336, "step": 869, "token_acc": 0.22586000257010755 }, { "epoch": 0.5101143359718557, "grad_norm": 1.4596223269244695, "learning_rate": 0.00010199296600234467, "loss": 3.8177967071533203, "step": 870, "token_acc": 0.23146747739655388 }, { "epoch": 0.5107006742890647, "grad_norm": 2.086274189884841, "learning_rate": 0.00010211019929660024, "loss": 3.8644027709960938, "step": 871, "token_acc": 0.22530568621330047 }, { "epoch": 0.5112870126062738, "grad_norm": 1.4906891681833554, "learning_rate": 0.0001022274325908558, "loss": 3.7592356204986572, "step": 872, "token_acc": 0.23870524787070854 }, { "epoch": 0.5118733509234829, "grad_norm": 1.813031339407144, "learning_rate": 0.00010234466588511138, "loss": 3.8510303497314453, "step": 873, "token_acc": 0.2251747542497511 }, { "epoch": 0.5124596892406919, "grad_norm": 1.9028976719121513, "learning_rate": 0.00010246189917936694, "loss": 3.790393352508545, "step": 874, "token_acc": 0.23384112795006262 }, { "epoch": 0.513046027557901, "grad_norm": 1.9782628570344742, "learning_rate": 0.00010257913247362252, "loss": 3.766150951385498, "step": 875, "token_acc": 0.23384121362014829 }, { "epoch": 0.51363236587511, "grad_norm": 1.5214097874722905, "learning_rate": 0.00010269636576787808, "loss": 3.776675224304199, "step": 876, "token_acc": 0.2348864863742107 }, { "epoch": 0.514218704192319, "grad_norm": 1.7257292481633826, "learning_rate": 0.00010281359906213366, "loss": 3.7921090126037598, "step": 877, "token_acc": 0.2324697342975973 }, { "epoch": 0.514805042509528, "grad_norm": 1.8749626116343154, "learning_rate": 0.00010293083235638922, "loss": 3.7958459854125977, "step": 878, "token_acc": 0.2325338070225837 }, { "epoch": 0.5153913808267371, "grad_norm": 1.7442970438803282, "learning_rate": 0.0001030480656506448, "loss": 3.835383653640747, "step": 879, "token_acc": 0.22866630324251225 }, { "epoch": 0.5159777191439461, "grad_norm": 1.6921559194705589, "learning_rate": 0.00010316529894490035, "loss": 3.7514145374298096, "step": 880, "token_acc": 0.23953068970578983 }, { "epoch": 0.5165640574611551, "grad_norm": 1.7685275933137885, "learning_rate": 0.00010328253223915591, "loss": 3.811249256134033, "step": 881, "token_acc": 0.22951919281562566 }, { "epoch": 0.5171503957783641, "grad_norm": 1.5976335129970325, "learning_rate": 0.00010339976553341149, "loss": 3.76723313331604, "step": 882, "token_acc": 0.23343578348406657 }, { "epoch": 0.5177367340955732, "grad_norm": 1.446161383733489, "learning_rate": 0.00010351699882766705, "loss": 3.8000478744506836, "step": 883, "token_acc": 0.23038577026820703 }, { "epoch": 0.5183230724127822, "grad_norm": 2.6438643568947913, "learning_rate": 0.00010363423212192263, "loss": 3.7621946334838867, "step": 884, "token_acc": 0.2357914611851902 }, { "epoch": 0.5189094107299912, "grad_norm": 1.7984800858785115, "learning_rate": 0.00010375146541617819, "loss": 3.7875049114227295, "step": 885, "token_acc": 0.2339973790343062 }, { "epoch": 0.5194957490472002, "grad_norm": 2.057739881614775, "learning_rate": 0.00010386869871043376, "loss": 3.8160524368286133, "step": 886, "token_acc": 0.2294129875871361 }, { "epoch": 0.5200820873644093, "grad_norm": 1.3221060403789813, "learning_rate": 0.00010398593200468934, "loss": 3.811464548110962, "step": 887, "token_acc": 0.2297662252217836 }, { "epoch": 0.5206684256816183, "grad_norm": 2.579994915319218, "learning_rate": 0.0001041031652989449, "loss": 3.7498552799224854, "step": 888, "token_acc": 0.23858625456937563 }, { "epoch": 0.5212547639988273, "grad_norm": 1.6194049991898507, "learning_rate": 0.00010422039859320048, "loss": 3.7258219718933105, "step": 889, "token_acc": 0.24174800448857742 }, { "epoch": 0.5218411023160363, "grad_norm": 2.2252441754806904, "learning_rate": 0.00010433763188745604, "loss": 3.9231722354888916, "step": 890, "token_acc": 0.22072242471334583 }, { "epoch": 0.5224274406332454, "grad_norm": 1.508934049123201, "learning_rate": 0.00010445486518171162, "loss": 3.821866989135742, "step": 891, "token_acc": 0.2280791724033397 }, { "epoch": 0.5230137789504544, "grad_norm": 1.9029437974971617, "learning_rate": 0.00010457209847596718, "loss": 3.783266305923462, "step": 892, "token_acc": 0.2320349860006442 }, { "epoch": 0.5236001172676634, "grad_norm": 2.061112201745218, "learning_rate": 0.00010468933177022276, "loss": 3.8460941314697266, "step": 893, "token_acc": 0.22568094427846033 }, { "epoch": 0.5241864555848724, "grad_norm": 1.6800004489068228, "learning_rate": 0.00010480656506447832, "loss": 3.8569884300231934, "step": 894, "token_acc": 0.22416407289258547 }, { "epoch": 0.5247727939020815, "grad_norm": 1.876459454000299, "learning_rate": 0.0001049237983587339, "loss": 3.776334524154663, "step": 895, "token_acc": 0.23372316963315007 }, { "epoch": 0.5253591322192905, "grad_norm": 2.196412564145204, "learning_rate": 0.00010504103165298946, "loss": 3.7707877159118652, "step": 896, "token_acc": 0.23543779904921974 }, { "epoch": 0.5259454705364995, "grad_norm": 1.262367050458971, "learning_rate": 0.00010515826494724504, "loss": 3.8026881217956543, "step": 897, "token_acc": 0.23261858518295406 }, { "epoch": 0.5265318088537085, "grad_norm": 2.003904875585833, "learning_rate": 0.00010527549824150059, "loss": 3.8653600215911865, "step": 898, "token_acc": 0.22501260742304274 }, { "epoch": 0.5271181471709177, "grad_norm": 2.025059662809299, "learning_rate": 0.00010539273153575615, "loss": 3.8105030059814453, "step": 899, "token_acc": 0.23099826192095518 }, { "epoch": 0.5277044854881267, "grad_norm": 1.9684297037770544, "learning_rate": 0.00010550996483001172, "loss": 3.772536277770996, "step": 900, "token_acc": 0.2345436096073513 }, { "epoch": 0.5282908238053357, "grad_norm": 1.675156861231727, "learning_rate": 0.00010562719812426729, "loss": 3.7684578895568848, "step": 901, "token_acc": 0.23502143355597646 }, { "epoch": 0.5288771621225447, "grad_norm": 1.7973141739107814, "learning_rate": 0.00010574443141852286, "loss": 3.8040270805358887, "step": 902, "token_acc": 0.23088953797132236 }, { "epoch": 0.5294635004397538, "grad_norm": 2.20925332979252, "learning_rate": 0.00010586166471277843, "loss": 3.8547840118408203, "step": 903, "token_acc": 0.22518435723729946 }, { "epoch": 0.5300498387569628, "grad_norm": 1.4999529571292936, "learning_rate": 0.000105978898007034, "loss": 3.8437447547912598, "step": 904, "token_acc": 0.22553469606208443 }, { "epoch": 0.5306361770741718, "grad_norm": 1.8571005394765723, "learning_rate": 0.00010609613130128957, "loss": 3.819962978363037, "step": 905, "token_acc": 0.2276766843785414 }, { "epoch": 0.5312225153913809, "grad_norm": 1.92435176695105, "learning_rate": 0.00010621336459554514, "loss": 3.8028135299682617, "step": 906, "token_acc": 0.2323573534194266 }, { "epoch": 0.5318088537085899, "grad_norm": 1.578137152214784, "learning_rate": 0.0001063305978898007, "loss": 3.7573342323303223, "step": 907, "token_acc": 0.2364324050767776 }, { "epoch": 0.5323951920257989, "grad_norm": 1.9135186047711916, "learning_rate": 0.00010644783118405628, "loss": 3.7947237491607666, "step": 908, "token_acc": 0.2321420236187394 }, { "epoch": 0.5329815303430079, "grad_norm": 1.9462589392980687, "learning_rate": 0.00010656506447831184, "loss": 3.7544782161712646, "step": 909, "token_acc": 0.23690342058916833 }, { "epoch": 0.533567868660217, "grad_norm": 1.2507640993549884, "learning_rate": 0.00010668229777256742, "loss": 3.808830738067627, "step": 910, "token_acc": 0.23140994503387446 }, { "epoch": 0.534154206977426, "grad_norm": 2.378214650949949, "learning_rate": 0.00010679953106682298, "loss": 3.867976188659668, "step": 911, "token_acc": 0.22268661018519473 }, { "epoch": 0.534740545294635, "grad_norm": 1.3070852369055637, "learning_rate": 0.00010691676436107856, "loss": 3.8175320625305176, "step": 912, "token_acc": 0.2271232033616834 }, { "epoch": 0.535326883611844, "grad_norm": 1.8245769629936717, "learning_rate": 0.00010703399765533414, "loss": 3.8083512783050537, "step": 913, "token_acc": 0.23271369949745815 }, { "epoch": 0.5359132219290531, "grad_norm": 1.539603236136721, "learning_rate": 0.0001071512309495897, "loss": 3.7720625400543213, "step": 914, "token_acc": 0.23479382305773572 }, { "epoch": 0.5364995602462621, "grad_norm": 2.1312462253701034, "learning_rate": 0.00010726846424384527, "loss": 3.767002582550049, "step": 915, "token_acc": 0.2339139624387474 }, { "epoch": 0.5370858985634711, "grad_norm": 1.799797612861782, "learning_rate": 0.00010738569753810081, "loss": 3.755925178527832, "step": 916, "token_acc": 0.23522587046955107 }, { "epoch": 0.5376722368806801, "grad_norm": 1.3268815812318113, "learning_rate": 0.00010750293083235639, "loss": 3.7807416915893555, "step": 917, "token_acc": 0.23370628297782575 }, { "epoch": 0.5382585751978892, "grad_norm": 1.8272570883668895, "learning_rate": 0.00010762016412661195, "loss": 3.7635884284973145, "step": 918, "token_acc": 0.23440094899169633 }, { "epoch": 0.5388449135150982, "grad_norm": 1.6569650320141702, "learning_rate": 0.00010773739742086753, "loss": 3.8084869384765625, "step": 919, "token_acc": 0.23000301153747846 }, { "epoch": 0.5394312518323072, "grad_norm": 1.6493331312472133, "learning_rate": 0.0001078546307151231, "loss": 3.7704248428344727, "step": 920, "token_acc": 0.23372646514871298 }, { "epoch": 0.5400175901495162, "grad_norm": 1.901996980044487, "learning_rate": 0.00010797186400937866, "loss": 3.7993061542510986, "step": 921, "token_acc": 0.22834509795462757 }, { "epoch": 0.5406039284667253, "grad_norm": 1.857921370257786, "learning_rate": 0.00010808909730363424, "loss": 3.78391695022583, "step": 922, "token_acc": 0.23128040698530647 }, { "epoch": 0.5411902667839343, "grad_norm": 1.5643984865513767, "learning_rate": 0.0001082063305978898, "loss": 3.778407096862793, "step": 923, "token_acc": 0.23182526824975774 }, { "epoch": 0.5417766051011433, "grad_norm": 2.160622739821634, "learning_rate": 0.00010832356389214538, "loss": 3.8278236389160156, "step": 924, "token_acc": 0.2279319383069058 }, { "epoch": 0.5423629434183523, "grad_norm": 1.7577557152303356, "learning_rate": 0.00010844079718640094, "loss": 3.7881975173950195, "step": 925, "token_acc": 0.23241702532693262 }, { "epoch": 0.5429492817355615, "grad_norm": 1.6376321911289973, "learning_rate": 0.00010855803048065652, "loss": 3.774430751800537, "step": 926, "token_acc": 0.23250080286045907 }, { "epoch": 0.5435356200527705, "grad_norm": 2.2510626806122014, "learning_rate": 0.00010867526377491208, "loss": 3.8066251277923584, "step": 927, "token_acc": 0.22921893027637527 }, { "epoch": 0.5441219583699795, "grad_norm": 1.3507963593683292, "learning_rate": 0.00010879249706916766, "loss": 3.8082144260406494, "step": 928, "token_acc": 0.22806348944760885 }, { "epoch": 0.5447082966871885, "grad_norm": 1.721975309121372, "learning_rate": 0.00010890973036342322, "loss": 3.76438307762146, "step": 929, "token_acc": 0.23699217163945577 }, { "epoch": 0.5452946350043976, "grad_norm": 1.7543096211850333, "learning_rate": 0.0001090269636576788, "loss": 3.758983850479126, "step": 930, "token_acc": 0.23427981306851373 }, { "epoch": 0.5458809733216066, "grad_norm": 2.2392104181490744, "learning_rate": 0.00010914419695193436, "loss": 3.8164052963256836, "step": 931, "token_acc": 0.22810268298007255 }, { "epoch": 0.5464673116388156, "grad_norm": 1.3443931431814997, "learning_rate": 0.00010926143024618994, "loss": 3.7565994262695312, "step": 932, "token_acc": 0.2363223524834168 }, { "epoch": 0.5470536499560247, "grad_norm": 1.9892276651408798, "learning_rate": 0.00010937866354044548, "loss": 3.7771010398864746, "step": 933, "token_acc": 0.2334187228022037 }, { "epoch": 0.5476399882732337, "grad_norm": 1.422651020578365, "learning_rate": 0.00010949589683470105, "loss": 3.787266969680786, "step": 934, "token_acc": 0.23238941357290635 }, { "epoch": 0.5482263265904427, "grad_norm": 1.54850828933813, "learning_rate": 0.00010961313012895662, "loss": 3.7013561725616455, "step": 935, "token_acc": 0.243059228782932 }, { "epoch": 0.5488126649076517, "grad_norm": 1.7871858193673842, "learning_rate": 0.00010973036342321219, "loss": 3.786162853240967, "step": 936, "token_acc": 0.23171379610290174 }, { "epoch": 0.5493990032248608, "grad_norm": 1.5475729081770393, "learning_rate": 0.00010984759671746776, "loss": 3.7843947410583496, "step": 937, "token_acc": 0.23255325185737855 }, { "epoch": 0.5499853415420698, "grad_norm": 1.6485880047228505, "learning_rate": 0.00010996483001172333, "loss": 3.780978202819824, "step": 938, "token_acc": 0.23280722687956737 }, { "epoch": 0.5505716798592788, "grad_norm": 1.7559637306591538, "learning_rate": 0.0001100820633059789, "loss": 3.7709059715270996, "step": 939, "token_acc": 0.23403717382974898 }, { "epoch": 0.5511580181764878, "grad_norm": 2.0445619793785146, "learning_rate": 0.00011019929660023446, "loss": 3.835878610610962, "step": 940, "token_acc": 0.2242450152284803 }, { "epoch": 0.5517443564936969, "grad_norm": 1.71281684025806, "learning_rate": 0.00011031652989449004, "loss": 3.7487008571624756, "step": 941, "token_acc": 0.23653796922134923 }, { "epoch": 0.5523306948109059, "grad_norm": 2.0816977456445978, "learning_rate": 0.0001104337631887456, "loss": 3.805837392807007, "step": 942, "token_acc": 0.2290530772408096 }, { "epoch": 0.5529170331281149, "grad_norm": 1.3372348512750465, "learning_rate": 0.00011055099648300118, "loss": 3.74919056892395, "step": 943, "token_acc": 0.2353206636242987 }, { "epoch": 0.5535033714453239, "grad_norm": 2.3229828997542636, "learning_rate": 0.00011066822977725674, "loss": 3.7610578536987305, "step": 944, "token_acc": 0.23237212880888647 }, { "epoch": 0.554089709762533, "grad_norm": 1.6633473163118637, "learning_rate": 0.00011078546307151232, "loss": 3.7717862129211426, "step": 945, "token_acc": 0.23228038610099191 }, { "epoch": 0.554676048079742, "grad_norm": 1.6284272767111407, "learning_rate": 0.0001109026963657679, "loss": 3.761871337890625, "step": 946, "token_acc": 0.23458261587264753 }, { "epoch": 0.555262386396951, "grad_norm": 1.812537473430118, "learning_rate": 0.00011101992966002346, "loss": 3.7693562507629395, "step": 947, "token_acc": 0.23165263188819182 }, { "epoch": 0.55584872471416, "grad_norm": 2.014904504689635, "learning_rate": 0.00011113716295427903, "loss": 3.7694320678710938, "step": 948, "token_acc": 0.23309949999466945 }, { "epoch": 0.5564350630313691, "grad_norm": 2.0184673137181286, "learning_rate": 0.0001112543962485346, "loss": 3.763669967651367, "step": 949, "token_acc": 0.23315400723497204 }, { "epoch": 0.5570214013485781, "grad_norm": 1.7798335442382502, "learning_rate": 0.00011137162954279017, "loss": 3.7868223190307617, "step": 950, "token_acc": 0.2307915083561389 }, { "epoch": 0.5576077396657871, "grad_norm": 2.453680530842864, "learning_rate": 0.00011148886283704571, "loss": 3.7713940143585205, "step": 951, "token_acc": 0.23199098815326546 }, { "epoch": 0.5581940779829961, "grad_norm": 1.49336162751524, "learning_rate": 0.00011160609613130129, "loss": 3.7290735244750977, "step": 952, "token_acc": 0.23638256641437336 }, { "epoch": 0.5587804163002053, "grad_norm": 1.921848350128607, "learning_rate": 0.00011172332942555686, "loss": 3.79306960105896, "step": 953, "token_acc": 0.2287944816136972 }, { "epoch": 0.5593667546174143, "grad_norm": 1.716134004776264, "learning_rate": 0.00011184056271981242, "loss": 3.704774856567383, "step": 954, "token_acc": 0.23754556342766878 }, { "epoch": 0.5599530929346233, "grad_norm": 1.8374662527703833, "learning_rate": 0.000111957796014068, "loss": 3.708160400390625, "step": 955, "token_acc": 0.23707179071339932 }, { "epoch": 0.5605394312518323, "grad_norm": 1.8095621647414326, "learning_rate": 0.00011207502930832356, "loss": 3.649742603302002, "step": 956, "token_acc": 0.24341598602930808 }, { "epoch": 0.5611257695690414, "grad_norm": 1.3847027472870947, "learning_rate": 0.00011219226260257914, "loss": 3.6778266429901123, "step": 957, "token_acc": 0.2415768115942029 }, { "epoch": 0.5617121078862504, "grad_norm": 2.056697609684765, "learning_rate": 0.0001123094958968347, "loss": 3.6843035221099854, "step": 958, "token_acc": 0.24111701958960155 }, { "epoch": 0.5622984462034594, "grad_norm": 2.518201220929928, "learning_rate": 0.00011242672919109028, "loss": 3.7458324432373047, "step": 959, "token_acc": 0.2350365776766022 }, { "epoch": 0.5628847845206685, "grad_norm": 2.2554232379779724, "learning_rate": 0.00011254396248534584, "loss": 3.723310947418213, "step": 960, "token_acc": 0.23437844569264096 }, { "epoch": 0.5634711228378775, "grad_norm": 1.402262058604264, "learning_rate": 0.00011266119577960142, "loss": 3.7047066688537598, "step": 961, "token_acc": 0.23892739345075129 }, { "epoch": 0.5640574611550865, "grad_norm": 2.0569922023113025, "learning_rate": 0.00011277842907385698, "loss": 3.7138237953186035, "step": 962, "token_acc": 0.23666115056891182 }, { "epoch": 0.5646437994722955, "grad_norm": 2.071576549804621, "learning_rate": 0.00011289566236811256, "loss": 3.661585807800293, "step": 963, "token_acc": 0.24430577383601143 }, { "epoch": 0.5652301377895046, "grad_norm": 1.6598968721395821, "learning_rate": 0.00011301289566236812, "loss": 3.656017303466797, "step": 964, "token_acc": 0.24012215520433164 }, { "epoch": 0.5658164761067136, "grad_norm": 2.4405248472422114, "learning_rate": 0.0001131301289566237, "loss": 3.6877269744873047, "step": 965, "token_acc": 0.23853472562390013 }, { "epoch": 0.5664028144239226, "grad_norm": 1.5687434808702136, "learning_rate": 0.00011324736225087926, "loss": 3.691927909851074, "step": 966, "token_acc": 0.24020295742562833 }, { "epoch": 0.5669891527411316, "grad_norm": 1.9184569816695656, "learning_rate": 0.00011336459554513483, "loss": 3.737863063812256, "step": 967, "token_acc": 0.23096642686474808 }, { "epoch": 0.5675754910583407, "grad_norm": 1.3414726313118897, "learning_rate": 0.0001134818288393904, "loss": 3.6842947006225586, "step": 968, "token_acc": 0.2379509556552493 }, { "epoch": 0.5681618293755497, "grad_norm": 2.287396406370554, "learning_rate": 0.00011359906213364595, "loss": 3.6681060791015625, "step": 969, "token_acc": 0.24232340861891272 }, { "epoch": 0.5687481676927587, "grad_norm": 1.5438155016982122, "learning_rate": 0.00011371629542790152, "loss": 3.7281970977783203, "step": 970, "token_acc": 0.23182750012187267 }, { "epoch": 0.5693345060099677, "grad_norm": 1.7546438757153469, "learning_rate": 0.00011383352872215709, "loss": 3.6396799087524414, "step": 971, "token_acc": 0.24385292367941566 }, { "epoch": 0.5699208443271768, "grad_norm": 2.070338535019644, "learning_rate": 0.00011395076201641266, "loss": 3.6432807445526123, "step": 972, "token_acc": 0.24361332245036413 }, { "epoch": 0.5705071826443858, "grad_norm": 1.3792662791881012, "learning_rate": 0.00011406799531066822, "loss": 3.674607515335083, "step": 973, "token_acc": 0.23749031633744447 }, { "epoch": 0.5710935209615948, "grad_norm": 1.6276588527399602, "learning_rate": 0.0001141852286049238, "loss": 3.6293632984161377, "step": 974, "token_acc": 0.2411245255372049 }, { "epoch": 0.5716798592788038, "grad_norm": 2.0925242589180106, "learning_rate": 0.00011430246189917936, "loss": 3.6402673721313477, "step": 975, "token_acc": 0.24347585106603403 }, { "epoch": 0.5722661975960129, "grad_norm": 2.140237356216463, "learning_rate": 0.00011441969519343494, "loss": 3.604214668273926, "step": 976, "token_acc": 0.24692300579867119 }, { "epoch": 0.5728525359132219, "grad_norm": 1.7583526965991143, "learning_rate": 0.0001145369284876905, "loss": 3.6000659465789795, "step": 977, "token_acc": 0.24751831368944927 }, { "epoch": 0.5734388742304309, "grad_norm": 1.7354266205377469, "learning_rate": 0.00011465416178194608, "loss": 3.615461587905884, "step": 978, "token_acc": 0.24299503445480342 }, { "epoch": 0.5740252125476399, "grad_norm": 1.8684657430988447, "learning_rate": 0.00011477139507620166, "loss": 3.6727147102355957, "step": 979, "token_acc": 0.23572003625371524 }, { "epoch": 0.574611550864849, "grad_norm": 2.0169862020881597, "learning_rate": 0.00011488862837045722, "loss": 3.6161770820617676, "step": 980, "token_acc": 0.24400889121338912 }, { "epoch": 0.575197889182058, "grad_norm": 2.086021665945833, "learning_rate": 0.0001150058616647128, "loss": 3.685018539428711, "step": 981, "token_acc": 0.23590091302457505 }, { "epoch": 0.575784227499267, "grad_norm": 1.7657187461744435, "learning_rate": 0.00011512309495896836, "loss": 3.6133508682250977, "step": 982, "token_acc": 0.24450684730965278 }, { "epoch": 0.576370565816476, "grad_norm": 1.6640375825801352, "learning_rate": 0.00011524032825322393, "loss": 3.644221782684326, "step": 983, "token_acc": 0.24128423865563492 }, { "epoch": 0.5769569041336852, "grad_norm": 1.6260652624574337, "learning_rate": 0.0001153575615474795, "loss": 3.55635929107666, "step": 984, "token_acc": 0.2501803600928462 }, { "epoch": 0.5775432424508942, "grad_norm": 1.9391807019065288, "learning_rate": 0.00011547479484173507, "loss": 3.57075834274292, "step": 985, "token_acc": 0.24781159366696073 }, { "epoch": 0.5781295807681032, "grad_norm": 1.5168846814109316, "learning_rate": 0.00011559202813599064, "loss": 3.5659799575805664, "step": 986, "token_acc": 0.25015804945894404 }, { "epoch": 0.5787159190853123, "grad_norm": 2.253670056227686, "learning_rate": 0.00011570926143024618, "loss": 3.651451349258423, "step": 987, "token_acc": 0.23741272833647534 }, { "epoch": 0.5793022574025213, "grad_norm": 1.5832478240738754, "learning_rate": 0.00011582649472450176, "loss": 3.618858814239502, "step": 988, "token_acc": 0.24266613656488883 }, { "epoch": 0.5798885957197303, "grad_norm": 1.9384595711708528, "learning_rate": 0.00011594372801875732, "loss": 3.591041326522827, "step": 989, "token_acc": 0.2462241108431388 }, { "epoch": 0.5804749340369393, "grad_norm": 1.9409775983104318, "learning_rate": 0.0001160609613130129, "loss": 3.6330628395080566, "step": 990, "token_acc": 0.24072135756398977 }, { "epoch": 0.5810612723541484, "grad_norm": 1.8649630902274321, "learning_rate": 0.00011617819460726846, "loss": 3.5837771892547607, "step": 991, "token_acc": 0.24744177658697444 }, { "epoch": 0.5816476106713574, "grad_norm": 2.007962694836894, "learning_rate": 0.00011629542790152404, "loss": 3.622023105621338, "step": 992, "token_acc": 0.240656168937927 }, { "epoch": 0.5822339489885664, "grad_norm": 1.6953485128758858, "learning_rate": 0.0001164126611957796, "loss": 3.644559860229492, "step": 993, "token_acc": 0.23858164042858698 }, { "epoch": 0.5828202873057754, "grad_norm": 1.8791298938661651, "learning_rate": 0.00011652989449003518, "loss": 3.5799875259399414, "step": 994, "token_acc": 0.24684730786184395 }, { "epoch": 0.5834066256229845, "grad_norm": 1.9800222468744946, "learning_rate": 0.00011664712778429074, "loss": 3.588566780090332, "step": 995, "token_acc": 0.2442843915584062 }, { "epoch": 0.5839929639401935, "grad_norm": 1.6936884817749778, "learning_rate": 0.00011676436107854632, "loss": 3.603142261505127, "step": 996, "token_acc": 0.2431729586761323 }, { "epoch": 0.5845793022574025, "grad_norm": 1.917207538491688, "learning_rate": 0.00011688159437280188, "loss": 3.5945591926574707, "step": 997, "token_acc": 0.24411139261113315 }, { "epoch": 0.5851656405746115, "grad_norm": 1.8963396992981865, "learning_rate": 0.00011699882766705746, "loss": 3.60992431640625, "step": 998, "token_acc": 0.2410411698255303 }, { "epoch": 0.5857519788918206, "grad_norm": 2.114140147694409, "learning_rate": 0.00011711606096131302, "loss": 3.5892937183380127, "step": 999, "token_acc": 0.24507057797210516 }, { "epoch": 0.5863383172090296, "grad_norm": 1.5279872071452991, "learning_rate": 0.0001172332942555686, "loss": 3.5577597618103027, "step": 1000, "token_acc": 0.24804607157548333 }, { "epoch": 0.5869246555262386, "grad_norm": 1.8380089007207305, "learning_rate": 0.00011735052754982416, "loss": 3.5900192260742188, "step": 1001, "token_acc": 0.24536310495550057 }, { "epoch": 0.5875109938434476, "grad_norm": 1.5345365657474852, "learning_rate": 0.00011746776084407973, "loss": 3.629021406173706, "step": 1002, "token_acc": 0.24034049022633877 }, { "epoch": 0.5880973321606567, "grad_norm": 2.0175488502446672, "learning_rate": 0.0001175849941383353, "loss": 3.6154723167419434, "step": 1003, "token_acc": 0.2418381409494844 }, { "epoch": 0.5886836704778657, "grad_norm": 1.6442053720874912, "learning_rate": 0.00011770222743259087, "loss": 3.567788600921631, "step": 1004, "token_acc": 0.2463777426236573 }, { "epoch": 0.5892700087950747, "grad_norm": 2.3014889718091567, "learning_rate": 0.00011781946072684642, "loss": 3.59326171875, "step": 1005, "token_acc": 0.24266820304692757 }, { "epoch": 0.5898563471122837, "grad_norm": 1.6215891741265107, "learning_rate": 0.00011793669402110198, "loss": 3.587190628051758, "step": 1006, "token_acc": 0.24356150170363747 }, { "epoch": 0.5904426854294929, "grad_norm": 2.1067037835523723, "learning_rate": 0.00011805392731535756, "loss": 3.5938265323638916, "step": 1007, "token_acc": 0.2460848367664104 }, { "epoch": 0.5910290237467019, "grad_norm": 2.0533641055689604, "learning_rate": 0.00011817116060961312, "loss": 3.5194754600524902, "step": 1008, "token_acc": 0.252768340254184 }, { "epoch": 0.5916153620639109, "grad_norm": 1.851628456518367, "learning_rate": 0.0001182883939038687, "loss": 3.5379581451416016, "step": 1009, "token_acc": 0.25209016208226664 }, { "epoch": 0.5922017003811199, "grad_norm": 1.9423931309802098, "learning_rate": 0.00011840562719812426, "loss": 3.5537705421447754, "step": 1010, "token_acc": 0.24610016526699166 }, { "epoch": 0.592788038698329, "grad_norm": 1.4879750769961273, "learning_rate": 0.00011852286049237984, "loss": 3.5941834449768066, "step": 1011, "token_acc": 0.24298500492917552 }, { "epoch": 0.593374377015538, "grad_norm": 1.7865343627672126, "learning_rate": 0.00011864009378663542, "loss": 3.5110604763031006, "step": 1012, "token_acc": 0.2530759994172055 }, { "epoch": 0.593960715332747, "grad_norm": 2.2710812531249096, "learning_rate": 0.00011875732708089098, "loss": 3.5475857257843018, "step": 1013, "token_acc": 0.24811421104637663 }, { "epoch": 0.594547053649956, "grad_norm": 1.7002491154210158, "learning_rate": 0.00011887456037514655, "loss": 3.5835108757019043, "step": 1014, "token_acc": 0.24467408543787103 }, { "epoch": 0.5951333919671651, "grad_norm": 2.2862630423372297, "learning_rate": 0.00011899179366940212, "loss": 3.5338692665100098, "step": 1015, "token_acc": 0.2500873650439978 }, { "epoch": 0.5957197302843741, "grad_norm": 1.3693836962539019, "learning_rate": 0.0001191090269636577, "loss": 3.564061164855957, "step": 1016, "token_acc": 0.24591350811180113 }, { "epoch": 0.5963060686015831, "grad_norm": 2.4472082545421423, "learning_rate": 0.00011922626025791326, "loss": 3.5913989543914795, "step": 1017, "token_acc": 0.24268384782403474 }, { "epoch": 0.5968924069187922, "grad_norm": 1.405334717963625, "learning_rate": 0.00011934349355216883, "loss": 3.565342903137207, "step": 1018, "token_acc": 0.24828950467910144 }, { "epoch": 0.5974787452360012, "grad_norm": 2.331025007443443, "learning_rate": 0.0001194607268464244, "loss": 3.5455679893493652, "step": 1019, "token_acc": 0.24851790871264526 }, { "epoch": 0.5980650835532102, "grad_norm": 1.7212425563401157, "learning_rate": 0.00011957796014067997, "loss": 3.5542187690734863, "step": 1020, "token_acc": 0.2487347733348725 }, { "epoch": 0.5986514218704192, "grad_norm": 1.4235933517609585, "learning_rate": 0.00011969519343493553, "loss": 3.5619888305664062, "step": 1021, "token_acc": 0.2462524929210144 }, { "epoch": 0.5992377601876283, "grad_norm": 1.437780044595699, "learning_rate": 0.00011981242672919108, "loss": 3.528001308441162, "step": 1022, "token_acc": 0.2525707935525284 }, { "epoch": 0.5998240985048373, "grad_norm": 1.8358999290776543, "learning_rate": 0.00011992966002344666, "loss": 3.5367085933685303, "step": 1023, "token_acc": 0.2500249428939008 }, { "epoch": 0.6004104368220463, "grad_norm": 1.7309382950918344, "learning_rate": 0.00012004689331770222, "loss": 3.5310699939727783, "step": 1024, "token_acc": 0.2503434770126728 }, { "epoch": 0.6009967751392553, "grad_norm": 1.7271494701512455, "learning_rate": 0.0001201641266119578, "loss": 3.519312620162964, "step": 1025, "token_acc": 0.24971892370711457 }, { "epoch": 0.6015831134564644, "grad_norm": 1.6427595325413202, "learning_rate": 0.00012028135990621336, "loss": 3.5315117835998535, "step": 1026, "token_acc": 0.24985313527576236 }, { "epoch": 0.6021694517736734, "grad_norm": 2.4052887256712334, "learning_rate": 0.00012039859320046894, "loss": 3.54502534866333, "step": 1027, "token_acc": 0.24903145572272464 }, { "epoch": 0.6027557900908824, "grad_norm": 1.6272249422076415, "learning_rate": 0.0001205158264947245, "loss": 3.530763626098633, "step": 1028, "token_acc": 0.25066152732294644 }, { "epoch": 0.6033421284080914, "grad_norm": 1.6975485142504392, "learning_rate": 0.00012063305978898008, "loss": 3.5671520233154297, "step": 1029, "token_acc": 0.24636307143198585 }, { "epoch": 0.6039284667253005, "grad_norm": 1.9413921795179907, "learning_rate": 0.00012075029308323564, "loss": 3.528944253921509, "step": 1030, "token_acc": 0.24968577844973627 }, { "epoch": 0.6045148050425095, "grad_norm": 1.4661973754385087, "learning_rate": 0.00012086752637749122, "loss": 3.5883588790893555, "step": 1031, "token_acc": 0.24417072832176165 }, { "epoch": 0.6051011433597185, "grad_norm": 1.3027521205746548, "learning_rate": 0.00012098475967174678, "loss": 3.521371841430664, "step": 1032, "token_acc": 0.2522332377904402 }, { "epoch": 0.6056874816769275, "grad_norm": 1.564390745879449, "learning_rate": 0.00012110199296600236, "loss": 3.545868158340454, "step": 1033, "token_acc": 0.24922245260663506 }, { "epoch": 0.6062738199941367, "grad_norm": 2.0398894688915297, "learning_rate": 0.00012121922626025792, "loss": 3.5891737937927246, "step": 1034, "token_acc": 0.24372606620154225 }, { "epoch": 0.6068601583113457, "grad_norm": 1.7606517882190575, "learning_rate": 0.0001213364595545135, "loss": 3.5038208961486816, "step": 1035, "token_acc": 0.2536491752291313 }, { "epoch": 0.6074464966285547, "grad_norm": 2.333883518702912, "learning_rate": 0.00012145369284876906, "loss": 3.5442709922790527, "step": 1036, "token_acc": 0.24726089257643302 }, { "epoch": 0.6080328349457637, "grad_norm": 1.114544756021895, "learning_rate": 0.00012157092614302463, "loss": 3.54575514793396, "step": 1037, "token_acc": 0.2496079379540105 }, { "epoch": 0.6086191732629728, "grad_norm": 2.5881243038473682, "learning_rate": 0.0001216881594372802, "loss": 3.5461928844451904, "step": 1038, "token_acc": 0.2511278234838494 }, { "epoch": 0.6092055115801818, "grad_norm": 1.5597162463514849, "learning_rate": 0.00012180539273153577, "loss": 3.5267446041107178, "step": 1039, "token_acc": 0.25204129706155526 }, { "epoch": 0.6097918498973908, "grad_norm": 2.516686640525285, "learning_rate": 0.00012192262602579132, "loss": 3.530693531036377, "step": 1040, "token_acc": 0.25008318395998586 }, { "epoch": 0.6103781882145998, "grad_norm": 1.6850491492445012, "learning_rate": 0.00012203985932004688, "loss": 3.5639073848724365, "step": 1041, "token_acc": 0.24865802386467173 }, { "epoch": 0.6109645265318089, "grad_norm": 1.4679810620191514, "learning_rate": 0.00012215709261430245, "loss": 3.519987106323242, "step": 1042, "token_acc": 0.25096974597261174 }, { "epoch": 0.6115508648490179, "grad_norm": 1.5092842726632996, "learning_rate": 0.00012227432590855802, "loss": 3.550071954727173, "step": 1043, "token_acc": 0.24859595676997523 }, { "epoch": 0.6121372031662269, "grad_norm": 1.9934694210660846, "learning_rate": 0.0001223915592028136, "loss": 3.4802637100219727, "step": 1044, "token_acc": 0.2578290441368231 }, { "epoch": 0.612723541483436, "grad_norm": 2.060691871395609, "learning_rate": 0.00012250879249706918, "loss": 3.5283963680267334, "step": 1045, "token_acc": 0.24987356309728265 }, { "epoch": 0.613309879800645, "grad_norm": 1.610775755258291, "learning_rate": 0.00012262602579132475, "loss": 3.513293981552124, "step": 1046, "token_acc": 0.2521797858335686 }, { "epoch": 0.613896218117854, "grad_norm": 1.567077117287682, "learning_rate": 0.0001227432590855803, "loss": 3.5368242263793945, "step": 1047, "token_acc": 0.24913905961650692 }, { "epoch": 0.614482556435063, "grad_norm": 1.2397506038477286, "learning_rate": 0.00012286049237983588, "loss": 3.4795591831207275, "step": 1048, "token_acc": 0.2553865767673025 }, { "epoch": 0.6150688947522721, "grad_norm": 2.040014403136, "learning_rate": 0.00012297772567409145, "loss": 3.528956651687622, "step": 1049, "token_acc": 0.25158624874025626 }, { "epoch": 0.6156552330694811, "grad_norm": 1.3637790558172789, "learning_rate": 0.00012309495896834703, "loss": 3.5730528831481934, "step": 1050, "token_acc": 0.24380263666934499 }, { "epoch": 0.6162415713866901, "grad_norm": 2.0188828791662043, "learning_rate": 0.00012321219226260258, "loss": 3.4821696281433105, "step": 1051, "token_acc": 0.2546078234599534 }, { "epoch": 0.6168279097038991, "grad_norm": 1.6221663290511539, "learning_rate": 0.00012332942555685816, "loss": 3.5753936767578125, "step": 1052, "token_acc": 0.24374691758948086 }, { "epoch": 0.6174142480211082, "grad_norm": 1.3056918655843865, "learning_rate": 0.00012344665885111373, "loss": 3.489816188812256, "step": 1053, "token_acc": 0.2523561981796686 }, { "epoch": 0.6180005863383172, "grad_norm": 1.7606388602798402, "learning_rate": 0.0001235638921453693, "loss": 3.536217212677002, "step": 1054, "token_acc": 0.24904610632801868 }, { "epoch": 0.6185869246555262, "grad_norm": 1.8754227219939648, "learning_rate": 0.00012368112543962486, "loss": 3.4936347007751465, "step": 1055, "token_acc": 0.25389181524029886 }, { "epoch": 0.6191732629727352, "grad_norm": 1.627350070674053, "learning_rate": 0.00012379835873388043, "loss": 3.5174760818481445, "step": 1056, "token_acc": 0.2514314801821509 }, { "epoch": 0.6197596012899443, "grad_norm": 2.038677065727236, "learning_rate": 0.000123915592028136, "loss": 3.519094944000244, "step": 1057, "token_acc": 0.2526970338486536 }, { "epoch": 0.6203459396071533, "grad_norm": 2.1219571998137474, "learning_rate": 0.00012403282532239156, "loss": 3.512622833251953, "step": 1058, "token_acc": 0.25292715119002 }, { "epoch": 0.6209322779243623, "grad_norm": 1.299068027420354, "learning_rate": 0.00012415005861664714, "loss": 3.5118026733398438, "step": 1059, "token_acc": 0.2518489933332446 }, { "epoch": 0.6215186162415713, "grad_norm": 1.7519605670437755, "learning_rate": 0.00012426729191090268, "loss": 3.5191869735717773, "step": 1060, "token_acc": 0.2499653107622026 }, { "epoch": 0.6221049545587805, "grad_norm": 1.8271166639104148, "learning_rate": 0.00012438452520515826, "loss": 3.5297532081604004, "step": 1061, "token_acc": 0.2502845398745261 }, { "epoch": 0.6226912928759895, "grad_norm": 1.8922368681259265, "learning_rate": 0.00012450175849941384, "loss": 3.515394687652588, "step": 1062, "token_acc": 0.2519606520606354 }, { "epoch": 0.6232776311931985, "grad_norm": 1.539420971927679, "learning_rate": 0.0001246189917936694, "loss": 3.4920270442962646, "step": 1063, "token_acc": 0.2542557358840476 }, { "epoch": 0.6238639695104075, "grad_norm": 1.9999533787208568, "learning_rate": 0.00012473622508792496, "loss": 3.5001182556152344, "step": 1064, "token_acc": 0.25403221544608034 }, { "epoch": 0.6244503078276166, "grad_norm": 1.7162620503065502, "learning_rate": 0.00012485345838218054, "loss": 3.550278663635254, "step": 1065, "token_acc": 0.24859334770137545 }, { "epoch": 0.6250366461448256, "grad_norm": 2.0711543389115703, "learning_rate": 0.00012497069167643612, "loss": 3.5615978240966797, "step": 1066, "token_acc": 0.2443112978698021 }, { "epoch": 0.6256229844620346, "grad_norm": 1.358213927617702, "learning_rate": 0.0001250879249706917, "loss": 3.5284581184387207, "step": 1067, "token_acc": 0.24863853143998796 }, { "epoch": 0.6262093227792436, "grad_norm": 2.4227399183564216, "learning_rate": 0.00012520515826494724, "loss": 3.548661231994629, "step": 1068, "token_acc": 0.24701709041728034 }, { "epoch": 0.6267956610964527, "grad_norm": 1.6267457221115287, "learning_rate": 0.00012532239155920282, "loss": 3.444316864013672, "step": 1069, "token_acc": 0.26093052867080957 }, { "epoch": 0.6273819994136617, "grad_norm": 2.1591777111398778, "learning_rate": 0.0001254396248534584, "loss": 3.5324316024780273, "step": 1070, "token_acc": 0.2469512036434613 }, { "epoch": 0.6279683377308707, "grad_norm": 1.8230344124061826, "learning_rate": 0.00012555685814771397, "loss": 3.5422282218933105, "step": 1071, "token_acc": 0.24894721302814166 }, { "epoch": 0.6285546760480798, "grad_norm": 1.590985352686556, "learning_rate": 0.00012567409144196955, "loss": 3.5391898155212402, "step": 1072, "token_acc": 0.24830772853022545 }, { "epoch": 0.6291410143652888, "grad_norm": 2.5241418747300806, "learning_rate": 0.0001257913247362251, "loss": 3.5325958728790283, "step": 1073, "token_acc": 0.24972000347854503 }, { "epoch": 0.6297273526824978, "grad_norm": 1.186593901828575, "learning_rate": 0.00012590855803048067, "loss": 3.550349235534668, "step": 1074, "token_acc": 0.24905727527038732 }, { "epoch": 0.6303136909997068, "grad_norm": 1.8635284633897857, "learning_rate": 0.00012602579132473625, "loss": 3.477795124053955, "step": 1075, "token_acc": 0.2554012947507995 }, { "epoch": 0.6309000293169159, "grad_norm": 1.2308935970133303, "learning_rate": 0.0001261430246189918, "loss": 3.4932165145874023, "step": 1076, "token_acc": 0.2534502923976608 }, { "epoch": 0.6314863676341249, "grad_norm": 1.6951857620001005, "learning_rate": 0.00012626025791324735, "loss": 3.4882161617279053, "step": 1077, "token_acc": 0.25234785113039837 }, { "epoch": 0.6320727059513339, "grad_norm": 2.084310509374996, "learning_rate": 0.00012637749120750292, "loss": 3.517474889755249, "step": 1078, "token_acc": 0.250917833673959 }, { "epoch": 0.6326590442685429, "grad_norm": 1.5971346790556673, "learning_rate": 0.0001264947245017585, "loss": 3.5417568683624268, "step": 1079, "token_acc": 0.244869416447781 }, { "epoch": 0.633245382585752, "grad_norm": 1.7307696948581068, "learning_rate": 0.00012661195779601407, "loss": 3.5061228275299072, "step": 1080, "token_acc": 0.25109964992918415 }, { "epoch": 0.633831720902961, "grad_norm": 1.422347164184185, "learning_rate": 0.00012672919109026965, "loss": 3.4645237922668457, "step": 1081, "token_acc": 0.2591485483660982 }, { "epoch": 0.63441805922017, "grad_norm": 1.5545632056258527, "learning_rate": 0.0001268464243845252, "loss": 3.4975290298461914, "step": 1082, "token_acc": 0.2529178124045218 }, { "epoch": 0.635004397537379, "grad_norm": 1.7581962442441528, "learning_rate": 0.00012696365767878078, "loss": 3.577753782272339, "step": 1083, "token_acc": 0.24156160931829354 }, { "epoch": 0.6355907358545881, "grad_norm": 2.029382316035274, "learning_rate": 0.00012708089097303635, "loss": 3.5328097343444824, "step": 1084, "token_acc": 0.25042286473071024 }, { "epoch": 0.6361770741717971, "grad_norm": 1.744941716582373, "learning_rate": 0.00012719812426729193, "loss": 3.5380783081054688, "step": 1085, "token_acc": 0.24770657102253502 }, { "epoch": 0.6367634124890061, "grad_norm": 2.1457773193567626, "learning_rate": 0.00012731535756154748, "loss": 3.552659511566162, "step": 1086, "token_acc": 0.24597393222785918 }, { "epoch": 0.6373497508062151, "grad_norm": 1.1034536806597226, "learning_rate": 0.00012743259085580305, "loss": 3.5078885555267334, "step": 1087, "token_acc": 0.2502465059597607 }, { "epoch": 0.6379360891234243, "grad_norm": 2.650228498942423, "learning_rate": 0.00012754982415005863, "loss": 3.509427070617676, "step": 1088, "token_acc": 0.2510381370216017 }, { "epoch": 0.6385224274406333, "grad_norm": 1.3299083509225766, "learning_rate": 0.0001276670574443142, "loss": 3.5170092582702637, "step": 1089, "token_acc": 0.25079531505728314 }, { "epoch": 0.6391087657578423, "grad_norm": 2.413161617838325, "learning_rate": 0.00012778429073856976, "loss": 3.515328884124756, "step": 1090, "token_acc": 0.252939483361085 }, { "epoch": 0.6396951040750513, "grad_norm": 1.5195580244089815, "learning_rate": 0.00012790152403282533, "loss": 3.5002281665802, "step": 1091, "token_acc": 0.2523816193161548 }, { "epoch": 0.6402814423922604, "grad_norm": 1.827565115895193, "learning_rate": 0.0001280187573270809, "loss": 3.4980390071868896, "step": 1092, "token_acc": 0.25216120565226546 }, { "epoch": 0.6408677807094694, "grad_norm": 1.3941520607158913, "learning_rate": 0.00012813599062133646, "loss": 3.578462600708008, "step": 1093, "token_acc": 0.24434646673142302 }, { "epoch": 0.6414541190266784, "grad_norm": 1.6278234400731246, "learning_rate": 0.00012825322391559203, "loss": 3.492392063140869, "step": 1094, "token_acc": 0.25403324518525894 }, { "epoch": 0.6420404573438874, "grad_norm": 1.498206000755919, "learning_rate": 0.00012837045720984758, "loss": 3.4665799140930176, "step": 1095, "token_acc": 0.2553039102827884 }, { "epoch": 0.6426267956610965, "grad_norm": 1.8632723546682861, "learning_rate": 0.00012848769050410316, "loss": 3.488762378692627, "step": 1096, "token_acc": 0.25406301404529924 }, { "epoch": 0.6432131339783055, "grad_norm": 1.6216461967877078, "learning_rate": 0.00012860492379835874, "loss": 3.483595371246338, "step": 1097, "token_acc": 0.2537727592364937 }, { "epoch": 0.6437994722955145, "grad_norm": 1.530482642788609, "learning_rate": 0.0001287221570926143, "loss": 3.5086817741394043, "step": 1098, "token_acc": 0.25126196268846124 }, { "epoch": 0.6443858106127235, "grad_norm": 1.5286022307510954, "learning_rate": 0.00012883939038686986, "loss": 3.517098903656006, "step": 1099, "token_acc": 0.25082736680740236 }, { "epoch": 0.6449721489299326, "grad_norm": 1.8723222869852563, "learning_rate": 0.00012895662368112544, "loss": 3.5396828651428223, "step": 1100, "token_acc": 0.24596171980528583 }, { "epoch": 0.6455584872471416, "grad_norm": 1.2057591076547487, "learning_rate": 0.00012907385697538101, "loss": 3.5273144245147705, "step": 1101, "token_acc": 0.2507911666147651 }, { "epoch": 0.6461448255643506, "grad_norm": 1.56219115690715, "learning_rate": 0.0001291910902696366, "loss": 3.467332601547241, "step": 1102, "token_acc": 0.25445789832907806 }, { "epoch": 0.6467311638815597, "grad_norm": 1.8889136923915626, "learning_rate": 0.00012930832356389214, "loss": 3.4939475059509277, "step": 1103, "token_acc": 0.25297546229087486 }, { "epoch": 0.6473175021987687, "grad_norm": 1.369326308728851, "learning_rate": 0.00012942555685814772, "loss": 3.47835111618042, "step": 1104, "token_acc": 0.25634802333960266 }, { "epoch": 0.6479038405159777, "grad_norm": 1.7569054045285029, "learning_rate": 0.0001295427901524033, "loss": 3.535393238067627, "step": 1105, "token_acc": 0.2467586940201812 }, { "epoch": 0.6484901788331867, "grad_norm": 1.4719551234319308, "learning_rate": 0.00012966002344665887, "loss": 3.478846549987793, "step": 1106, "token_acc": 0.25528716923713135 }, { "epoch": 0.6490765171503958, "grad_norm": 2.0550446970114646, "learning_rate": 0.00012977725674091445, "loss": 3.497086524963379, "step": 1107, "token_acc": 0.2520218300360883 }, { "epoch": 0.6496628554676048, "grad_norm": 1.6069513815207634, "learning_rate": 0.00012989449003517, "loss": 3.5329363346099854, "step": 1108, "token_acc": 0.24734512155017582 }, { "epoch": 0.6502491937848138, "grad_norm": 1.2804483281919128, "learning_rate": 0.00013001172332942557, "loss": 3.424595832824707, "step": 1109, "token_acc": 0.2618272569444444 }, { "epoch": 0.6508355321020228, "grad_norm": 2.039101242262156, "learning_rate": 0.00013012895662368115, "loss": 3.5017282962799072, "step": 1110, "token_acc": 0.2523237122569409 }, { "epoch": 0.6514218704192319, "grad_norm": 1.3664521359312445, "learning_rate": 0.0001302461899179367, "loss": 3.445675849914551, "step": 1111, "token_acc": 0.2569971745844011 }, { "epoch": 0.6520082087364409, "grad_norm": 2.1525887606817453, "learning_rate": 0.00013036342321219227, "loss": 3.497997283935547, "step": 1112, "token_acc": 0.2519394990908191 }, { "epoch": 0.6525945470536499, "grad_norm": 1.3205000448848785, "learning_rate": 0.00013048065650644782, "loss": 3.463045835494995, "step": 1113, "token_acc": 0.25626202755753985 }, { "epoch": 0.6531808853708589, "grad_norm": 2.1881990930939876, "learning_rate": 0.0001305978898007034, "loss": 3.532336473464966, "step": 1114, "token_acc": 0.2501864373418055 }, { "epoch": 0.653767223688068, "grad_norm": 1.3557148426305365, "learning_rate": 0.00013071512309495897, "loss": 3.5048410892486572, "step": 1115, "token_acc": 0.2526701980424238 }, { "epoch": 0.6543535620052771, "grad_norm": 2.090818690777382, "learning_rate": 0.00013083235638921455, "loss": 3.5050439834594727, "step": 1116, "token_acc": 0.2500398582588884 }, { "epoch": 0.6549399003224861, "grad_norm": 1.399410097228022, "learning_rate": 0.0001309495896834701, "loss": 3.448899030685425, "step": 1117, "token_acc": 0.25704342492384785 }, { "epoch": 0.6555262386396951, "grad_norm": 1.2068609383343245, "learning_rate": 0.00013106682297772568, "loss": 3.5348777770996094, "step": 1118, "token_acc": 0.24623374349546712 }, { "epoch": 0.6561125769569042, "grad_norm": 1.958200666790498, "learning_rate": 0.00013118405627198125, "loss": 3.5034704208374023, "step": 1119, "token_acc": 0.25266238264113544 }, { "epoch": 0.6566989152741132, "grad_norm": 1.489734388560857, "learning_rate": 0.00013130128956623683, "loss": 3.477001667022705, "step": 1120, "token_acc": 0.25504203392592795 }, { "epoch": 0.6572852535913222, "grad_norm": 1.6449328602949582, "learning_rate": 0.00013141852286049238, "loss": 3.4948043823242188, "step": 1121, "token_acc": 0.2527632082769042 }, { "epoch": 0.6578715919085312, "grad_norm": 1.5537009140122566, "learning_rate": 0.00013153575615474795, "loss": 3.535090208053589, "step": 1122, "token_acc": 0.24608306261345386 }, { "epoch": 0.6584579302257403, "grad_norm": 1.4467939324894372, "learning_rate": 0.00013165298944900353, "loss": 3.507394790649414, "step": 1123, "token_acc": 0.25186225504791665 }, { "epoch": 0.6590442685429493, "grad_norm": 1.848536295175601, "learning_rate": 0.0001317702227432591, "loss": 3.4809556007385254, "step": 1124, "token_acc": 0.25317463170164367 }, { "epoch": 0.6596306068601583, "grad_norm": 1.3540894022617835, "learning_rate": 0.00013188745603751466, "loss": 3.420164108276367, "step": 1125, "token_acc": 0.2625032262142417 }, { "epoch": 0.6602169451773673, "grad_norm": 1.7658244362011437, "learning_rate": 0.00013200468933177023, "loss": 3.476072311401367, "step": 1126, "token_acc": 0.2514708501888419 }, { "epoch": 0.6608032834945764, "grad_norm": 1.2030198146154674, "learning_rate": 0.0001321219226260258, "loss": 3.4701123237609863, "step": 1127, "token_acc": 0.2556297174224939 }, { "epoch": 0.6613896218117854, "grad_norm": 1.9478204145441387, "learning_rate": 0.00013223915592028138, "loss": 3.509843111038208, "step": 1128, "token_acc": 0.2506740699833325 }, { "epoch": 0.6619759601289944, "grad_norm": 1.5709301998951004, "learning_rate": 0.00013235638921453693, "loss": 3.4634764194488525, "step": 1129, "token_acc": 0.2570987277974231 }, { "epoch": 0.6625622984462035, "grad_norm": 1.6644135462592604, "learning_rate": 0.00013247362250879248, "loss": 3.4557394981384277, "step": 1130, "token_acc": 0.25608796032454983 }, { "epoch": 0.6631486367634125, "grad_norm": 1.3209046507378825, "learning_rate": 0.00013259085580304806, "loss": 3.492227792739868, "step": 1131, "token_acc": 0.2507001174390543 }, { "epoch": 0.6637349750806215, "grad_norm": 1.601190670556154, "learning_rate": 0.00013270808909730364, "loss": 3.505174160003662, "step": 1132, "token_acc": 0.25047178863868985 }, { "epoch": 0.6643213133978305, "grad_norm": 1.2937312085375858, "learning_rate": 0.0001328253223915592, "loss": 3.5209531784057617, "step": 1133, "token_acc": 0.25001055319867455 }, { "epoch": 0.6649076517150396, "grad_norm": 1.5136100568934243, "learning_rate": 0.00013294255568581476, "loss": 3.5230441093444824, "step": 1134, "token_acc": 0.2491668412807695 }, { "epoch": 0.6654939900322486, "grad_norm": 1.3476310158466427, "learning_rate": 0.00013305978898007034, "loss": 3.476250410079956, "step": 1135, "token_acc": 0.2552725572454639 }, { "epoch": 0.6660803283494576, "grad_norm": 1.612856545570486, "learning_rate": 0.00013317702227432591, "loss": 3.4437780380249023, "step": 1136, "token_acc": 0.26018507119185746 }, { "epoch": 0.6666666666666666, "grad_norm": 1.6901966602018512, "learning_rate": 0.0001332942555685815, "loss": 3.492274522781372, "step": 1137, "token_acc": 0.2519536002335868 }, { "epoch": 0.6672530049838757, "grad_norm": 1.6451507271722177, "learning_rate": 0.00013341148886283707, "loss": 3.4644644260406494, "step": 1138, "token_acc": 0.2554292386809039 }, { "epoch": 0.6678393433010847, "grad_norm": 1.7317421627476886, "learning_rate": 0.00013352872215709262, "loss": 3.485468626022339, "step": 1139, "token_acc": 0.2518462972399936 }, { "epoch": 0.6684256816182937, "grad_norm": 1.406611673191925, "learning_rate": 0.0001336459554513482, "loss": 3.4783644676208496, "step": 1140, "token_acc": 0.25571126222589574 }, { "epoch": 0.6690120199355027, "grad_norm": 1.4433754487081043, "learning_rate": 0.00013376318874560377, "loss": 3.477328300476074, "step": 1141, "token_acc": 0.2538552512580294 }, { "epoch": 0.6695983582527119, "grad_norm": 1.6052659185720268, "learning_rate": 0.00013388042203985934, "loss": 3.451526641845703, "step": 1142, "token_acc": 0.2585951539327396 }, { "epoch": 0.6701846965699209, "grad_norm": 1.935123880711027, "learning_rate": 0.0001339976553341149, "loss": 3.463585138320923, "step": 1143, "token_acc": 0.2562888924376216 }, { "epoch": 0.6707710348871299, "grad_norm": 1.7345535778589711, "learning_rate": 0.00013411488862837047, "loss": 3.493476390838623, "step": 1144, "token_acc": 0.2517500955086711 }, { "epoch": 0.6713573732043389, "grad_norm": 1.9665978031881401, "learning_rate": 0.00013423212192262605, "loss": 3.4867167472839355, "step": 1145, "token_acc": 0.2516953431275492 }, { "epoch": 0.671943711521548, "grad_norm": 1.2055017270599082, "learning_rate": 0.00013434935521688162, "loss": 3.4312050342559814, "step": 1146, "token_acc": 0.26020613419226907 }, { "epoch": 0.672530049838757, "grad_norm": 1.9362557441138761, "learning_rate": 0.00013446658851113717, "loss": 3.4426937103271484, "step": 1147, "token_acc": 0.25910963571782586 }, { "epoch": 0.673116388155966, "grad_norm": 1.4747065249377906, "learning_rate": 0.00013458382180539272, "loss": 3.4932241439819336, "step": 1148, "token_acc": 0.25150536348048014 }, { "epoch": 0.673702726473175, "grad_norm": 1.7730607946554675, "learning_rate": 0.0001347010550996483, "loss": 3.4713006019592285, "step": 1149, "token_acc": 0.25320306808930026 }, { "epoch": 0.6742890647903841, "grad_norm": 1.4865355728025393, "learning_rate": 0.00013481828839390387, "loss": 3.488659381866455, "step": 1150, "token_acc": 0.25279314240933526 }, { "epoch": 0.6748754031075931, "grad_norm": 1.648445234210883, "learning_rate": 0.00013493552168815945, "loss": 3.4634125232696533, "step": 1151, "token_acc": 0.25615860421577596 }, { "epoch": 0.6754617414248021, "grad_norm": 1.4977150332918276, "learning_rate": 0.000135052754982415, "loss": 3.4460153579711914, "step": 1152, "token_acc": 0.2573434857078827 }, { "epoch": 0.6760480797420111, "grad_norm": 1.6378107882960005, "learning_rate": 0.00013516998827667058, "loss": 3.458651065826416, "step": 1153, "token_acc": 0.25469644059239316 }, { "epoch": 0.6766344180592202, "grad_norm": 1.4152790273997304, "learning_rate": 0.00013528722157092615, "loss": 3.4933724403381348, "step": 1154, "token_acc": 0.25211046502314705 }, { "epoch": 0.6772207563764292, "grad_norm": 2.382720307770914, "learning_rate": 0.00013540445486518173, "loss": 3.4635462760925293, "step": 1155, "token_acc": 0.2553938716700291 }, { "epoch": 0.6778070946936382, "grad_norm": 0.9658115323946195, "learning_rate": 0.00013552168815943728, "loss": 3.487952470779419, "step": 1156, "token_acc": 0.25169223508409233 }, { "epoch": 0.6783934330108473, "grad_norm": 2.370806679735023, "learning_rate": 0.00013563892145369285, "loss": 3.427581310272217, "step": 1157, "token_acc": 0.2600588493489181 }, { "epoch": 0.6789797713280563, "grad_norm": 1.545395581925495, "learning_rate": 0.00013575615474794843, "loss": 3.5274524688720703, "step": 1158, "token_acc": 0.24657122937012402 }, { "epoch": 0.6795661096452653, "grad_norm": 2.5188462999917864, "learning_rate": 0.000135873388042204, "loss": 3.491748809814453, "step": 1159, "token_acc": 0.2526028742489678 }, { "epoch": 0.6801524479624743, "grad_norm": 1.5118078808710111, "learning_rate": 0.00013599062133645955, "loss": 3.411062717437744, "step": 1160, "token_acc": 0.26083639642138845 }, { "epoch": 0.6807387862796834, "grad_norm": 1.715140864966689, "learning_rate": 0.00013610785463071513, "loss": 3.5262625217437744, "step": 1161, "token_acc": 0.24832881662149955 }, { "epoch": 0.6813251245968924, "grad_norm": 1.8745914458701944, "learning_rate": 0.0001362250879249707, "loss": 3.4538118839263916, "step": 1162, "token_acc": 0.2572644479944114 }, { "epoch": 0.6819114629141014, "grad_norm": 1.6597030765621454, "learning_rate": 0.00013634232121922628, "loss": 3.503361701965332, "step": 1163, "token_acc": 0.25056399655830935 }, { "epoch": 0.6824978012313104, "grad_norm": 2.075399261672626, "learning_rate": 0.00013645955451348183, "loss": 3.4680135250091553, "step": 1164, "token_acc": 0.25656342223268597 }, { "epoch": 0.6830841395485195, "grad_norm": 1.3848175296512513, "learning_rate": 0.00013657678780773738, "loss": 3.4982101917266846, "step": 1165, "token_acc": 0.24971137746221148 }, { "epoch": 0.6836704778657285, "grad_norm": 1.8366172051748701, "learning_rate": 0.00013669402110199296, "loss": 3.479870557785034, "step": 1166, "token_acc": 0.25457881128949855 }, { "epoch": 0.6842568161829375, "grad_norm": 1.6676678514974097, "learning_rate": 0.00013681125439624853, "loss": 3.505056858062744, "step": 1167, "token_acc": 0.2515033080573154 }, { "epoch": 0.6848431545001465, "grad_norm": 1.545090889989793, "learning_rate": 0.0001369284876905041, "loss": 3.493459701538086, "step": 1168, "token_acc": 0.25335046719247495 }, { "epoch": 0.6854294928173557, "grad_norm": 1.3231709050723404, "learning_rate": 0.00013704572098475966, "loss": 3.4779584407806396, "step": 1169, "token_acc": 0.25170333343946005 }, { "epoch": 0.6860158311345647, "grad_norm": 1.274085655715862, "learning_rate": 0.00013716295427901524, "loss": 3.4401192665100098, "step": 1170, "token_acc": 0.25725683596271526 }, { "epoch": 0.6866021694517737, "grad_norm": 1.5637680209566254, "learning_rate": 0.0001372801875732708, "loss": 3.4660308361053467, "step": 1171, "token_acc": 0.25411138938210914 }, { "epoch": 0.6871885077689827, "grad_norm": 2.0975606997572345, "learning_rate": 0.0001373974208675264, "loss": 3.4714128971099854, "step": 1172, "token_acc": 0.2526440723348163 }, { "epoch": 0.6877748460861918, "grad_norm": 1.2120288961775754, "learning_rate": 0.00013751465416178197, "loss": 3.46285343170166, "step": 1173, "token_acc": 0.2542201591511936 }, { "epoch": 0.6883611844034008, "grad_norm": 1.9373218951808908, "learning_rate": 0.00013763188745603751, "loss": 3.4868929386138916, "step": 1174, "token_acc": 0.2544079962912207 }, { "epoch": 0.6889475227206098, "grad_norm": 1.2102680020113552, "learning_rate": 0.0001377491207502931, "loss": 3.4891185760498047, "step": 1175, "token_acc": 0.25215280838980186 }, { "epoch": 0.6895338610378188, "grad_norm": 2.120038596659682, "learning_rate": 0.00013786635404454867, "loss": 3.500941038131714, "step": 1176, "token_acc": 0.2507066243395172 }, { "epoch": 0.6901201993550279, "grad_norm": 1.3143829428572822, "learning_rate": 0.00013798358733880424, "loss": 3.467639446258545, "step": 1177, "token_acc": 0.2514419411656458 }, { "epoch": 0.6907065376722369, "grad_norm": 2.216158579059092, "learning_rate": 0.0001381008206330598, "loss": 3.5305237770080566, "step": 1178, "token_acc": 0.24700482402920784 }, { "epoch": 0.6912928759894459, "grad_norm": 1.5039306542106392, "learning_rate": 0.00013821805392731537, "loss": 3.49605131149292, "step": 1179, "token_acc": 0.2527539731284977 }, { "epoch": 0.6918792143066549, "grad_norm": 1.4823825099735322, "learning_rate": 0.00013833528722157095, "loss": 3.4661059379577637, "step": 1180, "token_acc": 0.25359403415729426 }, { "epoch": 0.692465552623864, "grad_norm": 1.2336420752638715, "learning_rate": 0.00013845252051582652, "loss": 3.5125927925109863, "step": 1181, "token_acc": 0.24958010983545514 }, { "epoch": 0.693051890941073, "grad_norm": 1.5499329078256574, "learning_rate": 0.00013856975381008207, "loss": 3.4173507690429688, "step": 1182, "token_acc": 0.2592866948644761 }, { "epoch": 0.693638229258282, "grad_norm": 1.704060419983667, "learning_rate": 0.00013868698710433762, "loss": 3.436004161834717, "step": 1183, "token_acc": 0.2581195476575121 }, { "epoch": 0.6942245675754911, "grad_norm": 1.5624784944045353, "learning_rate": 0.0001388042203985932, "loss": 3.4286084175109863, "step": 1184, "token_acc": 0.2586951738775933 }, { "epoch": 0.6948109058927001, "grad_norm": 1.6598018768124778, "learning_rate": 0.00013892145369284877, "loss": 3.4594101905822754, "step": 1185, "token_acc": 0.2564892108369623 }, { "epoch": 0.6953972442099091, "grad_norm": 1.630253826126385, "learning_rate": 0.00013903868698710435, "loss": 3.4498209953308105, "step": 1186, "token_acc": 0.25624231551434823 }, { "epoch": 0.6959835825271181, "grad_norm": 1.2925787028343558, "learning_rate": 0.0001391559202813599, "loss": 3.47819185256958, "step": 1187, "token_acc": 0.2531844705595316 }, { "epoch": 0.6965699208443272, "grad_norm": 1.7395817545075576, "learning_rate": 0.00013927315357561547, "loss": 3.4408469200134277, "step": 1188, "token_acc": 0.2551191272939393 }, { "epoch": 0.6971562591615362, "grad_norm": 1.4333187255233182, "learning_rate": 0.00013939038686987105, "loss": 3.4492459297180176, "step": 1189, "token_acc": 0.25624443284895826 }, { "epoch": 0.6977425974787452, "grad_norm": 1.8954820162326889, "learning_rate": 0.00013950762016412663, "loss": 3.497751235961914, "step": 1190, "token_acc": 0.2515698723518011 }, { "epoch": 0.6983289357959542, "grad_norm": 1.0768710632639547, "learning_rate": 0.00013962485345838218, "loss": 3.466113567352295, "step": 1191, "token_acc": 0.2554972332765328 }, { "epoch": 0.6989152741131633, "grad_norm": 2.202932416719709, "learning_rate": 0.00013974208675263775, "loss": 3.4374327659606934, "step": 1192, "token_acc": 0.2571900485705379 }, { "epoch": 0.6995016124303723, "grad_norm": 1.2470936737497804, "learning_rate": 0.00013985932004689333, "loss": 3.480226516723633, "step": 1193, "token_acc": 0.2524677906487747 }, { "epoch": 0.7000879507475813, "grad_norm": 1.566575092916346, "learning_rate": 0.0001399765533411489, "loss": 3.4235100746154785, "step": 1194, "token_acc": 0.2607239874492492 }, { "epoch": 0.7006742890647903, "grad_norm": 1.6899843128382854, "learning_rate": 0.00014009378663540445, "loss": 3.486229181289673, "step": 1195, "token_acc": 0.25187803566081685 }, { "epoch": 0.7012606273819995, "grad_norm": 1.4562076013023741, "learning_rate": 0.00014021101992966003, "loss": 3.45029878616333, "step": 1196, "token_acc": 0.2562012248282901 }, { "epoch": 0.7018469656992085, "grad_norm": 1.800918602986552, "learning_rate": 0.0001403282532239156, "loss": 3.4462833404541016, "step": 1197, "token_acc": 0.25553232973492196 }, { "epoch": 0.7024333040164175, "grad_norm": 1.4128034542778685, "learning_rate": 0.00014044548651817118, "loss": 3.5032455921173096, "step": 1198, "token_acc": 0.24997202990068662 }, { "epoch": 0.7030196423336265, "grad_norm": 1.4769940162096624, "learning_rate": 0.00014056271981242676, "loss": 3.485568046569824, "step": 1199, "token_acc": 0.2522193805646135 }, { "epoch": 0.7036059806508356, "grad_norm": 1.3036482528323072, "learning_rate": 0.00014067995310668228, "loss": 3.474188804626465, "step": 1200, "token_acc": 0.2537372817175032 }, { "epoch": 0.7041923189680446, "grad_norm": 1.4723395310551526, "learning_rate": 0.00014079718640093786, "loss": 3.4770054817199707, "step": 1201, "token_acc": 0.25310733851583983 }, { "epoch": 0.7047786572852536, "grad_norm": 1.4346245786714835, "learning_rate": 0.00014091441969519343, "loss": 3.4641005992889404, "step": 1202, "token_acc": 0.2551397503438611 }, { "epoch": 0.7053649956024626, "grad_norm": 1.9910262359209665, "learning_rate": 0.000141031652989449, "loss": 3.499342679977417, "step": 1203, "token_acc": 0.2502527659057731 }, { "epoch": 0.7059513339196717, "grad_norm": 1.1008432105637287, "learning_rate": 0.0001411488862837046, "loss": 3.413872480392456, "step": 1204, "token_acc": 0.2591675108691064 }, { "epoch": 0.7065376722368807, "grad_norm": 1.9399993744358432, "learning_rate": 0.00014126611957796014, "loss": 3.451542615890503, "step": 1205, "token_acc": 0.2568799910138893 }, { "epoch": 0.7071240105540897, "grad_norm": 1.3098845892311908, "learning_rate": 0.0001413833528722157, "loss": 3.46754789352417, "step": 1206, "token_acc": 0.2540631990827233 }, { "epoch": 0.7077103488712987, "grad_norm": 1.9204628656422873, "learning_rate": 0.0001415005861664713, "loss": 3.489941120147705, "step": 1207, "token_acc": 0.25179534387438607 }, { "epoch": 0.7082966871885078, "grad_norm": 1.1355505445567018, "learning_rate": 0.00014161781946072686, "loss": 3.461859703063965, "step": 1208, "token_acc": 0.2556939287453718 }, { "epoch": 0.7088830255057168, "grad_norm": 1.7566338045040304, "learning_rate": 0.00014173505275498241, "loss": 3.433861255645752, "step": 1209, "token_acc": 0.25782065972398266 }, { "epoch": 0.7094693638229258, "grad_norm": 1.588510194498836, "learning_rate": 0.000141852286049238, "loss": 3.4759602546691895, "step": 1210, "token_acc": 0.25250954370965856 }, { "epoch": 0.7100557021401348, "grad_norm": 1.4002796477850517, "learning_rate": 0.00014196951934349357, "loss": 3.443500518798828, "step": 1211, "token_acc": 0.25674212854540723 }, { "epoch": 0.7106420404573439, "grad_norm": 1.4191876157312409, "learning_rate": 0.00014208675263774914, "loss": 3.4555506706237793, "step": 1212, "token_acc": 0.2563782834829119 }, { "epoch": 0.7112283787745529, "grad_norm": 1.5005901937434414, "learning_rate": 0.0001422039859320047, "loss": 3.430723190307617, "step": 1213, "token_acc": 0.25753400348646643 }, { "epoch": 0.7118147170917619, "grad_norm": 1.6088040769827598, "learning_rate": 0.00014232121922626027, "loss": 3.4346261024475098, "step": 1214, "token_acc": 0.2557810155872556 }, { "epoch": 0.712401055408971, "grad_norm": 1.3423283013158216, "learning_rate": 0.00014243845252051584, "loss": 3.4267687797546387, "step": 1215, "token_acc": 0.25865128660159714 }, { "epoch": 0.71298739372618, "grad_norm": 1.6098935344855423, "learning_rate": 0.00014255568581477142, "loss": 3.4264941215515137, "step": 1216, "token_acc": 0.25920328329785747 }, { "epoch": 0.713573732043389, "grad_norm": 2.020396576089131, "learning_rate": 0.00014267291910902697, "loss": 3.4637906551361084, "step": 1217, "token_acc": 0.25425219345065625 }, { "epoch": 0.714160070360598, "grad_norm": 1.6529506103097913, "learning_rate": 0.00014279015240328252, "loss": 3.469149112701416, "step": 1218, "token_acc": 0.2524846926964005 }, { "epoch": 0.7147464086778071, "grad_norm": 1.4243087086883681, "learning_rate": 0.0001429073856975381, "loss": 3.425438165664673, "step": 1219, "token_acc": 0.2580548409199622 }, { "epoch": 0.7153327469950161, "grad_norm": 1.747125321342865, "learning_rate": 0.00014302461899179367, "loss": 3.462268590927124, "step": 1220, "token_acc": 0.25417433336061224 }, { "epoch": 0.7159190853122251, "grad_norm": 1.1244870775445328, "learning_rate": 0.00014314185228604925, "loss": 3.441065549850464, "step": 1221, "token_acc": 0.2571446563058865 }, { "epoch": 0.7165054236294341, "grad_norm": 1.7202691526506555, "learning_rate": 0.0001432590855803048, "loss": 3.4279541969299316, "step": 1222, "token_acc": 0.25888663563480474 }, { "epoch": 0.7170917619466433, "grad_norm": 1.2019259449746946, "learning_rate": 0.00014337631887456037, "loss": 3.457341432571411, "step": 1223, "token_acc": 0.25481550847347406 }, { "epoch": 0.7176781002638523, "grad_norm": 1.4470131795694363, "learning_rate": 0.00014349355216881595, "loss": 3.3964061737060547, "step": 1224, "token_acc": 0.2638310887113639 }, { "epoch": 0.7182644385810613, "grad_norm": 1.3847022714095332, "learning_rate": 0.00014361078546307153, "loss": 3.4749650955200195, "step": 1225, "token_acc": 0.25443063008424854 }, { "epoch": 0.7188507768982703, "grad_norm": 2.4091107675152097, "learning_rate": 0.00014372801875732708, "loss": 3.4837698936462402, "step": 1226, "token_acc": 0.25130256462188916 }, { "epoch": 0.7194371152154794, "grad_norm": 1.070933155686379, "learning_rate": 0.00014384525205158265, "loss": 3.4488720893859863, "step": 1227, "token_acc": 0.2558907879417146 }, { "epoch": 0.7200234535326884, "grad_norm": 1.90006729372095, "learning_rate": 0.00014396248534583823, "loss": 3.4941225051879883, "step": 1228, "token_acc": 0.24938360225960488 }, { "epoch": 0.7206097918498974, "grad_norm": 1.4756717727350943, "learning_rate": 0.0001440797186400938, "loss": 3.41542911529541, "step": 1229, "token_acc": 0.2618272421771129 }, { "epoch": 0.7211961301671064, "grad_norm": 1.717199483069328, "learning_rate": 0.00014419695193434935, "loss": 3.458010673522949, "step": 1230, "token_acc": 0.2551103571437696 }, { "epoch": 0.7217824684843155, "grad_norm": 1.5982566554184876, "learning_rate": 0.00014431418522860493, "loss": 3.472919464111328, "step": 1231, "token_acc": 0.2528250758134579 }, { "epoch": 0.7223688068015245, "grad_norm": 1.5848855529324406, "learning_rate": 0.0001444314185228605, "loss": 3.4550588130950928, "step": 1232, "token_acc": 0.25651067677394257 }, { "epoch": 0.7229551451187335, "grad_norm": 1.1142469544379312, "learning_rate": 0.00014454865181711608, "loss": 3.4776453971862793, "step": 1233, "token_acc": 0.25244300518134716 }, { "epoch": 0.7235414834359425, "grad_norm": 1.4252964258088752, "learning_rate": 0.00014466588511137166, "loss": 3.4053268432617188, "step": 1234, "token_acc": 0.26168830280563043 }, { "epoch": 0.7241278217531516, "grad_norm": 1.5215669994356336, "learning_rate": 0.0001447831184056272, "loss": 3.419320583343506, "step": 1235, "token_acc": 0.2621442456807112 }, { "epoch": 0.7247141600703606, "grad_norm": 1.7620545535774552, "learning_rate": 0.00014490035169988276, "loss": 3.4716854095458984, "step": 1236, "token_acc": 0.2529282595010434 }, { "epoch": 0.7253004983875696, "grad_norm": 1.125677312131224, "learning_rate": 0.00014501758499413833, "loss": 3.4332993030548096, "step": 1237, "token_acc": 0.2582909167235137 }, { "epoch": 0.7258868367047786, "grad_norm": 1.6530519878535423, "learning_rate": 0.0001451348182883939, "loss": 3.438295841217041, "step": 1238, "token_acc": 0.25633007656415147 }, { "epoch": 0.7264731750219877, "grad_norm": 1.3717098277020645, "learning_rate": 0.00014525205158264949, "loss": 3.470151424407959, "step": 1239, "token_acc": 0.2530085862583296 }, { "epoch": 0.7270595133391967, "grad_norm": 1.3297734199312607, "learning_rate": 0.00014536928487690504, "loss": 3.403834819793701, "step": 1240, "token_acc": 0.2608286551005952 }, { "epoch": 0.7276458516564057, "grad_norm": 1.0204923456862767, "learning_rate": 0.0001454865181711606, "loss": 3.4251315593719482, "step": 1241, "token_acc": 0.2607074554425213 }, { "epoch": 0.7282321899736148, "grad_norm": 1.8614463934254661, "learning_rate": 0.0001456037514654162, "loss": 3.4099807739257812, "step": 1242, "token_acc": 0.2611788128381741 }, { "epoch": 0.7288185282908238, "grad_norm": 1.3041772868140586, "learning_rate": 0.00014572098475967176, "loss": 3.4974818229675293, "step": 1243, "token_acc": 0.25138140499575157 }, { "epoch": 0.7294048666080328, "grad_norm": 1.0564648533267627, "learning_rate": 0.0001458382180539273, "loss": 3.4022369384765625, "step": 1244, "token_acc": 0.26258460024501434 }, { "epoch": 0.7299912049252418, "grad_norm": 1.5660257008685003, "learning_rate": 0.0001459554513481829, "loss": 3.5017075538635254, "step": 1245, "token_acc": 0.2478392421708435 }, { "epoch": 0.7305775432424509, "grad_norm": 1.2967295612996657, "learning_rate": 0.00014607268464243847, "loss": 3.4555978775024414, "step": 1246, "token_acc": 0.25633834953517004 }, { "epoch": 0.73116388155966, "grad_norm": 1.6036217524756642, "learning_rate": 0.00014618991793669404, "loss": 3.4531102180480957, "step": 1247, "token_acc": 0.25551073004975366 }, { "epoch": 0.731750219876869, "grad_norm": 1.8233658190421305, "learning_rate": 0.0001463071512309496, "loss": 3.4363183975219727, "step": 1248, "token_acc": 0.25739716565920356 }, { "epoch": 0.732336558194078, "grad_norm": 1.4089515172355789, "learning_rate": 0.00014642438452520517, "loss": 3.4581356048583984, "step": 1249, "token_acc": 0.2550260601121613 }, { "epoch": 0.7329228965112871, "grad_norm": 1.607016573916611, "learning_rate": 0.00014654161781946074, "loss": 3.4439473152160645, "step": 1250, "token_acc": 0.25563950842057354 }, { "epoch": 0.7335092348284961, "grad_norm": 1.1982802132043384, "learning_rate": 0.00014665885111371632, "loss": 3.4321069717407227, "step": 1251, "token_acc": 0.2599147181673893 }, { "epoch": 0.7340955731457051, "grad_norm": 1.7916351410224547, "learning_rate": 0.00014677608440797187, "loss": 3.421072006225586, "step": 1252, "token_acc": 0.25949011141664224 }, { "epoch": 0.7346819114629141, "grad_norm": 1.0673689664702601, "learning_rate": 0.00014689331770222742, "loss": 3.436981439590454, "step": 1253, "token_acc": 0.2575099137726195 }, { "epoch": 0.7352682497801232, "grad_norm": 2.200834619563645, "learning_rate": 0.000147010550996483, "loss": 3.5111794471740723, "step": 1254, "token_acc": 0.24833392065005597 }, { "epoch": 0.7358545880973322, "grad_norm": 1.2114155715238122, "learning_rate": 0.00014712778429073857, "loss": 3.465855598449707, "step": 1255, "token_acc": 0.25337390507231616 }, { "epoch": 0.7364409264145412, "grad_norm": 1.8737492870695667, "learning_rate": 0.00014724501758499415, "loss": 3.4212727546691895, "step": 1256, "token_acc": 0.2583062956261309 }, { "epoch": 0.7370272647317502, "grad_norm": 1.2925024235484885, "learning_rate": 0.0001473622508792497, "loss": 3.516406536102295, "step": 1257, "token_acc": 0.24767110093880912 }, { "epoch": 0.7376136030489593, "grad_norm": 1.3301291188019166, "learning_rate": 0.00014747948417350527, "loss": 3.4706199169158936, "step": 1258, "token_acc": 0.2524147376225389 }, { "epoch": 0.7381999413661683, "grad_norm": 1.462833984668704, "learning_rate": 0.00014759671746776085, "loss": 3.42460298538208, "step": 1259, "token_acc": 0.2579313166414959 }, { "epoch": 0.7387862796833773, "grad_norm": 1.463724768341424, "learning_rate": 0.00014771395076201643, "loss": 3.4307332038879395, "step": 1260, "token_acc": 0.25681902280526564 }, { "epoch": 0.7393726180005863, "grad_norm": 1.9347344545582497, "learning_rate": 0.00014783118405627197, "loss": 3.4678549766540527, "step": 1261, "token_acc": 0.25190761193722877 }, { "epoch": 0.7399589563177954, "grad_norm": 1.071088548058867, "learning_rate": 0.00014794841735052755, "loss": 3.409794569015503, "step": 1262, "token_acc": 0.2600979207995055 }, { "epoch": 0.7405452946350044, "grad_norm": 1.3377183751567074, "learning_rate": 0.00014806565064478313, "loss": 3.426347255706787, "step": 1263, "token_acc": 0.2566860011934008 }, { "epoch": 0.7411316329522134, "grad_norm": 1.2992008834059154, "learning_rate": 0.0001481828839390387, "loss": 3.3940846920013428, "step": 1264, "token_acc": 0.2626419599212203 }, { "epoch": 0.7417179712694224, "grad_norm": 1.4215781057479917, "learning_rate": 0.00014830011723329428, "loss": 3.489041805267334, "step": 1265, "token_acc": 0.2505786647472776 }, { "epoch": 0.7423043095866315, "grad_norm": 1.6380426361759095, "learning_rate": 0.00014841735052754983, "loss": 3.400296688079834, "step": 1266, "token_acc": 0.26068273928385105 }, { "epoch": 0.7428906479038405, "grad_norm": 1.4624959953044716, "learning_rate": 0.0001485345838218054, "loss": 3.386765480041504, "step": 1267, "token_acc": 0.2634867093277663 }, { "epoch": 0.7434769862210495, "grad_norm": 1.1925947158925931, "learning_rate": 0.00014865181711606098, "loss": 3.405383348464966, "step": 1268, "token_acc": 0.26165629461706974 }, { "epoch": 0.7440633245382586, "grad_norm": 1.5621420987415842, "learning_rate": 0.00014876905041031656, "loss": 3.455237865447998, "step": 1269, "token_acc": 0.25608269723867205 }, { "epoch": 0.7446496628554676, "grad_norm": 1.2501024366577473, "learning_rate": 0.0001488862837045721, "loss": 3.451732635498047, "step": 1270, "token_acc": 0.25428297483582685 }, { "epoch": 0.7452360011726766, "grad_norm": 1.5867269645544448, "learning_rate": 0.00014900351699882766, "loss": 3.4027304649353027, "step": 1271, "token_acc": 0.26125793800224917 }, { "epoch": 0.7458223394898856, "grad_norm": 1.4362521735255311, "learning_rate": 0.00014912075029308323, "loss": 3.4019625186920166, "step": 1272, "token_acc": 0.2621157117107969 }, { "epoch": 0.7464086778070947, "grad_norm": 1.4031492824481668, "learning_rate": 0.0001492379835873388, "loss": 3.4212403297424316, "step": 1273, "token_acc": 0.25754086028153916 }, { "epoch": 0.7469950161243037, "grad_norm": 1.1736845539272112, "learning_rate": 0.00014935521688159439, "loss": 3.372968912124634, "step": 1274, "token_acc": 0.2637764211331364 }, { "epoch": 0.7475813544415127, "grad_norm": 2.1327757325866665, "learning_rate": 0.00014947245017584993, "loss": 3.4224917888641357, "step": 1275, "token_acc": 0.2572750514305629 }, { "epoch": 0.7481676927587217, "grad_norm": 0.9704083716251074, "learning_rate": 0.0001495896834701055, "loss": 3.469961643218994, "step": 1276, "token_acc": 0.2511131296796077 }, { "epoch": 0.7487540310759309, "grad_norm": 1.684596950507075, "learning_rate": 0.0001497069167643611, "loss": 3.4014203548431396, "step": 1277, "token_acc": 0.2611449381377652 }, { "epoch": 0.7493403693931399, "grad_norm": 1.495602383274155, "learning_rate": 0.00014982415005861666, "loss": 3.457026720046997, "step": 1278, "token_acc": 0.25410032171363417 }, { "epoch": 0.7499267077103489, "grad_norm": 1.1631171518798684, "learning_rate": 0.0001499413833528722, "loss": 3.440493106842041, "step": 1279, "token_acc": 0.2563788021259717 }, { "epoch": 0.7505130460275579, "grad_norm": 1.4239185209019283, "learning_rate": 0.0001500586166471278, "loss": 3.4252877235412598, "step": 1280, "token_acc": 0.2577691483379487 }, { "epoch": 0.751099384344767, "grad_norm": 1.503648105025796, "learning_rate": 0.00015017584994138336, "loss": 3.4105794429779053, "step": 1281, "token_acc": 0.25994719461785376 }, { "epoch": 0.751685722661976, "grad_norm": 1.4989788864720033, "learning_rate": 0.00015029308323563894, "loss": 3.3793835639953613, "step": 1282, "token_acc": 0.2641823459503802 }, { "epoch": 0.752272060979185, "grad_norm": 1.1259577353541548, "learning_rate": 0.0001504103165298945, "loss": 3.4287967681884766, "step": 1283, "token_acc": 0.258247277481624 }, { "epoch": 0.752858399296394, "grad_norm": 1.365508010565486, "learning_rate": 0.00015052754982415007, "loss": 3.4712073802948, "step": 1284, "token_acc": 0.2511922578844111 }, { "epoch": 0.7534447376136031, "grad_norm": 1.3879643463695752, "learning_rate": 0.00015064478311840564, "loss": 3.4485044479370117, "step": 1285, "token_acc": 0.2555265362183873 }, { "epoch": 0.7540310759308121, "grad_norm": 1.083976351073512, "learning_rate": 0.00015076201641266122, "loss": 3.3934457302093506, "step": 1286, "token_acc": 0.26216669351915256 }, { "epoch": 0.7546174142480211, "grad_norm": 1.9741451219382833, "learning_rate": 0.00015087924970691677, "loss": 3.487504005432129, "step": 1287, "token_acc": 0.2494730862545856 }, { "epoch": 0.7552037525652301, "grad_norm": 1.1428712878378593, "learning_rate": 0.00015099648300117234, "loss": 3.423361301422119, "step": 1288, "token_acc": 0.2589780945256001 }, { "epoch": 0.7557900908824392, "grad_norm": 2.101406189706504, "learning_rate": 0.0001511137162954279, "loss": 3.5219991207122803, "step": 1289, "token_acc": 0.24635049896560543 }, { "epoch": 0.7563764291996482, "grad_norm": 1.1749592828379896, "learning_rate": 0.00015123094958968347, "loss": 3.4067013263702393, "step": 1290, "token_acc": 0.2583606817438206 }, { "epoch": 0.7569627675168572, "grad_norm": 1.7453724325699769, "learning_rate": 0.00015134818288393905, "loss": 3.4472789764404297, "step": 1291, "token_acc": 0.25690357139000186 }, { "epoch": 0.7575491058340662, "grad_norm": 1.4255935976090912, "learning_rate": 0.0001514654161781946, "loss": 3.451256513595581, "step": 1292, "token_acc": 0.2533840859858418 }, { "epoch": 0.7581354441512753, "grad_norm": 1.3609357668653277, "learning_rate": 0.00015158264947245017, "loss": 3.456042528152466, "step": 1293, "token_acc": 0.2553718680096888 }, { "epoch": 0.7587217824684843, "grad_norm": 1.1425740824308384, "learning_rate": 0.00015169988276670575, "loss": 3.444441318511963, "step": 1294, "token_acc": 0.25538292193245155 }, { "epoch": 0.7593081207856933, "grad_norm": 1.349462185130714, "learning_rate": 0.00015181711606096132, "loss": 3.437311887741089, "step": 1295, "token_acc": 0.25674294578147616 }, { "epoch": 0.7598944591029023, "grad_norm": 1.1815147139675841, "learning_rate": 0.0001519343493552169, "loss": 3.3935465812683105, "step": 1296, "token_acc": 0.2591039459381589 }, { "epoch": 0.7604807974201114, "grad_norm": 1.3949443956168464, "learning_rate": 0.00015205158264947245, "loss": 3.437002182006836, "step": 1297, "token_acc": 0.2576903945336021 }, { "epoch": 0.7610671357373204, "grad_norm": 1.2568676986391838, "learning_rate": 0.00015216881594372803, "loss": 3.4558348655700684, "step": 1298, "token_acc": 0.2528200862920414 }, { "epoch": 0.7616534740545294, "grad_norm": 1.723296330765033, "learning_rate": 0.0001522860492379836, "loss": 3.407172203063965, "step": 1299, "token_acc": 0.2594460144525037 }, { "epoch": 0.7622398123717385, "grad_norm": 1.184642490472284, "learning_rate": 0.00015240328253223918, "loss": 3.3951706886291504, "step": 1300, "token_acc": 0.2610623112112838 }, { "epoch": 0.7628261506889475, "grad_norm": 1.391509690869005, "learning_rate": 0.00015252051582649473, "loss": 3.4404056072235107, "step": 1301, "token_acc": 0.25584921351277345 }, { "epoch": 0.7634124890061565, "grad_norm": 1.808794960352588, "learning_rate": 0.0001526377491207503, "loss": 3.410461902618408, "step": 1302, "token_acc": 0.2592892654233827 }, { "epoch": 0.7639988273233655, "grad_norm": 1.4129400271120751, "learning_rate": 0.00015275498241500588, "loss": 3.376692295074463, "step": 1303, "token_acc": 0.26259100967086174 }, { "epoch": 0.7645851656405747, "grad_norm": 1.3652975227714779, "learning_rate": 0.00015287221570926146, "loss": 3.443056583404541, "step": 1304, "token_acc": 0.25679442869800867 }, { "epoch": 0.7651715039577837, "grad_norm": 1.6500751105813, "learning_rate": 0.000152989449003517, "loss": 3.4749786853790283, "step": 1305, "token_acc": 0.25160368078042383 }, { "epoch": 0.7657578422749927, "grad_norm": 1.3643491048592706, "learning_rate": 0.00015310668229777258, "loss": 3.4558467864990234, "step": 1306, "token_acc": 0.25366106791004345 }, { "epoch": 0.7663441805922017, "grad_norm": 1.1966787537750312, "learning_rate": 0.00015322391559202813, "loss": 3.441133499145508, "step": 1307, "token_acc": 0.25537933176417443 }, { "epoch": 0.7669305189094108, "grad_norm": 1.5129240749984183, "learning_rate": 0.0001533411488862837, "loss": 3.3756179809570312, "step": 1308, "token_acc": 0.26349618123386875 }, { "epoch": 0.7675168572266198, "grad_norm": 1.1951269731142888, "learning_rate": 0.00015345838218053928, "loss": 3.39105224609375, "step": 1309, "token_acc": 0.25945828131054377 }, { "epoch": 0.7681031955438288, "grad_norm": 1.894693856226019, "learning_rate": 0.00015357561547479483, "loss": 3.372954845428467, "step": 1310, "token_acc": 0.2660941659615742 }, { "epoch": 0.7686895338610378, "grad_norm": 1.2533924620598944, "learning_rate": 0.0001536928487690504, "loss": 3.451615571975708, "step": 1311, "token_acc": 0.25490759007235286 }, { "epoch": 0.7692758721782469, "grad_norm": 1.6002890668463903, "learning_rate": 0.00015381008206330599, "loss": 3.440751075744629, "step": 1312, "token_acc": 0.2539912917271408 }, { "epoch": 0.7698622104954559, "grad_norm": 1.6960338715166758, "learning_rate": 0.00015392731535756156, "loss": 3.4475626945495605, "step": 1313, "token_acc": 0.2562255252516928 }, { "epoch": 0.7704485488126649, "grad_norm": 1.6382056215658634, "learning_rate": 0.0001540445486518171, "loss": 3.4409759044647217, "step": 1314, "token_acc": 0.2567159275735413 }, { "epoch": 0.7710348871298739, "grad_norm": 1.112109441841703, "learning_rate": 0.0001541617819460727, "loss": 3.400407314300537, "step": 1315, "token_acc": 0.2598234620379898 }, { "epoch": 0.771621225447083, "grad_norm": 1.7300435782980532, "learning_rate": 0.00015427901524032826, "loss": 3.423166036605835, "step": 1316, "token_acc": 0.25859233614990024 }, { "epoch": 0.772207563764292, "grad_norm": 1.0481917826667875, "learning_rate": 0.00015439624853458384, "loss": 3.36118221282959, "step": 1317, "token_acc": 0.2661040212904172 }, { "epoch": 0.772793902081501, "grad_norm": 1.5133302293363555, "learning_rate": 0.0001545134818288394, "loss": 3.4344353675842285, "step": 1318, "token_acc": 0.2549398361836664 }, { "epoch": 0.77338024039871, "grad_norm": 1.3923986861306115, "learning_rate": 0.00015463071512309497, "loss": 3.4415740966796875, "step": 1319, "token_acc": 0.2538437828151693 }, { "epoch": 0.7739665787159191, "grad_norm": 1.26715567321541, "learning_rate": 0.00015474794841735054, "loss": 3.4111063480377197, "step": 1320, "token_acc": 0.26099016709775424 }, { "epoch": 0.7745529170331281, "grad_norm": 1.6039435091472998, "learning_rate": 0.00015486518171160612, "loss": 3.3888659477233887, "step": 1321, "token_acc": 0.26226347675832495 }, { "epoch": 0.7751392553503371, "grad_norm": 1.299338894205598, "learning_rate": 0.00015498241500586167, "loss": 3.432020425796509, "step": 1322, "token_acc": 0.2575829078896564 }, { "epoch": 0.7757255936675461, "grad_norm": 2.1581629296393174, "learning_rate": 0.00015509964830011724, "loss": 3.439408302307129, "step": 1323, "token_acc": 0.2557061556649163 }, { "epoch": 0.7763119319847552, "grad_norm": 1.0742571991536303, "learning_rate": 0.0001552168815943728, "loss": 3.416118860244751, "step": 1324, "token_acc": 0.25807778076515525 }, { "epoch": 0.7768982703019642, "grad_norm": 2.070348002401218, "learning_rate": 0.00015533411488862837, "loss": 3.4130992889404297, "step": 1325, "token_acc": 0.25778490509608987 }, { "epoch": 0.7774846086191732, "grad_norm": 1.3415453751304076, "learning_rate": 0.00015545134818288395, "loss": 3.4504311084747314, "step": 1326, "token_acc": 0.25675405082858765 }, { "epoch": 0.7780709469363823, "grad_norm": 1.585903227171164, "learning_rate": 0.0001555685814771395, "loss": 3.4499378204345703, "step": 1327, "token_acc": 0.25374800427513955 }, { "epoch": 0.7786572852535913, "grad_norm": 1.2295335910085605, "learning_rate": 0.00015568581477139507, "loss": 3.4052810668945312, "step": 1328, "token_acc": 0.26061243094850095 }, { "epoch": 0.7792436235708003, "grad_norm": 1.5659238291239055, "learning_rate": 0.00015580304806565065, "loss": 3.402463912963867, "step": 1329, "token_acc": 0.2592266290350665 }, { "epoch": 0.7798299618880093, "grad_norm": 1.4527356133551028, "learning_rate": 0.00015592028135990622, "loss": 3.4638214111328125, "step": 1330, "token_acc": 0.2531017698294788 }, { "epoch": 0.7804163002052185, "grad_norm": 0.9328913251748306, "learning_rate": 0.0001560375146541618, "loss": 3.3938584327697754, "step": 1331, "token_acc": 0.2614598071754304 }, { "epoch": 0.7810026385224275, "grad_norm": 1.278046136398353, "learning_rate": 0.00015615474794841735, "loss": 3.432962417602539, "step": 1332, "token_acc": 0.25674984414214236 }, { "epoch": 0.7815889768396365, "grad_norm": 1.6059735919672473, "learning_rate": 0.00015627198124267293, "loss": 3.447343587875366, "step": 1333, "token_acc": 0.2535945372241119 }, { "epoch": 0.7821753151568455, "grad_norm": 1.4555663814285882, "learning_rate": 0.0001563892145369285, "loss": 3.4815449714660645, "step": 1334, "token_acc": 0.25159082057194737 }, { "epoch": 0.7827616534740546, "grad_norm": 1.3086545765548627, "learning_rate": 0.00015650644783118408, "loss": 3.4199166297912598, "step": 1335, "token_acc": 0.2569769906803189 }, { "epoch": 0.7833479917912636, "grad_norm": 1.4963351416344797, "learning_rate": 0.00015662368112543963, "loss": 3.386457920074463, "step": 1336, "token_acc": 0.26096128968056526 }, { "epoch": 0.7839343301084726, "grad_norm": 1.5883719751491485, "learning_rate": 0.0001567409144196952, "loss": 3.3737833499908447, "step": 1337, "token_acc": 0.2638557454155593 }, { "epoch": 0.7845206684256816, "grad_norm": 1.2353488782439634, "learning_rate": 0.00015685814771395078, "loss": 3.436501979827881, "step": 1338, "token_acc": 0.2563241222176783 }, { "epoch": 0.7851070067428907, "grad_norm": 1.7294240285955818, "learning_rate": 0.00015697538100820636, "loss": 3.4303267002105713, "step": 1339, "token_acc": 0.2563434290566703 }, { "epoch": 0.7856933450600997, "grad_norm": 1.246095593696147, "learning_rate": 0.0001570926143024619, "loss": 3.4334189891815186, "step": 1340, "token_acc": 0.2560173915307411 }, { "epoch": 0.7862796833773087, "grad_norm": 1.2776804162160535, "learning_rate": 0.00015720984759671748, "loss": 3.436596393585205, "step": 1341, "token_acc": 0.255089413858814 }, { "epoch": 0.7868660216945177, "grad_norm": 1.514762385377181, "learning_rate": 0.00015732708089097303, "loss": 3.430767059326172, "step": 1342, "token_acc": 0.2558594645833015 }, { "epoch": 0.7874523600117268, "grad_norm": 1.4027348139873257, "learning_rate": 0.0001574443141852286, "loss": 3.3967700004577637, "step": 1343, "token_acc": 0.26001397638893686 }, { "epoch": 0.7880386983289358, "grad_norm": 0.849016016734809, "learning_rate": 0.00015756154747948418, "loss": 3.4489917755126953, "step": 1344, "token_acc": 0.2538228204150157 }, { "epoch": 0.7886250366461448, "grad_norm": 1.2740348555365664, "learning_rate": 0.00015767878077373973, "loss": 3.395728826522827, "step": 1345, "token_acc": 0.2604047013591269 }, { "epoch": 0.7892113749633538, "grad_norm": 1.7118483109604976, "learning_rate": 0.0001577960140679953, "loss": 3.40120792388916, "step": 1346, "token_acc": 0.25980947824911343 }, { "epoch": 0.7897977132805629, "grad_norm": 1.0549361486081228, "learning_rate": 0.00015791324736225089, "loss": 3.4053001403808594, "step": 1347, "token_acc": 0.2620256645529124 }, { "epoch": 0.7903840515977719, "grad_norm": 1.950275266239379, "learning_rate": 0.00015803048065650646, "loss": 3.4412758350372314, "step": 1348, "token_acc": 0.254904207412934 }, { "epoch": 0.7909703899149809, "grad_norm": 1.1228903245861406, "learning_rate": 0.000158147713950762, "loss": 3.432236433029175, "step": 1349, "token_acc": 0.2571157247051683 }, { "epoch": 0.7915567282321899, "grad_norm": 1.3719579410268588, "learning_rate": 0.0001582649472450176, "loss": 3.3730268478393555, "step": 1350, "token_acc": 0.26413195183073307 }, { "epoch": 0.792143066549399, "grad_norm": 1.3933650515460838, "learning_rate": 0.00015838218053927316, "loss": 3.457695245742798, "step": 1351, "token_acc": 0.25246354793104175 }, { "epoch": 0.792729404866608, "grad_norm": 1.2398987593095994, "learning_rate": 0.00015849941383352874, "loss": 3.423532009124756, "step": 1352, "token_acc": 0.2594626607177961 }, { "epoch": 0.793315743183817, "grad_norm": 1.2534835683702565, "learning_rate": 0.0001586166471277843, "loss": 3.4474854469299316, "step": 1353, "token_acc": 0.25201994779531195 }, { "epoch": 0.7939020815010261, "grad_norm": 1.285133784299824, "learning_rate": 0.00015873388042203987, "loss": 3.47641658782959, "step": 1354, "token_acc": 0.2512447501694354 }, { "epoch": 0.7944884198182351, "grad_norm": 1.109232567566872, "learning_rate": 0.00015885111371629544, "loss": 3.397341012954712, "step": 1355, "token_acc": 0.26022165954090737 }, { "epoch": 0.7950747581354441, "grad_norm": 1.762289433959861, "learning_rate": 0.00015896834701055102, "loss": 3.3867056369781494, "step": 1356, "token_acc": 0.2630039572503675 }, { "epoch": 0.7956610964526531, "grad_norm": 1.120508650628853, "learning_rate": 0.0001590855803048066, "loss": 3.3182373046875, "step": 1357, "token_acc": 0.2717155273177257 }, { "epoch": 0.7962474347698623, "grad_norm": 1.5455193392268172, "learning_rate": 0.00015920281359906214, "loss": 3.47340989112854, "step": 1358, "token_acc": 0.24862044616470957 }, { "epoch": 0.7968337730870713, "grad_norm": 1.257763918145666, "learning_rate": 0.00015932004689331772, "loss": 3.4118173122406006, "step": 1359, "token_acc": 0.2599451845241889 }, { "epoch": 0.7974201114042803, "grad_norm": 1.5886698994149002, "learning_rate": 0.00015943728018757327, "loss": 3.4121928215026855, "step": 1360, "token_acc": 0.25824333705439445 }, { "epoch": 0.7980064497214893, "grad_norm": 1.1914023578162718, "learning_rate": 0.00015955451348182884, "loss": 3.391681432723999, "step": 1361, "token_acc": 0.2615334035739896 }, { "epoch": 0.7985927880386984, "grad_norm": 1.4961224966281925, "learning_rate": 0.00015967174677608442, "loss": 3.343844413757324, "step": 1362, "token_acc": 0.26985119112605627 }, { "epoch": 0.7991791263559074, "grad_norm": 1.0959747188625273, "learning_rate": 0.00015978898007033997, "loss": 3.4245657920837402, "step": 1363, "token_acc": 0.25810379864372207 }, { "epoch": 0.7997654646731164, "grad_norm": 1.7365757187062323, "learning_rate": 0.00015990621336459555, "loss": 3.386023998260498, "step": 1364, "token_acc": 0.2617228654632686 }, { "epoch": 0.8003518029903254, "grad_norm": 1.5458352769590975, "learning_rate": 0.00016002344665885112, "loss": 3.4225351810455322, "step": 1365, "token_acc": 0.2577579675334096 }, { "epoch": 0.8009381413075345, "grad_norm": 1.1889062179803223, "learning_rate": 0.0001601406799531067, "loss": 3.4193105697631836, "step": 1366, "token_acc": 0.25750404225916945 }, { "epoch": 0.8015244796247435, "grad_norm": 1.2835901201805882, "learning_rate": 0.00016025791324736225, "loss": 3.412130355834961, "step": 1367, "token_acc": 0.2601837543628296 }, { "epoch": 0.8021108179419525, "grad_norm": 1.2469611497385342, "learning_rate": 0.00016037514654161782, "loss": 3.4162330627441406, "step": 1368, "token_acc": 0.2566167064388572 }, { "epoch": 0.8026971562591615, "grad_norm": 1.2230045755904724, "learning_rate": 0.0001604923798358734, "loss": 3.3724398612976074, "step": 1369, "token_acc": 0.264433783198075 }, { "epoch": 0.8032834945763706, "grad_norm": 1.5382512758511608, "learning_rate": 0.00016060961313012898, "loss": 3.3815901279449463, "step": 1370, "token_acc": 0.26097927461139897 }, { "epoch": 0.8038698328935796, "grad_norm": 1.2621043706789439, "learning_rate": 0.00016072684642438453, "loss": 3.412454128265381, "step": 1371, "token_acc": 0.25885567801793125 }, { "epoch": 0.8044561712107886, "grad_norm": 1.5347829120672927, "learning_rate": 0.0001608440797186401, "loss": 3.4170382022857666, "step": 1372, "token_acc": 0.2598218303370546 }, { "epoch": 0.8050425095279976, "grad_norm": 1.11545032567644, "learning_rate": 0.00016096131301289568, "loss": 3.407500743865967, "step": 1373, "token_acc": 0.25997555783126564 }, { "epoch": 0.8056288478452067, "grad_norm": 1.4832187245841353, "learning_rate": 0.00016107854630715126, "loss": 3.4161930084228516, "step": 1374, "token_acc": 0.25709930931113406 }, { "epoch": 0.8062151861624157, "grad_norm": 1.2183620021343144, "learning_rate": 0.0001611957796014068, "loss": 3.401772975921631, "step": 1375, "token_acc": 0.2607040862194731 }, { "epoch": 0.8068015244796247, "grad_norm": 1.6897382285213896, "learning_rate": 0.00016131301289566238, "loss": 3.3984644412994385, "step": 1376, "token_acc": 0.2604184746425713 }, { "epoch": 0.8073878627968337, "grad_norm": 1.148681955178217, "learning_rate": 0.00016143024618991796, "loss": 3.4220640659332275, "step": 1377, "token_acc": 0.25745807360267076 }, { "epoch": 0.8079742011140428, "grad_norm": 1.3871973480585504, "learning_rate": 0.0001615474794841735, "loss": 3.4250235557556152, "step": 1378, "token_acc": 0.25745404825903884 }, { "epoch": 0.8085605394312518, "grad_norm": 1.2059697467200812, "learning_rate": 0.00016166471277842908, "loss": 3.391935348510742, "step": 1379, "token_acc": 0.2591476636645117 }, { "epoch": 0.8091468777484608, "grad_norm": 1.6435658372062054, "learning_rate": 0.00016178194607268463, "loss": 3.442290782928467, "step": 1380, "token_acc": 0.25447408569698116 }, { "epoch": 0.8097332160656698, "grad_norm": 1.0838856957383594, "learning_rate": 0.0001618991793669402, "loss": 3.424408435821533, "step": 1381, "token_acc": 0.255436966909302 }, { "epoch": 0.810319554382879, "grad_norm": 1.3007345446042187, "learning_rate": 0.00016201641266119578, "loss": 3.4230763912200928, "step": 1382, "token_acc": 0.25723885236577204 }, { "epoch": 0.810905892700088, "grad_norm": 1.4520047599653023, "learning_rate": 0.00016213364595545136, "loss": 3.4378623962402344, "step": 1383, "token_acc": 0.2536172648810061 }, { "epoch": 0.811492231017297, "grad_norm": 1.2279829832455855, "learning_rate": 0.0001622508792497069, "loss": 3.3899765014648438, "step": 1384, "token_acc": 0.2612733355143047 }, { "epoch": 0.8120785693345061, "grad_norm": 1.492650011748883, "learning_rate": 0.00016236811254396249, "loss": 3.385045051574707, "step": 1385, "token_acc": 0.2609395538203836 }, { "epoch": 0.8126649076517151, "grad_norm": 1.1957336848313698, "learning_rate": 0.00016248534583821806, "loss": 3.39316463470459, "step": 1386, "token_acc": 0.2610308908506195 }, { "epoch": 0.8132512459689241, "grad_norm": 0.9913932941187936, "learning_rate": 0.00016260257913247364, "loss": 3.370640754699707, "step": 1387, "token_acc": 0.2630724498988089 }, { "epoch": 0.8138375842861331, "grad_norm": 1.778919795056691, "learning_rate": 0.0001627198124267292, "loss": 3.451665163040161, "step": 1388, "token_acc": 0.25280098208899393 }, { "epoch": 0.8144239226033422, "grad_norm": 1.4065760392925368, "learning_rate": 0.00016283704572098476, "loss": 3.3990657329559326, "step": 1389, "token_acc": 0.25854908427720236 }, { "epoch": 0.8150102609205512, "grad_norm": 1.5865502333380712, "learning_rate": 0.00016295427901524034, "loss": 3.3746392726898193, "step": 1390, "token_acc": 0.2614234161466408 }, { "epoch": 0.8155965992377602, "grad_norm": 0.8139752770538708, "learning_rate": 0.00016307151230949592, "loss": 3.3624978065490723, "step": 1391, "token_acc": 0.26318448483848683 }, { "epoch": 0.8161829375549692, "grad_norm": 1.6002682530181054, "learning_rate": 0.0001631887456037515, "loss": 3.418271541595459, "step": 1392, "token_acc": 0.2594736223651742 }, { "epoch": 0.8167692758721783, "grad_norm": 1.095166228887982, "learning_rate": 0.00016330597889800704, "loss": 3.3780651092529297, "step": 1393, "token_acc": 0.2626238116887429 }, { "epoch": 0.8173556141893873, "grad_norm": 1.4719030043321082, "learning_rate": 0.00016342321219226262, "loss": 3.386111259460449, "step": 1394, "token_acc": 0.25987115905994823 }, { "epoch": 0.8179419525065963, "grad_norm": 1.033948798933131, "learning_rate": 0.0001635404454865182, "loss": 3.3902335166931152, "step": 1395, "token_acc": 0.26015015350120285 }, { "epoch": 0.8185282908238053, "grad_norm": 1.165489899796703, "learning_rate": 0.00016365767878077374, "loss": 3.3687996864318848, "step": 1396, "token_acc": 0.263056588230324 }, { "epoch": 0.8191146291410144, "grad_norm": 1.123544971930076, "learning_rate": 0.00016377491207502932, "loss": 3.416421890258789, "step": 1397, "token_acc": 0.25979316409747855 }, { "epoch": 0.8197009674582234, "grad_norm": 1.386911827937946, "learning_rate": 0.00016389214536928487, "loss": 3.457995891571045, "step": 1398, "token_acc": 0.25363085330885976 }, { "epoch": 0.8202873057754324, "grad_norm": 1.589659105437681, "learning_rate": 0.00016400937866354045, "loss": 3.4378838539123535, "step": 1399, "token_acc": 0.25467336992293643 }, { "epoch": 0.8208736440926414, "grad_norm": 1.2484402205481129, "learning_rate": 0.00016412661195779602, "loss": 3.386201858520508, "step": 1400, "token_acc": 0.2603389146661287 }, { "epoch": 0.8214599824098505, "grad_norm": 1.7403796109007217, "learning_rate": 0.0001642438452520516, "loss": 3.4289164543151855, "step": 1401, "token_acc": 0.2564007197617557 }, { "epoch": 0.8220463207270595, "grad_norm": 0.9383213961790757, "learning_rate": 0.00016436107854630715, "loss": 3.3875303268432617, "step": 1402, "token_acc": 0.26220817272226665 }, { "epoch": 0.8226326590442685, "grad_norm": 1.735972178394569, "learning_rate": 0.00016447831184056272, "loss": 3.3960866928100586, "step": 1403, "token_acc": 0.26150425467806787 }, { "epoch": 0.8232189973614775, "grad_norm": 1.0435792060814726, "learning_rate": 0.0001645955451348183, "loss": 3.356168270111084, "step": 1404, "token_acc": 0.2658659432290763 }, { "epoch": 0.8238053356786866, "grad_norm": 1.1976902584261255, "learning_rate": 0.00016471277842907388, "loss": 3.3646349906921387, "step": 1405, "token_acc": 0.26182173567103045 }, { "epoch": 0.8243916739958956, "grad_norm": 1.2784395426836739, "learning_rate": 0.00016483001172332943, "loss": 3.3935470581054688, "step": 1406, "token_acc": 0.258898987515772 }, { "epoch": 0.8249780123131046, "grad_norm": 1.4494234385569889, "learning_rate": 0.000164947245017585, "loss": 3.4402318000793457, "step": 1407, "token_acc": 0.2535252485579404 }, { "epoch": 0.8255643506303136, "grad_norm": 1.0203063742982343, "learning_rate": 0.00016506447831184058, "loss": 3.3333630561828613, "step": 1408, "token_acc": 0.2681763254949554 }, { "epoch": 0.8261506889475227, "grad_norm": 1.622378942597262, "learning_rate": 0.00016518171160609615, "loss": 3.4576644897460938, "step": 1409, "token_acc": 0.252765525169086 }, { "epoch": 0.8267370272647317, "grad_norm": 1.1218495729331617, "learning_rate": 0.0001652989449003517, "loss": 3.37540340423584, "step": 1410, "token_acc": 0.2617770668673943 }, { "epoch": 0.8273233655819408, "grad_norm": 1.1296709021138347, "learning_rate": 0.00016541617819460728, "loss": 3.4006543159484863, "step": 1411, "token_acc": 0.2601903311762467 }, { "epoch": 0.8279097038991499, "grad_norm": 1.1791375750025688, "learning_rate": 0.00016553341148886286, "loss": 3.3970694541931152, "step": 1412, "token_acc": 0.25899754965961636 }, { "epoch": 0.8284960422163589, "grad_norm": 0.8829390034570248, "learning_rate": 0.0001656506447831184, "loss": 3.346217393875122, "step": 1413, "token_acc": 0.2659625198584681 }, { "epoch": 0.8290823805335679, "grad_norm": 1.2655513374992022, "learning_rate": 0.00016576787807737398, "loss": 3.424145221710205, "step": 1414, "token_acc": 0.2559237366869084 }, { "epoch": 0.8296687188507769, "grad_norm": 1.1905826909476929, "learning_rate": 0.00016588511137162953, "loss": 3.3977012634277344, "step": 1415, "token_acc": 0.26070246134311365 }, { "epoch": 0.830255057167986, "grad_norm": 1.5004251628761933, "learning_rate": 0.0001660023446658851, "loss": 3.4330170154571533, "step": 1416, "token_acc": 0.2561364192522185 }, { "epoch": 0.830841395485195, "grad_norm": 1.757025167113541, "learning_rate": 0.00016611957796014068, "loss": 3.3519184589385986, "step": 1417, "token_acc": 0.26574610198664134 }, { "epoch": 0.831427733802404, "grad_norm": 0.9523233453133809, "learning_rate": 0.00016623681125439626, "loss": 3.4260971546173096, "step": 1418, "token_acc": 0.25653466896227894 }, { "epoch": 0.832014072119613, "grad_norm": 1.8447147161589188, "learning_rate": 0.0001663540445486518, "loss": 3.4153528213500977, "step": 1419, "token_acc": 0.2566249853682866 }, { "epoch": 0.8326004104368221, "grad_norm": 1.208719964117776, "learning_rate": 0.00016647127784290739, "loss": 3.3849730491638184, "step": 1420, "token_acc": 0.2612123694450133 }, { "epoch": 0.8331867487540311, "grad_norm": 1.3798265107313714, "learning_rate": 0.00016658851113716296, "loss": 3.442805290222168, "step": 1421, "token_acc": 0.25360380085504053 }, { "epoch": 0.8337730870712401, "grad_norm": 1.2747954913225212, "learning_rate": 0.00016670574443141854, "loss": 3.4103610515594482, "step": 1422, "token_acc": 0.25752999994664305 }, { "epoch": 0.8343594253884491, "grad_norm": 1.514706727116007, "learning_rate": 0.00016682297772567411, "loss": 3.3485260009765625, "step": 1423, "token_acc": 0.26686710305982647 }, { "epoch": 0.8349457637056582, "grad_norm": 1.0380132802134088, "learning_rate": 0.00016694021101992966, "loss": 3.3454668521881104, "step": 1424, "token_acc": 0.2657586972612879 }, { "epoch": 0.8355321020228672, "grad_norm": 1.3767539039099006, "learning_rate": 0.00016705744431418524, "loss": 3.3448781967163086, "step": 1425, "token_acc": 0.26729262288914446 }, { "epoch": 0.8361184403400762, "grad_norm": 1.3993449161884737, "learning_rate": 0.00016717467760844082, "loss": 3.3844680786132812, "step": 1426, "token_acc": 0.2601063046689782 }, { "epoch": 0.8367047786572852, "grad_norm": 1.371283751056096, "learning_rate": 0.0001672919109026964, "loss": 3.40334415435791, "step": 1427, "token_acc": 0.2597881625743528 }, { "epoch": 0.8372911169744943, "grad_norm": 1.152520742035871, "learning_rate": 0.00016740914419695194, "loss": 3.4200756549835205, "step": 1428, "token_acc": 0.2566177504073463 }, { "epoch": 0.8378774552917033, "grad_norm": 1.7393925132629913, "learning_rate": 0.00016752637749120752, "loss": 3.3893885612487793, "step": 1429, "token_acc": 0.2589688966794248 }, { "epoch": 0.8384637936089123, "grad_norm": 1.0337838621521869, "learning_rate": 0.0001676436107854631, "loss": 3.398236036300659, "step": 1430, "token_acc": 0.2590882267295784 }, { "epoch": 0.8390501319261213, "grad_norm": 1.5778013181855102, "learning_rate": 0.00016776084407971864, "loss": 3.4039158821105957, "step": 1431, "token_acc": 0.2594672722469467 }, { "epoch": 0.8396364702433304, "grad_norm": 0.9642733717561321, "learning_rate": 0.00016787807737397422, "loss": 3.3822526931762695, "step": 1432, "token_acc": 0.2612234161482514 }, { "epoch": 0.8402228085605394, "grad_norm": 1.5432076240472155, "learning_rate": 0.00016799531066822977, "loss": 3.4229960441589355, "step": 1433, "token_acc": 0.25693975760302645 }, { "epoch": 0.8408091468777484, "grad_norm": 1.5409483700047024, "learning_rate": 0.00016811254396248535, "loss": 3.388065814971924, "step": 1434, "token_acc": 0.2598596917352489 }, { "epoch": 0.8413954851949574, "grad_norm": 1.2065574350500252, "learning_rate": 0.00016822977725674092, "loss": 3.385983467102051, "step": 1435, "token_acc": 0.26186015079438696 }, { "epoch": 0.8419818235121665, "grad_norm": 1.3630722912432809, "learning_rate": 0.0001683470105509965, "loss": 3.424725294113159, "step": 1436, "token_acc": 0.2560455528432426 }, { "epoch": 0.8425681618293756, "grad_norm": 1.2030244597933781, "learning_rate": 0.00016846424384525205, "loss": 3.4501466751098633, "step": 1437, "token_acc": 0.25450108416901945 }, { "epoch": 0.8431545001465846, "grad_norm": 1.0413621962361068, "learning_rate": 0.00016858147713950762, "loss": 3.409201145172119, "step": 1438, "token_acc": 0.2596036829779237 }, { "epoch": 0.8437408384637937, "grad_norm": 1.2623221674477745, "learning_rate": 0.0001686987104337632, "loss": 3.385174512863159, "step": 1439, "token_acc": 0.2593272105823389 }, { "epoch": 0.8443271767810027, "grad_norm": 0.9411913690571712, "learning_rate": 0.00016881594372801878, "loss": 3.38513445854187, "step": 1440, "token_acc": 0.2620564172446704 }, { "epoch": 0.8449135150982117, "grad_norm": 1.360864186045538, "learning_rate": 0.00016893317702227432, "loss": 3.4050650596618652, "step": 1441, "token_acc": 0.2587739324443522 }, { "epoch": 0.8454998534154207, "grad_norm": 1.1054797246575565, "learning_rate": 0.0001690504103165299, "loss": 3.3806333541870117, "step": 1442, "token_acc": 0.25974824519608286 }, { "epoch": 0.8460861917326298, "grad_norm": 1.4179040184340541, "learning_rate": 0.00016916764361078548, "loss": 3.431645393371582, "step": 1443, "token_acc": 0.2547193630874358 }, { "epoch": 0.8466725300498388, "grad_norm": 0.9801711189000787, "learning_rate": 0.00016928487690504105, "loss": 3.3983826637268066, "step": 1444, "token_acc": 0.25908235005167984 }, { "epoch": 0.8472588683670478, "grad_norm": 1.2106105538420087, "learning_rate": 0.0001694021101992966, "loss": 3.380951404571533, "step": 1445, "token_acc": 0.2609729458551294 }, { "epoch": 0.8478452066842568, "grad_norm": 1.3818651652596927, "learning_rate": 0.00016951934349355218, "loss": 3.404722213745117, "step": 1446, "token_acc": 0.25812670153223144 }, { "epoch": 0.8484315450014659, "grad_norm": 1.3789259248888228, "learning_rate": 0.00016963657678780776, "loss": 3.359693765640259, "step": 1447, "token_acc": 0.2657556477441266 }, { "epoch": 0.8490178833186749, "grad_norm": 1.108831893018942, "learning_rate": 0.00016975381008206333, "loss": 3.412679672241211, "step": 1448, "token_acc": 0.25687653173773756 }, { "epoch": 0.8496042216358839, "grad_norm": 1.2391722519744244, "learning_rate": 0.00016987104337631888, "loss": 3.39201283454895, "step": 1449, "token_acc": 0.26091004481529106 }, { "epoch": 0.8501905599530929, "grad_norm": 1.218245224599716, "learning_rate": 0.00016998827667057443, "loss": 3.380063772201538, "step": 1450, "token_acc": 0.26086066380566164 }, { "epoch": 0.850776898270302, "grad_norm": 1.4912963455771948, "learning_rate": 0.00017010550996483, "loss": 3.3462018966674805, "step": 1451, "token_acc": 0.2675874434800686 }, { "epoch": 0.851363236587511, "grad_norm": 1.1009741632522507, "learning_rate": 0.00017022274325908558, "loss": 3.3902649879455566, "step": 1452, "token_acc": 0.2602115225749865 }, { "epoch": 0.85194957490472, "grad_norm": 1.4848246642260061, "learning_rate": 0.00017033997655334116, "loss": 3.4019291400909424, "step": 1453, "token_acc": 0.2597050539924878 }, { "epoch": 0.852535913221929, "grad_norm": 1.079156558361002, "learning_rate": 0.0001704572098475967, "loss": 3.396552801132202, "step": 1454, "token_acc": 0.2582360587379828 }, { "epoch": 0.8531222515391381, "grad_norm": 1.342325168555911, "learning_rate": 0.00017057444314185228, "loss": 3.3619723320007324, "step": 1455, "token_acc": 0.26495436483215856 }, { "epoch": 0.8537085898563471, "grad_norm": 1.3079130800654015, "learning_rate": 0.00017069167643610786, "loss": 3.2954182624816895, "step": 1456, "token_acc": 0.27105714749903537 }, { "epoch": 0.8542949281735561, "grad_norm": 1.284983108830768, "learning_rate": 0.00017080890973036344, "loss": 3.3426520824432373, "step": 1457, "token_acc": 0.2663694074568128 }, { "epoch": 0.8548812664907651, "grad_norm": 1.4309767515396352, "learning_rate": 0.000170926143024619, "loss": 3.397672414779663, "step": 1458, "token_acc": 0.26012984336356143 }, { "epoch": 0.8554676048079742, "grad_norm": 1.1185046909742629, "learning_rate": 0.00017104337631887456, "loss": 3.316408634185791, "step": 1459, "token_acc": 0.27010468388273534 }, { "epoch": 0.8560539431251832, "grad_norm": 1.5050914891082074, "learning_rate": 0.00017116060961313014, "loss": 3.3816070556640625, "step": 1460, "token_acc": 0.26188591728174637 }, { "epoch": 0.8566402814423922, "grad_norm": 1.218411932089343, "learning_rate": 0.00017127784290738572, "loss": 3.3504467010498047, "step": 1461, "token_acc": 0.26462275727212514 }, { "epoch": 0.8572266197596012, "grad_norm": 1.3706023121231508, "learning_rate": 0.0001713950762016413, "loss": 3.39461088180542, "step": 1462, "token_acc": 0.26109746758562413 }, { "epoch": 0.8578129580768104, "grad_norm": 0.9611026830318272, "learning_rate": 0.00017151230949589684, "loss": 3.3955650329589844, "step": 1463, "token_acc": 0.2602593257114245 }, { "epoch": 0.8583992963940194, "grad_norm": 1.3148266847507588, "learning_rate": 0.00017162954279015242, "loss": 3.368884801864624, "step": 1464, "token_acc": 0.2636086704765741 }, { "epoch": 0.8589856347112284, "grad_norm": 1.1069858969393984, "learning_rate": 0.000171746776084408, "loss": 3.3595385551452637, "step": 1465, "token_acc": 0.2638883811294543 }, { "epoch": 0.8595719730284375, "grad_norm": 1.3789698013931453, "learning_rate": 0.00017186400937866357, "loss": 3.3402113914489746, "step": 1466, "token_acc": 0.266412021149323 }, { "epoch": 0.8601583113456465, "grad_norm": 1.2340979412692332, "learning_rate": 0.00017198124267291912, "loss": 3.3974380493164062, "step": 1467, "token_acc": 0.25799578504774023 }, { "epoch": 0.8607446496628555, "grad_norm": 1.2950079205083143, "learning_rate": 0.00017209847596717467, "loss": 3.3972349166870117, "step": 1468, "token_acc": 0.2576024040391237 }, { "epoch": 0.8613309879800645, "grad_norm": 1.062214385558007, "learning_rate": 0.00017221570926143024, "loss": 3.3747265338897705, "step": 1469, "token_acc": 0.26251473168687633 }, { "epoch": 0.8619173262972736, "grad_norm": 1.4292231887001252, "learning_rate": 0.00017233294255568582, "loss": 3.3396902084350586, "step": 1470, "token_acc": 0.2678973850748157 }, { "epoch": 0.8625036646144826, "grad_norm": 1.2545980116133342, "learning_rate": 0.0001724501758499414, "loss": 3.417757987976074, "step": 1471, "token_acc": 0.25574512002091915 }, { "epoch": 0.8630900029316916, "grad_norm": 0.8717300775605415, "learning_rate": 0.00017256740914419695, "loss": 3.4178900718688965, "step": 1472, "token_acc": 0.25856996608206484 }, { "epoch": 0.8636763412489006, "grad_norm": 1.4890831767095827, "learning_rate": 0.00017268464243845252, "loss": 3.3594353199005127, "step": 1473, "token_acc": 0.265992410650493 }, { "epoch": 0.8642626795661097, "grad_norm": 0.9579228720723092, "learning_rate": 0.0001728018757327081, "loss": 3.413370370864868, "step": 1474, "token_acc": 0.2575329530414322 }, { "epoch": 0.8648490178833187, "grad_norm": 1.155826149920576, "learning_rate": 0.00017291910902696367, "loss": 3.3810291290283203, "step": 1475, "token_acc": 0.25937102297327436 }, { "epoch": 0.8654353562005277, "grad_norm": 1.399884311989567, "learning_rate": 0.00017303634232121922, "loss": 3.3536834716796875, "step": 1476, "token_acc": 0.2650819792508051 }, { "epoch": 0.8660216945177367, "grad_norm": 1.2756653677508316, "learning_rate": 0.0001731535756154748, "loss": 3.3735735416412354, "step": 1477, "token_acc": 0.2613435984024083 }, { "epoch": 0.8666080328349458, "grad_norm": 1.0320424687456473, "learning_rate": 0.00017327080890973038, "loss": 3.41560697555542, "step": 1478, "token_acc": 0.25624816779042187 }, { "epoch": 0.8671943711521548, "grad_norm": 1.2321800203382742, "learning_rate": 0.00017338804220398595, "loss": 3.3585915565490723, "step": 1479, "token_acc": 0.2626471017819138 }, { "epoch": 0.8677807094693638, "grad_norm": 1.1478764139345106, "learning_rate": 0.0001735052754982415, "loss": 3.3904266357421875, "step": 1480, "token_acc": 0.26105803395183574 }, { "epoch": 0.8683670477865728, "grad_norm": 1.0322712865679524, "learning_rate": 0.00017362250879249708, "loss": 3.3808116912841797, "step": 1481, "token_acc": 0.2612883553896913 }, { "epoch": 0.8689533861037819, "grad_norm": 1.6175135364430304, "learning_rate": 0.00017373974208675265, "loss": 3.324183940887451, "step": 1482, "token_acc": 0.26901215572439974 }, { "epoch": 0.8695397244209909, "grad_norm": 0.8772822431254642, "learning_rate": 0.00017385697538100823, "loss": 3.369882583618164, "step": 1483, "token_acc": 0.2621472591631528 }, { "epoch": 0.8701260627381999, "grad_norm": 1.1641519696498677, "learning_rate": 0.00017397420867526378, "loss": 3.393336772918701, "step": 1484, "token_acc": 0.25840881388702086 }, { "epoch": 0.8707124010554089, "grad_norm": 1.2882711860033051, "learning_rate": 0.00017409144196951933, "loss": 3.4106829166412354, "step": 1485, "token_acc": 0.2574341184670473 }, { "epoch": 0.871298739372618, "grad_norm": 1.2626286397657405, "learning_rate": 0.0001742086752637749, "loss": 3.404301404953003, "step": 1486, "token_acc": 0.25690334006230003 }, { "epoch": 0.871885077689827, "grad_norm": 1.1819986252413377, "learning_rate": 0.00017432590855803048, "loss": 3.369124412536621, "step": 1487, "token_acc": 0.26201105658610085 }, { "epoch": 0.872471416007036, "grad_norm": 1.2573971511765212, "learning_rate": 0.00017444314185228606, "loss": 3.408914089202881, "step": 1488, "token_acc": 0.25828872275893644 }, { "epoch": 0.873057754324245, "grad_norm": 1.100159807291389, "learning_rate": 0.00017456037514654163, "loss": 3.3941423892974854, "step": 1489, "token_acc": 0.26022142840368545 }, { "epoch": 0.8736440926414542, "grad_norm": 1.1743512324022334, "learning_rate": 0.00017467760844079718, "loss": 3.3964481353759766, "step": 1490, "token_acc": 0.25867709547625317 }, { "epoch": 0.8742304309586632, "grad_norm": 1.2954899078211197, "learning_rate": 0.00017479484173505276, "loss": 3.376728057861328, "step": 1491, "token_acc": 0.2615496460684905 }, { "epoch": 0.8748167692758722, "grad_norm": 1.2763621724988437, "learning_rate": 0.00017491207502930834, "loss": 3.3485350608825684, "step": 1492, "token_acc": 0.2655099026138166 }, { "epoch": 0.8754031075930812, "grad_norm": 0.9991407635116363, "learning_rate": 0.0001750293083235639, "loss": 3.3698904514312744, "step": 1493, "token_acc": 0.2607958864768984 }, { "epoch": 0.8759894459102903, "grad_norm": 1.352455401779166, "learning_rate": 0.00017514654161781946, "loss": 3.3584060668945312, "step": 1494, "token_acc": 0.26509359311277486 }, { "epoch": 0.8765757842274993, "grad_norm": 1.1208433807693403, "learning_rate": 0.00017526377491207504, "loss": 3.3936424255371094, "step": 1495, "token_acc": 0.25898503057794126 }, { "epoch": 0.8771621225447083, "grad_norm": 1.5152089916871343, "learning_rate": 0.00017538100820633061, "loss": 3.3777639865875244, "step": 1496, "token_acc": 0.2621632182858071 }, { "epoch": 0.8777484608619174, "grad_norm": 0.736659725372689, "learning_rate": 0.0001754982415005862, "loss": 3.4409842491149902, "step": 1497, "token_acc": 0.2521883962115106 }, { "epoch": 0.8783347991791264, "grad_norm": 1.1438099619520186, "learning_rate": 0.00017561547479484174, "loss": 3.3725433349609375, "step": 1498, "token_acc": 0.2612237375598968 }, { "epoch": 0.8789211374963354, "grad_norm": 1.6226355212468893, "learning_rate": 0.00017573270808909732, "loss": 3.3909592628479004, "step": 1499, "token_acc": 0.25940590002381725 }, { "epoch": 0.8795074758135444, "grad_norm": 0.9613854092073866, "learning_rate": 0.0001758499413833529, "loss": 3.383650064468384, "step": 1500, "token_acc": 0.26161368531566925 }, { "epoch": 0.8800938141307535, "grad_norm": 0.9310447066820396, "learning_rate": 0.00017596717467760847, "loss": 3.367323637008667, "step": 1501, "token_acc": 0.26356278589925874 }, { "epoch": 0.8806801524479625, "grad_norm": 1.0902444785263428, "learning_rate": 0.00017608440797186402, "loss": 3.354609489440918, "step": 1502, "token_acc": 0.26554700512764196 }, { "epoch": 0.8812664907651715, "grad_norm": 1.4774117537193938, "learning_rate": 0.00017620164126611957, "loss": 3.387373685836792, "step": 1503, "token_acc": 0.25984581780804317 }, { "epoch": 0.8818528290823805, "grad_norm": 2.266694979750916, "learning_rate": 0.00017631887456037514, "loss": 3.407223701477051, "step": 1504, "token_acc": 0.25702938882074367 }, { "epoch": 0.8824391673995896, "grad_norm": 2.0749896393951426, "learning_rate": 0.00017643610785463072, "loss": 3.404712677001953, "step": 1505, "token_acc": 0.26099025840006607 }, { "epoch": 0.8830255057167986, "grad_norm": 1.173192163339113, "learning_rate": 0.0001765533411488863, "loss": 3.351816177368164, "step": 1506, "token_acc": 0.2640559527221185 }, { "epoch": 0.8836118440340076, "grad_norm": 1.452009753822364, "learning_rate": 0.00017667057444314185, "loss": 3.4304556846618652, "step": 1507, "token_acc": 0.25553283149286343 }, { "epoch": 0.8841981823512166, "grad_norm": 1.15685785181313, "learning_rate": 0.00017678780773739742, "loss": 3.407529354095459, "step": 1508, "token_acc": 0.25884024471705913 }, { "epoch": 0.8847845206684257, "grad_norm": 1.5138501965965592, "learning_rate": 0.000176905041031653, "loss": 3.3573224544525146, "step": 1509, "token_acc": 0.2630266569036829 }, { "epoch": 0.8853708589856347, "grad_norm": 1.0157207828911852, "learning_rate": 0.00017702227432590857, "loss": 3.323700428009033, "step": 1510, "token_acc": 0.2691512717812135 }, { "epoch": 0.8859571973028437, "grad_norm": 1.3384678807577561, "learning_rate": 0.00017713950762016412, "loss": 3.406172037124634, "step": 1511, "token_acc": 0.25799224993945263 }, { "epoch": 0.8865435356200527, "grad_norm": 1.2471878125708253, "learning_rate": 0.0001772567409144197, "loss": 3.3650341033935547, "step": 1512, "token_acc": 0.2642705110952654 }, { "epoch": 0.8871298739372618, "grad_norm": 1.519185393671763, "learning_rate": 0.00017737397420867528, "loss": 3.360307216644287, "step": 1513, "token_acc": 0.26373175542406313 }, { "epoch": 0.8877162122544708, "grad_norm": 0.9706358535167096, "learning_rate": 0.00017749120750293085, "loss": 3.426914930343628, "step": 1514, "token_acc": 0.252681631322408 }, { "epoch": 0.8883025505716798, "grad_norm": 1.2134110848710862, "learning_rate": 0.00017760844079718643, "loss": 3.417804718017578, "step": 1515, "token_acc": 0.2581786868065955 }, { "epoch": 0.8888888888888888, "grad_norm": 1.6261002168992242, "learning_rate": 0.00017772567409144198, "loss": 3.3730387687683105, "step": 1516, "token_acc": 0.2605859012068107 }, { "epoch": 0.889475227206098, "grad_norm": 1.0997783048125969, "learning_rate": 0.00017784290738569755, "loss": 3.37394642829895, "step": 1517, "token_acc": 0.2603151199541986 }, { "epoch": 0.890061565523307, "grad_norm": 1.6285489879453978, "learning_rate": 0.00017796014067995313, "loss": 3.3897604942321777, "step": 1518, "token_acc": 0.26049355181506895 }, { "epoch": 0.890647903840516, "grad_norm": 3.3286981841905225, "learning_rate": 0.0001780773739742087, "loss": 3.4052653312683105, "step": 1519, "token_acc": 0.26048819927977573 }, { "epoch": 0.891234242157725, "grad_norm": 1.159283016464403, "learning_rate": 0.00017819460726846423, "loss": 3.361818313598633, "step": 1520, "token_acc": 0.26199120707289775 }, { "epoch": 0.8918205804749341, "grad_norm": 1.8574366638650486, "learning_rate": 0.0001783118405627198, "loss": 3.392158269882202, "step": 1521, "token_acc": 0.25933242648280447 }, { "epoch": 0.8924069187921431, "grad_norm": 1.2350760832240921, "learning_rate": 0.00017842907385697538, "loss": 3.419607162475586, "step": 1522, "token_acc": 0.25626366190631006 }, { "epoch": 0.8929932571093521, "grad_norm": 1.3560927403181309, "learning_rate": 0.00017854630715123096, "loss": 3.3976871967315674, "step": 1523, "token_acc": 0.25769128754226445 }, { "epoch": 0.8935795954265612, "grad_norm": 1.1181138982280656, "learning_rate": 0.00017866354044548653, "loss": 3.3918638229370117, "step": 1524, "token_acc": 0.2582132367106412 }, { "epoch": 0.8941659337437702, "grad_norm": 1.554557886780218, "learning_rate": 0.00017878077373974208, "loss": 3.3687663078308105, "step": 1525, "token_acc": 0.2614903793607753 }, { "epoch": 0.8947522720609792, "grad_norm": 1.1530624476758216, "learning_rate": 0.00017889800703399766, "loss": 3.374950647354126, "step": 1526, "token_acc": 0.26278817244505454 }, { "epoch": 0.8953386103781882, "grad_norm": 1.333841294396085, "learning_rate": 0.00017901524032825324, "loss": 3.3191957473754883, "step": 1527, "token_acc": 0.26790004551083263 }, { "epoch": 0.8959249486953973, "grad_norm": 0.9422572212953042, "learning_rate": 0.0001791324736225088, "loss": 3.421724319458008, "step": 1528, "token_acc": 0.2570019734002097 }, { "epoch": 0.8965112870126063, "grad_norm": 1.3299214955353553, "learning_rate": 0.00017924970691676436, "loss": 3.3493521213531494, "step": 1529, "token_acc": 0.26553432711582686 }, { "epoch": 0.8970976253298153, "grad_norm": 1.4589638858468008, "learning_rate": 0.00017936694021101994, "loss": 3.321837902069092, "step": 1530, "token_acc": 0.2685723435320426 }, { "epoch": 0.8976839636470243, "grad_norm": 1.2870542763683752, "learning_rate": 0.00017948417350527551, "loss": 3.3878555297851562, "step": 1531, "token_acc": 0.25975829316325505 }, { "epoch": 0.8982703019642334, "grad_norm": 1.2187544875158403, "learning_rate": 0.0001796014067995311, "loss": 3.3984103202819824, "step": 1532, "token_acc": 0.25735855262312646 }, { "epoch": 0.8988566402814424, "grad_norm": 1.29561914753019, "learning_rate": 0.00017971864009378664, "loss": 3.3379642963409424, "step": 1533, "token_acc": 0.2644837250747305 }, { "epoch": 0.8994429785986514, "grad_norm": 1.2966455411514421, "learning_rate": 0.00017983587338804222, "loss": 3.3390920162200928, "step": 1534, "token_acc": 0.26739066089357233 }, { "epoch": 0.9000293169158604, "grad_norm": 1.1386317992267077, "learning_rate": 0.0001799531066822978, "loss": 3.3433074951171875, "step": 1535, "token_acc": 0.2656255790928296 }, { "epoch": 0.9006156552330695, "grad_norm": 1.312571750111663, "learning_rate": 0.00018007033997655337, "loss": 3.3589138984680176, "step": 1536, "token_acc": 0.2636712168848914 }, { "epoch": 0.9012019935502785, "grad_norm": 1.211894350862937, "learning_rate": 0.00018018757327080892, "loss": 3.3316750526428223, "step": 1537, "token_acc": 0.2656379854089685 }, { "epoch": 0.9017883318674875, "grad_norm": 1.6156276018651505, "learning_rate": 0.00018030480656506447, "loss": 3.342571258544922, "step": 1538, "token_acc": 0.26371886229368413 }, { "epoch": 0.9023746701846965, "grad_norm": 0.9816790098360255, "learning_rate": 0.00018042203985932004, "loss": 3.3893649578094482, "step": 1539, "token_acc": 0.2592479339932253 }, { "epoch": 0.9029610085019056, "grad_norm": 1.4254902800405698, "learning_rate": 0.00018053927315357562, "loss": 3.345700740814209, "step": 1540, "token_acc": 0.2653448586075908 }, { "epoch": 0.9035473468191146, "grad_norm": 1.2825574459255813, "learning_rate": 0.0001806565064478312, "loss": 3.3366165161132812, "step": 1541, "token_acc": 0.2659552316501822 }, { "epoch": 0.9041336851363236, "grad_norm": 1.4129078528307166, "learning_rate": 0.00018077373974208674, "loss": 3.340075969696045, "step": 1542, "token_acc": 0.2659273755164166 }, { "epoch": 0.9047200234535326, "grad_norm": 0.9069217276918115, "learning_rate": 0.00018089097303634232, "loss": 3.3417882919311523, "step": 1543, "token_acc": 0.26521126684839 }, { "epoch": 0.9053063617707418, "grad_norm": 1.5657331153023728, "learning_rate": 0.0001810082063305979, "loss": 3.3857264518737793, "step": 1544, "token_acc": 0.26106904070891346 }, { "epoch": 0.9058927000879508, "grad_norm": 1.1012884221426182, "learning_rate": 0.00018112543962485347, "loss": 3.3069815635681152, "step": 1545, "token_acc": 0.27012238945341804 }, { "epoch": 0.9064790384051598, "grad_norm": 1.1739170478926193, "learning_rate": 0.00018124267291910902, "loss": 3.280078887939453, "step": 1546, "token_acc": 0.2727946511868975 }, { "epoch": 0.9070653767223688, "grad_norm": 1.175053454563694, "learning_rate": 0.0001813599062133646, "loss": 3.3163278102874756, "step": 1547, "token_acc": 0.26934326329982056 }, { "epoch": 0.9076517150395779, "grad_norm": 1.24818481620032, "learning_rate": 0.00018147713950762018, "loss": 3.3843674659729004, "step": 1548, "token_acc": 0.2582754371568118 }, { "epoch": 0.9082380533567869, "grad_norm": 1.3122585625308938, "learning_rate": 0.00018159437280187575, "loss": 3.3370323181152344, "step": 1549, "token_acc": 0.2653369841906081 }, { "epoch": 0.9088243916739959, "grad_norm": 1.286304506506278, "learning_rate": 0.00018171160609613133, "loss": 3.3194799423217773, "step": 1550, "token_acc": 0.26853451254059285 }, { "epoch": 0.909410729991205, "grad_norm": 1.0788506085500287, "learning_rate": 0.00018182883939038688, "loss": 3.353987216949463, "step": 1551, "token_acc": 0.264657159612043 }, { "epoch": 0.909997068308414, "grad_norm": 1.4452448808436773, "learning_rate": 0.00018194607268464245, "loss": 3.378814697265625, "step": 1552, "token_acc": 0.2593958374111046 }, { "epoch": 0.910583406625623, "grad_norm": 1.2357466809858635, "learning_rate": 0.00018206330597889803, "loss": 3.342198371887207, "step": 1553, "token_acc": 0.26663439312892834 }, { "epoch": 0.911169744942832, "grad_norm": 1.2129130656116176, "learning_rate": 0.0001821805392731536, "loss": 3.3796777725219727, "step": 1554, "token_acc": 0.2617166528059962 }, { "epoch": 0.9117560832600411, "grad_norm": 1.146604556201497, "learning_rate": 0.00018229777256740915, "loss": 3.396350860595703, "step": 1555, "token_acc": 0.25920019468298 }, { "epoch": 0.9123424215772501, "grad_norm": 2.05040081403037, "learning_rate": 0.0001824150058616647, "loss": 3.4083476066589355, "step": 1556, "token_acc": 0.25626040551594265 }, { "epoch": 0.9129287598944591, "grad_norm": 0.7773064290736583, "learning_rate": 0.00018253223915592028, "loss": 3.4253311157226562, "step": 1557, "token_acc": 0.25439652848210964 }, { "epoch": 0.9135150982116681, "grad_norm": 1.79627057337562, "learning_rate": 0.00018264947245017586, "loss": 3.391282558441162, "step": 1558, "token_acc": 0.2600664629429257 }, { "epoch": 0.9141014365288772, "grad_norm": 0.9184980870345989, "learning_rate": 0.00018276670574443143, "loss": 3.3942761421203613, "step": 1559, "token_acc": 0.25757309601731765 }, { "epoch": 0.9146877748460862, "grad_norm": 1.5377769612778618, "learning_rate": 0.00018288393903868698, "loss": 3.384425163269043, "step": 1560, "token_acc": 0.26069879383911443 }, { "epoch": 0.9152741131632952, "grad_norm": 1.2520046523629007, "learning_rate": 0.00018300117233294256, "loss": 3.295742988586426, "step": 1561, "token_acc": 0.27246077902770727 }, { "epoch": 0.9158604514805042, "grad_norm": 0.9420638984060122, "learning_rate": 0.00018311840562719813, "loss": 3.352247714996338, "step": 1562, "token_acc": 0.26366578156920917 }, { "epoch": 0.9164467897977133, "grad_norm": 1.1499957980941797, "learning_rate": 0.0001832356389214537, "loss": 3.325246810913086, "step": 1563, "token_acc": 0.26839646056747535 }, { "epoch": 0.9170331281149223, "grad_norm": 1.199653384799704, "learning_rate": 0.00018335287221570926, "loss": 3.3027188777923584, "step": 1564, "token_acc": 0.2728387244873506 }, { "epoch": 0.9176194664321313, "grad_norm": 1.3214843926296007, "learning_rate": 0.00018347010550996484, "loss": 3.3684563636779785, "step": 1565, "token_acc": 0.26195374187315873 }, { "epoch": 0.9182058047493403, "grad_norm": 1.0088904138662307, "learning_rate": 0.0001835873388042204, "loss": 3.3322877883911133, "step": 1566, "token_acc": 0.2671360414082261 }, { "epoch": 0.9187921430665494, "grad_norm": 1.8745183782424188, "learning_rate": 0.000183704572098476, "loss": 3.4052467346191406, "step": 1567, "token_acc": 0.25619687218573595 }, { "epoch": 0.9193784813837584, "grad_norm": 1.1478874787274114, "learning_rate": 0.00018382180539273154, "loss": 3.348512649536133, "step": 1568, "token_acc": 0.2656653861424669 }, { "epoch": 0.9199648197009674, "grad_norm": 1.3820644524817856, "learning_rate": 0.00018393903868698711, "loss": 3.327195167541504, "step": 1569, "token_acc": 0.2666618390221951 }, { "epoch": 0.9205511580181764, "grad_norm": 1.0844950939908533, "learning_rate": 0.0001840562719812427, "loss": 3.343712091445923, "step": 1570, "token_acc": 0.2670703929511573 }, { "epoch": 0.9211374963353856, "grad_norm": 1.4791056070916166, "learning_rate": 0.00018417350527549827, "loss": 3.3564248085021973, "step": 1571, "token_acc": 0.263818210393397 }, { "epoch": 0.9217238346525946, "grad_norm": 1.0240460325274745, "learning_rate": 0.00018429073856975382, "loss": 3.361377477645874, "step": 1572, "token_acc": 0.263559146354769 }, { "epoch": 0.9223101729698036, "grad_norm": 1.7780360415280625, "learning_rate": 0.00018440797186400937, "loss": 3.3474106788635254, "step": 1573, "token_acc": 0.26460279598935516 }, { "epoch": 0.9228965112870126, "grad_norm": 1.2707892836571437, "learning_rate": 0.00018452520515826494, "loss": 3.3712053298950195, "step": 1574, "token_acc": 0.2607989121897075 }, { "epoch": 0.9234828496042217, "grad_norm": 1.3658681696317136, "learning_rate": 0.00018464243845252052, "loss": 3.3270702362060547, "step": 1575, "token_acc": 0.26739641964942834 }, { "epoch": 0.9240691879214307, "grad_norm": 1.1444399495291406, "learning_rate": 0.0001847596717467761, "loss": 3.358936071395874, "step": 1576, "token_acc": 0.26351826678444296 }, { "epoch": 0.9246555262386397, "grad_norm": 1.7313194132063021, "learning_rate": 0.00018487690504103164, "loss": 3.3354296684265137, "step": 1577, "token_acc": 0.26686180890037264 }, { "epoch": 0.9252418645558487, "grad_norm": 0.9809863176003378, "learning_rate": 0.00018499413833528722, "loss": 3.4418704509735107, "step": 1578, "token_acc": 0.2533676839741049 }, { "epoch": 0.9258282028730578, "grad_norm": 1.4626773683461787, "learning_rate": 0.0001851113716295428, "loss": 3.3287243843078613, "step": 1579, "token_acc": 0.2658683516410984 }, { "epoch": 0.9264145411902668, "grad_norm": 1.1107742365425293, "learning_rate": 0.00018522860492379837, "loss": 3.3714849948883057, "step": 1580, "token_acc": 0.26082237584088525 }, { "epoch": 0.9270008795074758, "grad_norm": 1.1379131287331465, "learning_rate": 0.00018534583821805395, "loss": 3.3280763626098633, "step": 1581, "token_acc": 0.26735450497238705 }, { "epoch": 0.9275872178246849, "grad_norm": 1.1422660187588143, "learning_rate": 0.0001854630715123095, "loss": 3.3566975593566895, "step": 1582, "token_acc": 0.2637835350171905 }, { "epoch": 0.9281735561418939, "grad_norm": 1.667148809225169, "learning_rate": 0.00018558030480656507, "loss": 3.348209857940674, "step": 1583, "token_acc": 0.26456177855970214 }, { "epoch": 0.9287598944591029, "grad_norm": 1.0589916525400245, "learning_rate": 0.00018569753810082065, "loss": 3.3752589225769043, "step": 1584, "token_acc": 0.26295822252197704 }, { "epoch": 0.9293462327763119, "grad_norm": 1.5126729922013449, "learning_rate": 0.00018581477139507623, "loss": 3.3634002208709717, "step": 1585, "token_acc": 0.2607700258397933 }, { "epoch": 0.929932571093521, "grad_norm": 1.0568507383446433, "learning_rate": 0.00018593200468933178, "loss": 3.3675577640533447, "step": 1586, "token_acc": 0.2624037973977843 }, { "epoch": 0.93051890941073, "grad_norm": 1.7091785296659054, "learning_rate": 0.00018604923798358735, "loss": 3.3551533222198486, "step": 1587, "token_acc": 0.26344831526029333 }, { "epoch": 0.931105247727939, "grad_norm": 0.8770975885347173, "learning_rate": 0.00018616647127784293, "loss": 3.40421986579895, "step": 1588, "token_acc": 0.2572160555444066 }, { "epoch": 0.931691586045148, "grad_norm": 1.3252112943791836, "learning_rate": 0.0001862837045720985, "loss": 3.374990463256836, "step": 1589, "token_acc": 0.26150502451369695 }, { "epoch": 0.9322779243623571, "grad_norm": 1.1006264437220463, "learning_rate": 0.00018640093786635405, "loss": 3.3426198959350586, "step": 1590, "token_acc": 0.26584830068414994 }, { "epoch": 0.9328642626795661, "grad_norm": 1.298907876748038, "learning_rate": 0.0001865181711606096, "loss": 3.3545188903808594, "step": 1591, "token_acc": 0.26420038535645474 }, { "epoch": 0.9334506009967751, "grad_norm": 1.1254981141043234, "learning_rate": 0.00018663540445486518, "loss": 3.3663008213043213, "step": 1592, "token_acc": 0.2600399224246426 }, { "epoch": 0.9340369393139841, "grad_norm": 0.9673224135870939, "learning_rate": 0.00018675263774912076, "loss": 3.329442024230957, "step": 1593, "token_acc": 0.26830519442939144 }, { "epoch": 0.9346232776311932, "grad_norm": 1.2807819401048186, "learning_rate": 0.00018686987104337633, "loss": 3.3085124492645264, "step": 1594, "token_acc": 0.26717921147902635 }, { "epoch": 0.9352096159484022, "grad_norm": 1.536074091228922, "learning_rate": 0.00018698710433763188, "loss": 3.3698372840881348, "step": 1595, "token_acc": 0.2624507783912302 }, { "epoch": 0.9357959542656112, "grad_norm": 0.9167595605290084, "learning_rate": 0.00018710433763188746, "loss": 3.299405097961426, "step": 1596, "token_acc": 0.2704570749056434 }, { "epoch": 0.9363822925828202, "grad_norm": 1.3910912934898383, "learning_rate": 0.00018722157092614303, "loss": 3.395594596862793, "step": 1597, "token_acc": 0.25920034359614724 }, { "epoch": 0.9369686309000294, "grad_norm": 1.0860076009830906, "learning_rate": 0.0001873388042203986, "loss": 3.328111171722412, "step": 1598, "token_acc": 0.26592719458402675 }, { "epoch": 0.9375549692172384, "grad_norm": 1.0385921769905857, "learning_rate": 0.00018745603751465416, "loss": 3.3274950981140137, "step": 1599, "token_acc": 0.26740883122430453 }, { "epoch": 0.9381413075344474, "grad_norm": 1.2096505355335772, "learning_rate": 0.00018757327080890974, "loss": 3.3526041507720947, "step": 1600, "token_acc": 0.26282343576619055 }, { "epoch": 0.9387276458516564, "grad_norm": 1.7862437778363993, "learning_rate": 0.0001876905041031653, "loss": 3.371318817138672, "step": 1601, "token_acc": 0.26098816986804657 }, { "epoch": 0.9393139841688655, "grad_norm": 0.9278779931498861, "learning_rate": 0.0001878077373974209, "loss": 3.3075666427612305, "step": 1602, "token_acc": 0.2711867494651585 }, { "epoch": 0.9399003224860745, "grad_norm": 1.3853507347174254, "learning_rate": 0.00018792497069167644, "loss": 3.386536121368408, "step": 1603, "token_acc": 0.2577131342005218 }, { "epoch": 0.9404866608032835, "grad_norm": 1.2455254804914728, "learning_rate": 0.00018804220398593201, "loss": 3.3553366661071777, "step": 1604, "token_acc": 0.26208293829629725 }, { "epoch": 0.9410729991204925, "grad_norm": 1.1034562345082979, "learning_rate": 0.0001881594372801876, "loss": 3.4065918922424316, "step": 1605, "token_acc": 0.2567852023170673 }, { "epoch": 0.9416593374377016, "grad_norm": 1.7314831405483755, "learning_rate": 0.00018827667057444317, "loss": 3.404006004333496, "step": 1606, "token_acc": 0.2566374214229216 }, { "epoch": 0.9422456757549106, "grad_norm": 0.9528185692483703, "learning_rate": 0.00018839390386869872, "loss": 3.3299367427825928, "step": 1607, "token_acc": 0.2652288450898681 }, { "epoch": 0.9428320140721196, "grad_norm": 1.6403223377372316, "learning_rate": 0.0001885111371629543, "loss": 3.364982843399048, "step": 1608, "token_acc": 0.26326353771937766 }, { "epoch": 0.9434183523893287, "grad_norm": 1.0774998417516244, "learning_rate": 0.00018862837045720984, "loss": 3.382481813430786, "step": 1609, "token_acc": 0.2604807886000413 }, { "epoch": 0.9440046907065377, "grad_norm": 1.2288036718456758, "learning_rate": 0.00018874560375146542, "loss": 3.4060754776000977, "step": 1610, "token_acc": 0.25696048285763134 }, { "epoch": 0.9445910290237467, "grad_norm": 1.2183498809049207, "learning_rate": 0.000188862837045721, "loss": 3.350726842880249, "step": 1611, "token_acc": 0.26291805881769165 }, { "epoch": 0.9451773673409557, "grad_norm": 1.2340283636660205, "learning_rate": 0.00018898007033997654, "loss": 3.3825385570526123, "step": 1612, "token_acc": 0.26280442628044265 }, { "epoch": 0.9457637056581648, "grad_norm": 1.4681438532432032, "learning_rate": 0.00018909730363423212, "loss": 3.3893299102783203, "step": 1613, "token_acc": 0.2599766233697703 }, { "epoch": 0.9463500439753738, "grad_norm": 0.9809257475169157, "learning_rate": 0.0001892145369284877, "loss": 3.3195793628692627, "step": 1614, "token_acc": 0.2687134502923977 }, { "epoch": 0.9469363822925828, "grad_norm": 1.080422657307698, "learning_rate": 0.00018933177022274327, "loss": 3.3488292694091797, "step": 1615, "token_acc": 0.26532750725639975 }, { "epoch": 0.9475227206097918, "grad_norm": 1.0009022253200996, "learning_rate": 0.00018944900351699885, "loss": 3.341477870941162, "step": 1616, "token_acc": 0.2667368677920066 }, { "epoch": 0.9481090589270009, "grad_norm": 1.15274710216785, "learning_rate": 0.0001895662368112544, "loss": 3.3550496101379395, "step": 1617, "token_acc": 0.26292694117521526 }, { "epoch": 0.9486953972442099, "grad_norm": 1.5075813308380936, "learning_rate": 0.00018968347010550997, "loss": 3.3716845512390137, "step": 1618, "token_acc": 0.26060553414618737 }, { "epoch": 0.9492817355614189, "grad_norm": 1.2939910636519527, "learning_rate": 0.00018980070339976555, "loss": 3.3509111404418945, "step": 1619, "token_acc": 0.26327204522639613 }, { "epoch": 0.9498680738786279, "grad_norm": 1.1685680176518416, "learning_rate": 0.00018991793669402113, "loss": 3.3880279064178467, "step": 1620, "token_acc": 0.25947568727898707 }, { "epoch": 0.950454412195837, "grad_norm": 1.1646410392004614, "learning_rate": 0.00019003516998827668, "loss": 3.379462718963623, "step": 1621, "token_acc": 0.25987230167435893 }, { "epoch": 0.951040750513046, "grad_norm": 1.1853606258623022, "learning_rate": 0.00019015240328253225, "loss": 3.3054118156433105, "step": 1622, "token_acc": 0.26873828734523614 }, { "epoch": 0.951627088830255, "grad_norm": 1.1596618641034058, "learning_rate": 0.00019026963657678783, "loss": 3.3406171798706055, "step": 1623, "token_acc": 0.2640560872859681 }, { "epoch": 0.952213427147464, "grad_norm": 1.0399427731744997, "learning_rate": 0.0001903868698710434, "loss": 3.3570809364318848, "step": 1624, "token_acc": 0.26354488550827987 }, { "epoch": 0.9527997654646732, "grad_norm": 1.3145697144554607, "learning_rate": 0.00019050410316529895, "loss": 3.3618650436401367, "step": 1625, "token_acc": 0.2605064394938216 }, { "epoch": 0.9533861037818822, "grad_norm": 1.234950117312448, "learning_rate": 0.00019062133645955453, "loss": 3.333404541015625, "step": 1626, "token_acc": 0.2664859959286409 }, { "epoch": 0.9539724420990912, "grad_norm": 0.7601010073161639, "learning_rate": 0.00019073856975381008, "loss": 3.2304720878601074, "step": 1627, "token_acc": 0.2797544757569719 }, { "epoch": 0.9545587804163002, "grad_norm": 1.0934566867466122, "learning_rate": 0.00019085580304806566, "loss": 3.337761163711548, "step": 1628, "token_acc": 0.2670156927712458 }, { "epoch": 0.9551451187335093, "grad_norm": 1.3532055534212197, "learning_rate": 0.00019097303634232123, "loss": 3.3167316913604736, "step": 1629, "token_acc": 0.268028252558853 }, { "epoch": 0.9557314570507183, "grad_norm": 0.9863954009406589, "learning_rate": 0.00019109026963657678, "loss": 3.370453357696533, "step": 1630, "token_acc": 0.26140677374670884 }, { "epoch": 0.9563177953679273, "grad_norm": 1.083660466001697, "learning_rate": 0.00019120750293083236, "loss": 3.321348190307617, "step": 1631, "token_acc": 0.26936538699218204 }, { "epoch": 0.9569041336851363, "grad_norm": 1.025468990014639, "learning_rate": 0.00019132473622508793, "loss": 3.3492209911346436, "step": 1632, "token_acc": 0.26293236022416266 }, { "epoch": 0.9574904720023454, "grad_norm": 1.3090003190742197, "learning_rate": 0.0001914419695193435, "loss": 3.3504490852355957, "step": 1633, "token_acc": 0.26397720093024507 }, { "epoch": 0.9580768103195544, "grad_norm": 1.0616255883973102, "learning_rate": 0.00019155920281359906, "loss": 3.3855478763580322, "step": 1634, "token_acc": 0.2592721395986335 }, { "epoch": 0.9586631486367634, "grad_norm": 1.268569969842506, "learning_rate": 0.00019167643610785463, "loss": 3.3541502952575684, "step": 1635, "token_acc": 0.26462931404016854 }, { "epoch": 0.9592494869539725, "grad_norm": 0.9399033412577804, "learning_rate": 0.0001917936694021102, "loss": 3.3510520458221436, "step": 1636, "token_acc": 0.26333195104076196 }, { "epoch": 0.9598358252711815, "grad_norm": 1.3434907248263739, "learning_rate": 0.0001919109026963658, "loss": 3.329986095428467, "step": 1637, "token_acc": 0.26799489666829956 }, { "epoch": 0.9604221635883905, "grad_norm": 0.9557131361184009, "learning_rate": 0.00019202813599062134, "loss": 3.313842296600342, "step": 1638, "token_acc": 0.2673025062213013 }, { "epoch": 0.9610085019055995, "grad_norm": 1.3746672616070599, "learning_rate": 0.0001921453692848769, "loss": 3.3858346939086914, "step": 1639, "token_acc": 0.26089356096697436 }, { "epoch": 0.9615948402228086, "grad_norm": 1.2466323604766458, "learning_rate": 0.0001922626025791325, "loss": 3.3362507820129395, "step": 1640, "token_acc": 0.2641340981062967 }, { "epoch": 0.9621811785400176, "grad_norm": 1.1661886070180711, "learning_rate": 0.00019237983587338807, "loss": 3.34262752532959, "step": 1641, "token_acc": 0.2638271435191551 }, { "epoch": 0.9627675168572266, "grad_norm": 1.0763985551493827, "learning_rate": 0.00019249706916764364, "loss": 3.38920259475708, "step": 1642, "token_acc": 0.2574238146236978 }, { "epoch": 0.9633538551744356, "grad_norm": 1.4672173336155059, "learning_rate": 0.0001926143024618992, "loss": 3.4152050018310547, "step": 1643, "token_acc": 0.2557633730982246 }, { "epoch": 0.9639401934916447, "grad_norm": 1.0097615490274985, "learning_rate": 0.00019273153575615474, "loss": 3.3637962341308594, "step": 1644, "token_acc": 0.2617014567698808 }, { "epoch": 0.9645265318088537, "grad_norm": 1.4088918757593316, "learning_rate": 0.00019284876905041032, "loss": 3.3202617168426514, "step": 1645, "token_acc": 0.26727753405137655 }, { "epoch": 0.9651128701260627, "grad_norm": 0.8159601394987848, "learning_rate": 0.0001929660023446659, "loss": 3.355109691619873, "step": 1646, "token_acc": 0.2618737966175746 }, { "epoch": 0.9656992084432717, "grad_norm": 1.3380815813123037, "learning_rate": 0.00019308323563892147, "loss": 3.286242723464966, "step": 1647, "token_acc": 0.2713701593331223 }, { "epoch": 0.9662855467604808, "grad_norm": 1.069492747115931, "learning_rate": 0.00019320046893317702, "loss": 3.333033323287964, "step": 1648, "token_acc": 0.2669163912730537 }, { "epoch": 0.9668718850776898, "grad_norm": 1.409310148376673, "learning_rate": 0.0001933177022274326, "loss": 3.369143009185791, "step": 1649, "token_acc": 0.2602760636990466 }, { "epoch": 0.9674582233948988, "grad_norm": 0.8765148746893767, "learning_rate": 0.00019343493552168817, "loss": 3.3058595657348633, "step": 1650, "token_acc": 0.26922890280001943 }, { "epoch": 0.9680445617121078, "grad_norm": 1.2416515988593004, "learning_rate": 0.00019355216881594375, "loss": 3.374819278717041, "step": 1651, "token_acc": 0.260814549785172 }, { "epoch": 0.968630900029317, "grad_norm": 1.193887346640774, "learning_rate": 0.0001936694021101993, "loss": 3.3142249584198, "step": 1652, "token_acc": 0.2698347298974594 }, { "epoch": 0.969217238346526, "grad_norm": 1.301001313648477, "learning_rate": 0.00019378663540445487, "loss": 3.3638806343078613, "step": 1653, "token_acc": 0.26062861295798373 }, { "epoch": 0.969803576663735, "grad_norm": 1.2074897494380648, "learning_rate": 0.00019390386869871045, "loss": 3.3101613521575928, "step": 1654, "token_acc": 0.2697356348872009 }, { "epoch": 0.970389914980944, "grad_norm": 1.188749264177797, "learning_rate": 0.00019402110199296603, "loss": 3.3214831352233887, "step": 1655, "token_acc": 0.2668062699061004 }, { "epoch": 0.9709762532981531, "grad_norm": 1.1283344902120356, "learning_rate": 0.00019413833528722157, "loss": 3.32655930519104, "step": 1656, "token_acc": 0.26514057794140405 }, { "epoch": 0.9715625916153621, "grad_norm": 1.1359896286023063, "learning_rate": 0.00019425556858147715, "loss": 3.3051419258117676, "step": 1657, "token_acc": 0.2687050172108063 }, { "epoch": 0.9721489299325711, "grad_norm": 1.0982522470553582, "learning_rate": 0.00019437280187573273, "loss": 3.3482327461242676, "step": 1658, "token_acc": 0.2631812769889645 }, { "epoch": 0.9727352682497801, "grad_norm": 1.3726150948191387, "learning_rate": 0.0001944900351699883, "loss": 3.3184661865234375, "step": 1659, "token_acc": 0.2679870865960748 }, { "epoch": 0.9733216065669892, "grad_norm": 0.82247418854886, "learning_rate": 0.00019460726846424385, "loss": 3.3374791145324707, "step": 1660, "token_acc": 0.2626613638465706 }, { "epoch": 0.9739079448841982, "grad_norm": 1.1078922024567954, "learning_rate": 0.00019472450175849943, "loss": 3.3170008659362793, "step": 1661, "token_acc": 0.26823623554516496 }, { "epoch": 0.9744942832014072, "grad_norm": 1.4095959082789207, "learning_rate": 0.00019484173505275498, "loss": 3.3128089904785156, "step": 1662, "token_acc": 0.2679425069351745 }, { "epoch": 0.9750806215186162, "grad_norm": 1.124856845808925, "learning_rate": 0.00019495896834701055, "loss": 3.341902732849121, "step": 1663, "token_acc": 0.2643564710876091 }, { "epoch": 0.9756669598358253, "grad_norm": 1.254935463989328, "learning_rate": 0.00019507620164126613, "loss": 3.384016513824463, "step": 1664, "token_acc": 0.26044897038053194 }, { "epoch": 0.9762532981530343, "grad_norm": 0.9877355941561444, "learning_rate": 0.00019519343493552168, "loss": 3.329348564147949, "step": 1665, "token_acc": 0.2653352833827317 }, { "epoch": 0.9768396364702433, "grad_norm": 1.2405044810523733, "learning_rate": 0.00019531066822977726, "loss": 3.363208770751953, "step": 1666, "token_acc": 0.2611990303444244 }, { "epoch": 0.9774259747874524, "grad_norm": 1.3913658890510827, "learning_rate": 0.00019542790152403283, "loss": 3.3351492881774902, "step": 1667, "token_acc": 0.26598281772008886 }, { "epoch": 0.9780123131046614, "grad_norm": 1.0463702076469992, "learning_rate": 0.0001955451348182884, "loss": 3.325552463531494, "step": 1668, "token_acc": 0.26524881838646086 }, { "epoch": 0.9785986514218704, "grad_norm": 1.3362195386632891, "learning_rate": 0.00019566236811254396, "loss": 3.3576231002807617, "step": 1669, "token_acc": 0.26250827864410864 }, { "epoch": 0.9791849897390794, "grad_norm": 1.2372034602087982, "learning_rate": 0.00019577960140679953, "loss": 3.3630270957946777, "step": 1670, "token_acc": 0.261734803136759 }, { "epoch": 0.9797713280562885, "grad_norm": 1.0897028163957025, "learning_rate": 0.0001958968347010551, "loss": 3.269686222076416, "step": 1671, "token_acc": 0.27446752437768035 }, { "epoch": 0.9803576663734975, "grad_norm": 0.8559570187978328, "learning_rate": 0.0001960140679953107, "loss": 3.369194269180298, "step": 1672, "token_acc": 0.25938527753984114 }, { "epoch": 0.9809440046907065, "grad_norm": 1.1928655157623724, "learning_rate": 0.00019613130128956624, "loss": 3.3436310291290283, "step": 1673, "token_acc": 0.26232732332187825 }, { "epoch": 0.9815303430079155, "grad_norm": 1.110231146103432, "learning_rate": 0.0001962485345838218, "loss": 3.3459889888763428, "step": 1674, "token_acc": 0.2655485240258312 }, { "epoch": 0.9821166813251246, "grad_norm": 1.139243369163609, "learning_rate": 0.0001963657678780774, "loss": 3.3928275108337402, "step": 1675, "token_acc": 0.2585317398404427 }, { "epoch": 0.9827030196423336, "grad_norm": 1.3943270513203412, "learning_rate": 0.00019648300117233296, "loss": 3.3056437969207764, "step": 1676, "token_acc": 0.2691272676063551 }, { "epoch": 0.9832893579595426, "grad_norm": 0.8656601760412752, "learning_rate": 0.00019660023446658854, "loss": 3.3615996837615967, "step": 1677, "token_acc": 0.2604645273325856 }, { "epoch": 0.9838756962767516, "grad_norm": 1.0567517919130265, "learning_rate": 0.0001967174677608441, "loss": 3.355379104614258, "step": 1678, "token_acc": 0.2640887747217391 }, { "epoch": 0.9844620345939608, "grad_norm": 1.0912951100232295, "learning_rate": 0.00019683470105509967, "loss": 3.3363449573516846, "step": 1679, "token_acc": 0.2645881705565771 }, { "epoch": 0.9850483729111698, "grad_norm": 1.0629044257606912, "learning_rate": 0.00019695193434935522, "loss": 3.3193612098693848, "step": 1680, "token_acc": 0.26749073252232025 }, { "epoch": 0.9856347112283788, "grad_norm": 1.2775943008961042, "learning_rate": 0.0001970691676436108, "loss": 3.3616628646850586, "step": 1681, "token_acc": 0.26352890950894214 }, { "epoch": 0.9862210495455878, "grad_norm": 1.0545305769637172, "learning_rate": 0.00019718640093786637, "loss": 3.313585042953491, "step": 1682, "token_acc": 0.26803396115113975 }, { "epoch": 0.9868073878627969, "grad_norm": 1.4977277396578403, "learning_rate": 0.00019730363423212192, "loss": 3.3703408241271973, "step": 1683, "token_acc": 0.2614033910136795 }, { "epoch": 0.9873937261800059, "grad_norm": 0.7038650037196329, "learning_rate": 0.0001974208675263775, "loss": 3.3422064781188965, "step": 1684, "token_acc": 0.2643694724138552 }, { "epoch": 0.9879800644972149, "grad_norm": 0.9994849745233718, "learning_rate": 0.00019753810082063307, "loss": 3.3329501152038574, "step": 1685, "token_acc": 0.2643449852989219 }, { "epoch": 0.9885664028144239, "grad_norm": 1.240826872987356, "learning_rate": 0.00019765533411488865, "loss": 3.3225011825561523, "step": 1686, "token_acc": 0.2679423794747557 }, { "epoch": 0.989152741131633, "grad_norm": 0.7525156425662606, "learning_rate": 0.0001977725674091442, "loss": 3.2979655265808105, "step": 1687, "token_acc": 0.2704380875334561 }, { "epoch": 0.989739079448842, "grad_norm": 1.129174319725173, "learning_rate": 0.00019788980070339977, "loss": 3.35269832611084, "step": 1688, "token_acc": 0.26368715681762234 }, { "epoch": 0.990325417766051, "grad_norm": 1.0608179493864747, "learning_rate": 0.00019800703399765535, "loss": 3.3505702018737793, "step": 1689, "token_acc": 0.26353862997797095 }, { "epoch": 0.99091175608326, "grad_norm": 1.2304127161934297, "learning_rate": 0.00019812426729191092, "loss": 3.3202266693115234, "step": 1690, "token_acc": 0.2672586436934661 }, { "epoch": 0.9914980944004691, "grad_norm": 1.3773044486110175, "learning_rate": 0.00019824150058616647, "loss": 3.3369665145874023, "step": 1691, "token_acc": 0.2653060689611792 }, { "epoch": 0.9920844327176781, "grad_norm": 1.439023029743615, "learning_rate": 0.00019835873388042205, "loss": 3.3842334747314453, "step": 1692, "token_acc": 0.259608531169472 }, { "epoch": 0.9926707710348871, "grad_norm": 0.6211096328699252, "learning_rate": 0.00019847596717467763, "loss": 3.3407256603240967, "step": 1693, "token_acc": 0.26415282302576665 }, { "epoch": 0.9932571093520962, "grad_norm": 1.1934650957874657, "learning_rate": 0.0001985932004689332, "loss": 3.346158981323242, "step": 1694, "token_acc": 0.2637598514346047 }, { "epoch": 0.9938434476693052, "grad_norm": 1.2851550551049356, "learning_rate": 0.00019871043376318875, "loss": 3.325650691986084, "step": 1695, "token_acc": 0.26750293137378156 }, { "epoch": 0.9944297859865142, "grad_norm": 1.1508433553141901, "learning_rate": 0.00019882766705744433, "loss": 3.2948293685913086, "step": 1696, "token_acc": 0.27120840789266654 }, { "epoch": 0.9950161243037232, "grad_norm": 0.925348185731345, "learning_rate": 0.0001989449003516999, "loss": 3.3334450721740723, "step": 1697, "token_acc": 0.2650206492295395 }, { "epoch": 0.9956024626209323, "grad_norm": 1.0793354554960248, "learning_rate": 0.00019906213364595545, "loss": 3.3594651222229004, "step": 1698, "token_acc": 0.2616705998862176 }, { "epoch": 0.9961888009381413, "grad_norm": 1.1589122134078231, "learning_rate": 0.00019917936694021103, "loss": 3.356414318084717, "step": 1699, "token_acc": 0.26265433904851077 }, { "epoch": 0.9967751392553503, "grad_norm": 0.8063315224940307, "learning_rate": 0.00019929660023446658, "loss": 3.2872724533081055, "step": 1700, "token_acc": 0.27135269402531514 }, { "epoch": 0.9973614775725593, "grad_norm": 0.9275315135898576, "learning_rate": 0.00019941383352872216, "loss": 3.3003945350646973, "step": 1701, "token_acc": 0.2693471513356969 }, { "epoch": 0.9979478158897684, "grad_norm": 1.3977515363576045, "learning_rate": 0.00019953106682297773, "loss": 3.3070321083068848, "step": 1702, "token_acc": 0.2674597083653108 }, { "epoch": 0.9985341542069774, "grad_norm": 0.8099153127824107, "learning_rate": 0.0001996483001172333, "loss": 3.3445329666137695, "step": 1703, "token_acc": 0.26612909436781346 }, { "epoch": 0.9991204925241864, "grad_norm": 1.004579303177753, "learning_rate": 0.00019976553341148886, "loss": 3.2992019653320312, "step": 1704, "token_acc": 0.27126144178848344 }, { "epoch": 0.9997068308413954, "grad_norm": 1.1612980474686398, "learning_rate": 0.00019988276670574443, "loss": 3.3426826000213623, "step": 1705, "token_acc": 0.26442122186495176 }, { "epoch": 1.0, "grad_norm": 1.2283793216853351, "learning_rate": 0.0002, "loss": 3.2606582641601562, "step": 1706, "token_acc": 0.27698120124646125 }, { "epoch": 1.0, "eval_loss": 3.3105568885803223, "eval_runtime": 16.4318, "eval_samples_per_second": 15.58, "eval_steps_per_second": 1.947, "eval_token_acc": 0.26746655304875117, "step": 1706 }, { "epoch": 1.0005863383172091, "grad_norm": 1.556494600009794, "learning_rate": 0.0001999999995303174, "loss": 3.328307628631592, "step": 1707, "token_acc": 0.26595388183638846 }, { "epoch": 1.001172676634418, "grad_norm": 0.8523857318890425, "learning_rate": 0.0001999999981212695, "loss": 3.280609130859375, "step": 1708, "token_acc": 0.27219681858250355 }, { "epoch": 1.0017590149516271, "grad_norm": 1.0527442206149304, "learning_rate": 0.00019999999577285642, "loss": 3.310378074645996, "step": 1709, "token_acc": 0.2680256480687648 }, { "epoch": 1.0023453532688362, "grad_norm": 1.4864315522560045, "learning_rate": 0.00019999999248507814, "loss": 3.2888007164001465, "step": 1710, "token_acc": 0.27098477695009415 }, { "epoch": 1.0029316915860451, "grad_norm": 0.9322965113037861, "learning_rate": 0.00019999998825793463, "loss": 3.3101136684417725, "step": 1711, "token_acc": 0.26868223099627375 }, { "epoch": 1.0035180299032542, "grad_norm": 1.2140821221169382, "learning_rate": 0.000199999983091426, "loss": 3.400871753692627, "step": 1712, "token_acc": 0.258787012987013 }, { "epoch": 1.0041043682204631, "grad_norm": 1.009684052655398, "learning_rate": 0.0001999999769855523, "loss": 3.3012166023254395, "step": 1713, "token_acc": 0.2689036216709844 }, { "epoch": 1.0046907065376722, "grad_norm": 1.202943246939225, "learning_rate": 0.00019999996994031353, "loss": 3.296599864959717, "step": 1714, "token_acc": 0.2679038800635739 }, { "epoch": 1.0052770448548813, "grad_norm": 0.9546427191555766, "learning_rate": 0.00019999996195570985, "loss": 3.2607038021087646, "step": 1715, "token_acc": 0.27512077985191963 }, { "epoch": 1.0058633831720902, "grad_norm": 1.2335410018132984, "learning_rate": 0.00019999995303174125, "loss": 3.291771650314331, "step": 1716, "token_acc": 0.27041116645418095 }, { "epoch": 1.0064497214892993, "grad_norm": 1.0921421651203864, "learning_rate": 0.00019999994316840782, "loss": 3.312577962875366, "step": 1717, "token_acc": 0.26718313356119155 }, { "epoch": 1.0070360598065085, "grad_norm": 1.3695469400955087, "learning_rate": 0.0001999999323657097, "loss": 3.313555955886841, "step": 1718, "token_acc": 0.2650570854629135 }, { "epoch": 1.0076223981237173, "grad_norm": 1.2628121447009166, "learning_rate": 0.00019999992062364697, "loss": 3.315274238586426, "step": 1719, "token_acc": 0.26700405538482214 }, { "epoch": 1.0082087364409265, "grad_norm": 0.9494902519256, "learning_rate": 0.00019999990794221972, "loss": 3.328627347946167, "step": 1720, "token_acc": 0.2664342695895 }, { "epoch": 1.0087950747581353, "grad_norm": 1.3881544086186914, "learning_rate": 0.0001999998943214281, "loss": 3.323829174041748, "step": 1721, "token_acc": 0.2675207274672372 }, { "epoch": 1.0093814130753445, "grad_norm": 1.1653045499417287, "learning_rate": 0.00019999987976127224, "loss": 3.3238110542297363, "step": 1722, "token_acc": 0.2664469337147566 }, { "epoch": 1.0099677513925536, "grad_norm": 1.2575657364561457, "learning_rate": 0.00019999986426175225, "loss": 3.2585737705230713, "step": 1723, "token_acc": 0.27267261489622663 }, { "epoch": 1.0105540897097625, "grad_norm": 0.9936817246844969, "learning_rate": 0.00019999984782286827, "loss": 3.33901309967041, "step": 1724, "token_acc": 0.2643873417123293 }, { "epoch": 1.0111404280269716, "grad_norm": 1.340730015302387, "learning_rate": 0.00019999983044462048, "loss": 3.309812068939209, "step": 1725, "token_acc": 0.26730696234676476 }, { "epoch": 1.0117267663441807, "grad_norm": 1.1609375856392976, "learning_rate": 0.00019999981212700903, "loss": 3.327427387237549, "step": 1726, "token_acc": 0.26356468645519043 }, { "epoch": 1.0123131046613896, "grad_norm": 1.1536328502356665, "learning_rate": 0.0001999997928700341, "loss": 3.3147873878479004, "step": 1727, "token_acc": 0.264424529776481 }, { "epoch": 1.0128994429785987, "grad_norm": 1.2723917601515504, "learning_rate": 0.00019999977267369588, "loss": 3.351543664932251, "step": 1728, "token_acc": 0.26157961462485024 }, { "epoch": 1.0134857812958076, "grad_norm": 1.2415985145042923, "learning_rate": 0.00019999975153799454, "loss": 3.297794818878174, "step": 1729, "token_acc": 0.268834089803858 }, { "epoch": 1.0140721196130167, "grad_norm": 1.491086041243301, "learning_rate": 0.00019999972946293027, "loss": 3.32405161857605, "step": 1730, "token_acc": 0.26578664402198915 }, { "epoch": 1.0146584579302258, "grad_norm": 0.9843345055647239, "learning_rate": 0.0001999997064485033, "loss": 3.259993076324463, "step": 1731, "token_acc": 0.2712384590244301 }, { "epoch": 1.0152447962474347, "grad_norm": 1.450624321829804, "learning_rate": 0.00019999968249471387, "loss": 3.259441375732422, "step": 1732, "token_acc": 0.2731529558877592 }, { "epoch": 1.0158311345646438, "grad_norm": 0.7765770096713421, "learning_rate": 0.00019999965760156215, "loss": 3.2235169410705566, "step": 1733, "token_acc": 0.2790696458905005 }, { "epoch": 1.016417472881853, "grad_norm": 1.1086717941818662, "learning_rate": 0.00019999963176904837, "loss": 3.3074679374694824, "step": 1734, "token_acc": 0.2657508289909995 }, { "epoch": 1.0170038111990618, "grad_norm": 0.8405811975346262, "learning_rate": 0.00019999960499717282, "loss": 3.3117685317993164, "step": 1735, "token_acc": 0.26819336840551106 }, { "epoch": 1.017590149516271, "grad_norm": 0.7431296755526309, "learning_rate": 0.00019999957728593574, "loss": 3.2740464210510254, "step": 1736, "token_acc": 0.272194382292572 }, { "epoch": 1.01817648783348, "grad_norm": 1.0002176285988111, "learning_rate": 0.00019999954863533738, "loss": 3.332535743713379, "step": 1737, "token_acc": 0.26577302609376474 }, { "epoch": 1.018762826150689, "grad_norm": 1.2184169161517018, "learning_rate": 0.00019999951904537802, "loss": 3.2924976348876953, "step": 1738, "token_acc": 0.26945296707130617 }, { "epoch": 1.019349164467898, "grad_norm": 0.9445123407964942, "learning_rate": 0.00019999948851605792, "loss": 3.255767583847046, "step": 1739, "token_acc": 0.2741988044308533 }, { "epoch": 1.019935502785107, "grad_norm": 1.1780655715075063, "learning_rate": 0.0001999994570473774, "loss": 3.308506965637207, "step": 1740, "token_acc": 0.26806187491382694 }, { "epoch": 1.020521841102316, "grad_norm": 1.194753028822761, "learning_rate": 0.0001999994246393367, "loss": 3.284492015838623, "step": 1741, "token_acc": 0.2715702479338843 }, { "epoch": 1.0211081794195251, "grad_norm": 1.3119055079428288, "learning_rate": 0.00019999939129193616, "loss": 3.300604820251465, "step": 1742, "token_acc": 0.27166271324997654 }, { "epoch": 1.021694517736734, "grad_norm": 1.0244102968927506, "learning_rate": 0.00019999935700517612, "loss": 3.3619349002838135, "step": 1743, "token_acc": 0.26027573111536045 }, { "epoch": 1.0222808560539431, "grad_norm": 1.1205548988389924, "learning_rate": 0.00019999932177905682, "loss": 3.287616729736328, "step": 1744, "token_acc": 0.2713776186858607 }, { "epoch": 1.0228671943711523, "grad_norm": 0.906680758714488, "learning_rate": 0.0001999992856135787, "loss": 3.3427040576934814, "step": 1745, "token_acc": 0.2634611589476199 }, { "epoch": 1.0234535326883611, "grad_norm": 1.0278739172048916, "learning_rate": 0.000199999248508742, "loss": 3.2839174270629883, "step": 1746, "token_acc": 0.2727939603264358 }, { "epoch": 1.0240398710055703, "grad_norm": 1.0618976481102271, "learning_rate": 0.00019999921046454712, "loss": 3.272448778152466, "step": 1747, "token_acc": 0.272406176332544 }, { "epoch": 1.0246262093227791, "grad_norm": 1.1981723135672615, "learning_rate": 0.00019999917148099444, "loss": 3.2798118591308594, "step": 1748, "token_acc": 0.2707797500811168 }, { "epoch": 1.0252125476399883, "grad_norm": 0.8220649631902591, "learning_rate": 0.00019999913155808424, "loss": 3.3087692260742188, "step": 1749, "token_acc": 0.2675946817596594 }, { "epoch": 1.0257988859571974, "grad_norm": 0.7858420635565981, "learning_rate": 0.00019999909069581698, "loss": 3.2957398891448975, "step": 1750, "token_acc": 0.2678634647537192 }, { "epoch": 1.0263852242744063, "grad_norm": 1.0644351666311864, "learning_rate": 0.000199999048894193, "loss": 3.276577949523926, "step": 1751, "token_acc": 0.2719083224269991 }, { "epoch": 1.0269715625916154, "grad_norm": 1.0826172294506995, "learning_rate": 0.00019999900615321275, "loss": 3.2733945846557617, "step": 1752, "token_acc": 0.27152142087227915 }, { "epoch": 1.0275579009088245, "grad_norm": 0.910384673197123, "learning_rate": 0.00019999896247287655, "loss": 3.290496349334717, "step": 1753, "token_acc": 0.26887546914838023 }, { "epoch": 1.0281442392260334, "grad_norm": 1.2049532798048312, "learning_rate": 0.00019999891785318485, "loss": 3.284515142440796, "step": 1754, "token_acc": 0.2718623126829334 }, { "epoch": 1.0287305775432425, "grad_norm": 1.3463184648379212, "learning_rate": 0.00019999887229413808, "loss": 3.3190548419952393, "step": 1755, "token_acc": 0.26470618212580327 }, { "epoch": 1.0293169158604514, "grad_norm": 0.9365315302293586, "learning_rate": 0.00019999882579573662, "loss": 3.2841455936431885, "step": 1756, "token_acc": 0.2714503637956142 }, { "epoch": 1.0299032541776605, "grad_norm": 0.8056751332984439, "learning_rate": 0.00019999877835798097, "loss": 3.294391632080078, "step": 1757, "token_acc": 0.269180780278424 }, { "epoch": 1.0304895924948696, "grad_norm": 0.807073807025428, "learning_rate": 0.00019999872998087157, "loss": 3.2585020065307617, "step": 1758, "token_acc": 0.2738279981233534 }, { "epoch": 1.0310759308120785, "grad_norm": 0.9188963200611165, "learning_rate": 0.00019999868066440882, "loss": 3.3134231567382812, "step": 1759, "token_acc": 0.2672857108687968 }, { "epoch": 1.0316622691292876, "grad_norm": 1.0913130207182808, "learning_rate": 0.00019999863040859323, "loss": 3.2983882427215576, "step": 1760, "token_acc": 0.267619630223296 }, { "epoch": 1.0322486074464967, "grad_norm": 1.1510614450793855, "learning_rate": 0.00019999857921342522, "loss": 3.3039979934692383, "step": 1761, "token_acc": 0.2680319802876973 }, { "epoch": 1.0328349457637056, "grad_norm": 0.9742543785483267, "learning_rate": 0.00019999852707890537, "loss": 3.297025680541992, "step": 1762, "token_acc": 0.2686513171894284 }, { "epoch": 1.0334212840809147, "grad_norm": 0.975133901184013, "learning_rate": 0.00019999847400503407, "loss": 3.3022875785827637, "step": 1763, "token_acc": 0.2681696764678582 }, { "epoch": 1.0340076223981236, "grad_norm": 1.1341862309020048, "learning_rate": 0.00019999841999181184, "loss": 3.3030343055725098, "step": 1764, "token_acc": 0.2676047242141809 }, { "epoch": 1.0345939607153327, "grad_norm": 1.1853320474359959, "learning_rate": 0.00019999836503923924, "loss": 3.260789155960083, "step": 1765, "token_acc": 0.273162407391992 }, { "epoch": 1.0351802990325418, "grad_norm": 1.0721449050596954, "learning_rate": 0.00019999830914731672, "loss": 3.257016181945801, "step": 1766, "token_acc": 0.27417226188508353 }, { "epoch": 1.0357666373497507, "grad_norm": 0.8758866062121071, "learning_rate": 0.00019999825231604485, "loss": 3.293463706970215, "step": 1767, "token_acc": 0.2697982297340385 }, { "epoch": 1.0363529756669598, "grad_norm": 0.9709334656668901, "learning_rate": 0.00019999819454542415, "loss": 3.3248915672302246, "step": 1768, "token_acc": 0.26673016544960665 }, { "epoch": 1.036939313984169, "grad_norm": 1.158069377903993, "learning_rate": 0.00019999813583545514, "loss": 3.2606422901153564, "step": 1769, "token_acc": 0.27214280169910254 }, { "epoch": 1.0375256523013778, "grad_norm": 0.9039828439206207, "learning_rate": 0.0001999980761861384, "loss": 3.2774105072021484, "step": 1770, "token_acc": 0.27073095704008543 }, { "epoch": 1.038111990618587, "grad_norm": 0.8163781196590087, "learning_rate": 0.00019999801559747452, "loss": 3.2986931800842285, "step": 1771, "token_acc": 0.2703006784472543 }, { "epoch": 1.038698328935796, "grad_norm": 1.0642042112355978, "learning_rate": 0.000199997954069464, "loss": 3.329517364501953, "step": 1772, "token_acc": 0.2645986330401327 }, { "epoch": 1.039284667253005, "grad_norm": 1.0848399924353889, "learning_rate": 0.00019999789160210746, "loss": 3.3166861534118652, "step": 1773, "token_acc": 0.2661930023498191 }, { "epoch": 1.039871005570214, "grad_norm": 0.9107319036886131, "learning_rate": 0.0001999978281954055, "loss": 3.263587236404419, "step": 1774, "token_acc": 0.27257831783802067 }, { "epoch": 1.040457343887423, "grad_norm": 1.1628462014535277, "learning_rate": 0.00019999776384935865, "loss": 3.2985498905181885, "step": 1775, "token_acc": 0.26976071607855084 }, { "epoch": 1.041043682204632, "grad_norm": 1.0262176865398889, "learning_rate": 0.0001999976985639676, "loss": 3.3277931213378906, "step": 1776, "token_acc": 0.26588533180912494 }, { "epoch": 1.0416300205218412, "grad_norm": 0.9108082402598152, "learning_rate": 0.00019999763233923289, "loss": 3.295567512512207, "step": 1777, "token_acc": 0.2681652723673287 }, { "epoch": 1.04221635883905, "grad_norm": 0.7922922566611076, "learning_rate": 0.00019999756517515515, "loss": 3.3071703910827637, "step": 1778, "token_acc": 0.2682451743505674 }, { "epoch": 1.0428026971562592, "grad_norm": 1.018941359511883, "learning_rate": 0.00019999749707173508, "loss": 3.277599334716797, "step": 1779, "token_acc": 0.27027566350238036 }, { "epoch": 1.0433890354734683, "grad_norm": 1.141157542909064, "learning_rate": 0.00019999742802897326, "loss": 3.316535711288452, "step": 1780, "token_acc": 0.26503940615143745 }, { "epoch": 1.0439753737906772, "grad_norm": 0.8583974318986433, "learning_rate": 0.00019999735804687035, "loss": 3.2898850440979004, "step": 1781, "token_acc": 0.2694515670782688 }, { "epoch": 1.0445617121078863, "grad_norm": 0.799766314629756, "learning_rate": 0.000199997287125427, "loss": 3.2240023612976074, "step": 1782, "token_acc": 0.27754897839715503 }, { "epoch": 1.0451480504250952, "grad_norm": 0.7868300315672145, "learning_rate": 0.00019999721526464388, "loss": 3.294559955596924, "step": 1783, "token_acc": 0.27013611583264885 }, { "epoch": 1.0457343887423043, "grad_norm": 0.910621209933701, "learning_rate": 0.00019999714246452167, "loss": 3.249177932739258, "step": 1784, "token_acc": 0.27533909017691893 }, { "epoch": 1.0463207270595134, "grad_norm": 0.9453268505360458, "learning_rate": 0.00019999706872506109, "loss": 3.3015735149383545, "step": 1785, "token_acc": 0.2685105329668182 }, { "epoch": 1.0469070653767223, "grad_norm": 1.0734196248765913, "learning_rate": 0.00019999699404626278, "loss": 3.3147549629211426, "step": 1786, "token_acc": 0.26804633536700084 }, { "epoch": 1.0474934036939314, "grad_norm": 0.8869495141916742, "learning_rate": 0.00019999691842812744, "loss": 3.3309764862060547, "step": 1787, "token_acc": 0.2650002479271981 }, { "epoch": 1.0480797420111405, "grad_norm": 0.7685706828134822, "learning_rate": 0.00019999684187065584, "loss": 3.2902088165283203, "step": 1788, "token_acc": 0.2699603973251964 }, { "epoch": 1.0486660803283494, "grad_norm": 0.9020925336833913, "learning_rate": 0.0001999967643738486, "loss": 3.3249220848083496, "step": 1789, "token_acc": 0.26559681807510116 }, { "epoch": 1.0492524186455585, "grad_norm": 0.858729389464275, "learning_rate": 0.00019999668593770654, "loss": 3.266615390777588, "step": 1790, "token_acc": 0.2732078374609987 }, { "epoch": 1.0498387569627674, "grad_norm": 1.1730061251275634, "learning_rate": 0.00019999660656223038, "loss": 3.3166589736938477, "step": 1791, "token_acc": 0.26601517773931566 }, { "epoch": 1.0504250952799765, "grad_norm": 1.053190266511645, "learning_rate": 0.00019999652624742083, "loss": 3.2984392642974854, "step": 1792, "token_acc": 0.2692320630558596 }, { "epoch": 1.0510114335971856, "grad_norm": 0.7347243315133648, "learning_rate": 0.00019999644499327866, "loss": 3.247314453125, "step": 1793, "token_acc": 0.27509218527623236 }, { "epoch": 1.0515977719143945, "grad_norm": 0.8659809903170016, "learning_rate": 0.00019999636279980463, "loss": 3.308168411254883, "step": 1794, "token_acc": 0.26753872361826386 }, { "epoch": 1.0521841102316036, "grad_norm": 0.6952421235492701, "learning_rate": 0.00019999627966699952, "loss": 3.288480758666992, "step": 1795, "token_acc": 0.2701284869957663 }, { "epoch": 1.0527704485488127, "grad_norm": 0.820818645842747, "learning_rate": 0.00019999619559486412, "loss": 3.269139289855957, "step": 1796, "token_acc": 0.2713431240959616 }, { "epoch": 1.0533567868660216, "grad_norm": 0.8511836269489489, "learning_rate": 0.0001999961105833992, "loss": 3.262439250946045, "step": 1797, "token_acc": 0.27265030078449615 }, { "epoch": 1.0539431251832307, "grad_norm": 0.8689921812019348, "learning_rate": 0.00019999602463260555, "loss": 3.311861515045166, "step": 1798, "token_acc": 0.2675016376260972 }, { "epoch": 1.0545294635004399, "grad_norm": 1.0833239741221916, "learning_rate": 0.00019999593774248405, "loss": 3.2748827934265137, "step": 1799, "token_acc": 0.2728999686389833 }, { "epoch": 1.0551158018176487, "grad_norm": 1.2047799553899454, "learning_rate": 0.0001999958499130354, "loss": 3.268369674682617, "step": 1800, "token_acc": 0.2714039075076445 }, { "epoch": 1.0557021401348579, "grad_norm": 0.703747546598522, "learning_rate": 0.00019999576114426053, "loss": 3.3230302333831787, "step": 1801, "token_acc": 0.26502325605472743 }, { "epoch": 1.0562884784520667, "grad_norm": 0.8702269717656477, "learning_rate": 0.0001999956714361602, "loss": 3.3232264518737793, "step": 1802, "token_acc": 0.26534232662588064 }, { "epoch": 1.0568748167692759, "grad_norm": 1.362888073173297, "learning_rate": 0.00019999558078873531, "loss": 3.3137786388397217, "step": 1803, "token_acc": 0.2652842483969347 }, { "epoch": 1.057461155086485, "grad_norm": 1.0817603877332413, "learning_rate": 0.00019999548920198668, "loss": 3.3073887825012207, "step": 1804, "token_acc": 0.2677067626484421 }, { "epoch": 1.0580474934036939, "grad_norm": 1.008988663572395, "learning_rate": 0.00019999539667591516, "loss": 3.2882063388824463, "step": 1805, "token_acc": 0.2694314283704975 }, { "epoch": 1.058633831720903, "grad_norm": 1.036650086221023, "learning_rate": 0.00019999530321052162, "loss": 3.276179790496826, "step": 1806, "token_acc": 0.2710957839782997 }, { "epoch": 1.059220170038112, "grad_norm": 1.2501711170050571, "learning_rate": 0.00019999520880580697, "loss": 3.2764010429382324, "step": 1807, "token_acc": 0.26930443521865643 }, { "epoch": 1.059806508355321, "grad_norm": 0.7549989365623009, "learning_rate": 0.00019999511346177208, "loss": 3.2647905349731445, "step": 1808, "token_acc": 0.27399514922327 }, { "epoch": 1.06039284667253, "grad_norm": 1.1043727468325106, "learning_rate": 0.00019999501717841785, "loss": 3.296976327896118, "step": 1809, "token_acc": 0.2662570815052756 }, { "epoch": 1.060979184989739, "grad_norm": 0.8399871366697835, "learning_rate": 0.00019999491995574515, "loss": 3.29217529296875, "step": 1810, "token_acc": 0.26831980887259543 }, { "epoch": 1.061565523306948, "grad_norm": 0.791199311126801, "learning_rate": 0.00019999482179375498, "loss": 3.291705369949341, "step": 1811, "token_acc": 0.268534645772606 }, { "epoch": 1.0621518616241572, "grad_norm": 0.9071268839262596, "learning_rate": 0.00019999472269244815, "loss": 3.27934193611145, "step": 1812, "token_acc": 0.27001719356193726 }, { "epoch": 1.062738199941366, "grad_norm": 0.9246439977527963, "learning_rate": 0.00019999462265182566, "loss": 3.2667436599731445, "step": 1813, "token_acc": 0.27299279065171583 }, { "epoch": 1.0633245382585752, "grad_norm": 1.0563031336108097, "learning_rate": 0.00019999452167188844, "loss": 3.3085427284240723, "step": 1814, "token_acc": 0.2687913473201375 }, { "epoch": 1.0639108765757843, "grad_norm": 1.285219725925068, "learning_rate": 0.00019999441975263743, "loss": 3.289022445678711, "step": 1815, "token_acc": 0.2705862229325579 }, { "epoch": 1.0644972148929932, "grad_norm": 1.055197371119659, "learning_rate": 0.0001999943168940736, "loss": 3.2957544326782227, "step": 1816, "token_acc": 0.268383770728044 }, { "epoch": 1.0650835532102023, "grad_norm": 0.8367355036894132, "learning_rate": 0.00019999421309619788, "loss": 3.2617506980895996, "step": 1817, "token_acc": 0.27230388703642827 }, { "epoch": 1.0656698915274112, "grad_norm": 1.1100409339317048, "learning_rate": 0.0001999941083590113, "loss": 3.2972044944763184, "step": 1818, "token_acc": 0.2690728317053265 }, { "epoch": 1.0662562298446203, "grad_norm": 0.9979100975216163, "learning_rate": 0.0001999940026825148, "loss": 3.30416202545166, "step": 1819, "token_acc": 0.26464092233246855 }, { "epoch": 1.0668425681618294, "grad_norm": 1.1617221906558397, "learning_rate": 0.00019999389606670937, "loss": 3.303950786590576, "step": 1820, "token_acc": 0.26752964638011467 }, { "epoch": 1.0674289064790383, "grad_norm": 0.8527361789114442, "learning_rate": 0.00019999378851159606, "loss": 3.2986960411071777, "step": 1821, "token_acc": 0.2685309317645327 }, { "epoch": 1.0680152447962474, "grad_norm": 0.7592424934168102, "learning_rate": 0.00019999368001717585, "loss": 3.2949836254119873, "step": 1822, "token_acc": 0.2695125763386424 }, { "epoch": 1.0686015831134565, "grad_norm": 0.7034165253275559, "learning_rate": 0.00019999357058344975, "loss": 3.260822296142578, "step": 1823, "token_acc": 0.2720663221610568 }, { "epoch": 1.0691879214306654, "grad_norm": 0.8712614654449726, "learning_rate": 0.0001999934602104188, "loss": 3.300623893737793, "step": 1824, "token_acc": 0.26847036711493205 }, { "epoch": 1.0697742597478745, "grad_norm": 0.9007256456017521, "learning_rate": 0.00019999334889808404, "loss": 3.2684574127197266, "step": 1825, "token_acc": 0.2723802977407159 }, { "epoch": 1.0703605980650837, "grad_norm": 0.7507976999403495, "learning_rate": 0.00019999323664644648, "loss": 3.2420732975006104, "step": 1826, "token_acc": 0.27524754026446074 }, { "epoch": 1.0709469363822925, "grad_norm": 0.810335357863364, "learning_rate": 0.00019999312345550725, "loss": 3.233001708984375, "step": 1827, "token_acc": 0.2759885658568814 }, { "epoch": 1.0715332746995017, "grad_norm": 0.9445326861640987, "learning_rate": 0.00019999300932526735, "loss": 3.2508413791656494, "step": 1828, "token_acc": 0.2707376405760607 }, { "epoch": 1.0721196130167105, "grad_norm": 0.8669637829950226, "learning_rate": 0.00019999289425572786, "loss": 3.2938332557678223, "step": 1829, "token_acc": 0.2667393692262709 }, { "epoch": 1.0727059513339197, "grad_norm": 1.124917753942543, "learning_rate": 0.00019999277824688986, "loss": 3.3348937034606934, "step": 1830, "token_acc": 0.2609762902404251 }, { "epoch": 1.0732922896511288, "grad_norm": 1.060397605529051, "learning_rate": 0.00019999266129875446, "loss": 3.2559690475463867, "step": 1831, "token_acc": 0.27436314305923587 }, { "epoch": 1.0738786279683377, "grad_norm": 1.3091179763595335, "learning_rate": 0.0001999925434113228, "loss": 3.2178902626037598, "step": 1832, "token_acc": 0.2782505960152876 }, { "epoch": 1.0744649662855468, "grad_norm": 1.007301994772881, "learning_rate": 0.00019999242458459588, "loss": 3.2721283435821533, "step": 1833, "token_acc": 0.27125916679669215 }, { "epoch": 1.0750513046027559, "grad_norm": 1.1357547058198343, "learning_rate": 0.00019999230481857486, "loss": 3.2517542839050293, "step": 1834, "token_acc": 0.2733259049680942 }, { "epoch": 1.0756376429199648, "grad_norm": 0.8691403991492244, "learning_rate": 0.0001999921841132609, "loss": 3.3080880641937256, "step": 1835, "token_acc": 0.26807200439487433 }, { "epoch": 1.0762239812371739, "grad_norm": 0.7002440657151844, "learning_rate": 0.00019999206246865513, "loss": 3.2627310752868652, "step": 1836, "token_acc": 0.2732244404741222 }, { "epoch": 1.0768103195543828, "grad_norm": 0.8886735739645918, "learning_rate": 0.00019999193988475865, "loss": 3.3120410442352295, "step": 1837, "token_acc": 0.26791576631874814 }, { "epoch": 1.077396657871592, "grad_norm": 1.330173506907914, "learning_rate": 0.00019999181636157264, "loss": 3.269960880279541, "step": 1838, "token_acc": 0.27057833744945314 }, { "epoch": 1.077982996188801, "grad_norm": 0.7155288492670615, "learning_rate": 0.00019999169189909827, "loss": 3.293656826019287, "step": 1839, "token_acc": 0.26868622646776047 }, { "epoch": 1.07856933450601, "grad_norm": 1.022679874427088, "learning_rate": 0.00019999156649733667, "loss": 3.3083438873291016, "step": 1840, "token_acc": 0.26548180989068626 }, { "epoch": 1.079155672823219, "grad_norm": 1.1952614128090076, "learning_rate": 0.00019999144015628905, "loss": 3.256667137145996, "step": 1841, "token_acc": 0.27316613772995846 }, { "epoch": 1.0797420111404281, "grad_norm": 0.6980783547281257, "learning_rate": 0.0001999913128759566, "loss": 3.262158155441284, "step": 1842, "token_acc": 0.2726297984282676 }, { "epoch": 1.080328349457637, "grad_norm": 0.972063667146567, "learning_rate": 0.00019999118465634051, "loss": 3.3121566772460938, "step": 1843, "token_acc": 0.2663731749262482 }, { "epoch": 1.0809146877748461, "grad_norm": 1.219059018842931, "learning_rate": 0.00019999105549744196, "loss": 3.2812654972076416, "step": 1844, "token_acc": 0.2690675897090897 }, { "epoch": 1.081501026092055, "grad_norm": 0.8471561043926981, "learning_rate": 0.0001999909253992622, "loss": 3.266209125518799, "step": 1845, "token_acc": 0.2721826676317694 }, { "epoch": 1.0820873644092641, "grad_norm": 0.6927929587665244, "learning_rate": 0.00019999079436180245, "loss": 3.221024751663208, "step": 1846, "token_acc": 0.2784002218205718 }, { "epoch": 1.0826737027264732, "grad_norm": 0.8530468672856815, "learning_rate": 0.0001999906623850639, "loss": 3.2521119117736816, "step": 1847, "token_acc": 0.2743287859953603 }, { "epoch": 1.0832600410436821, "grad_norm": 1.053296951556091, "learning_rate": 0.00019999052946904783, "loss": 3.2244858741760254, "step": 1848, "token_acc": 0.2772258336714868 }, { "epoch": 1.0838463793608912, "grad_norm": 1.0873412752462581, "learning_rate": 0.00019999039561375545, "loss": 3.1957848072052, "step": 1849, "token_acc": 0.2817763057614455 }, { "epoch": 1.0844327176781003, "grad_norm": 0.893711618354491, "learning_rate": 0.00019999026081918807, "loss": 3.283287286758423, "step": 1850, "token_acc": 0.2702660709828274 }, { "epoch": 1.0850190559953092, "grad_norm": 0.72093050589574, "learning_rate": 0.0001999901250853469, "loss": 3.2703752517700195, "step": 1851, "token_acc": 0.2711028626304261 }, { "epoch": 1.0856053943125183, "grad_norm": 0.7998399852054139, "learning_rate": 0.0001999899884122333, "loss": 3.2745933532714844, "step": 1852, "token_acc": 0.2721095961381231 }, { "epoch": 1.0861917326297275, "grad_norm": 0.7439192601798515, "learning_rate": 0.00019998985079984843, "loss": 3.291837215423584, "step": 1853, "token_acc": 0.2682023104372479 }, { "epoch": 1.0867780709469363, "grad_norm": 0.9396164741457514, "learning_rate": 0.0001999897122481937, "loss": 3.313176155090332, "step": 1854, "token_acc": 0.26611744102036494 }, { "epoch": 1.0873644092641455, "grad_norm": 1.3512446163325371, "learning_rate": 0.00019998957275727032, "loss": 3.2301125526428223, "step": 1855, "token_acc": 0.27694490151951745 }, { "epoch": 1.0879507475813543, "grad_norm": 0.6356362614994411, "learning_rate": 0.00019998943232707968, "loss": 3.2800469398498535, "step": 1856, "token_acc": 0.2689310999918318 }, { "epoch": 1.0885370858985635, "grad_norm": 0.8873756071458211, "learning_rate": 0.000199989290957623, "loss": 3.335606575012207, "step": 1857, "token_acc": 0.2634876940533554 }, { "epoch": 1.0891234242157726, "grad_norm": 1.490022148237173, "learning_rate": 0.00019998914864890175, "loss": 3.2807559967041016, "step": 1858, "token_acc": 0.26815423812720474 }, { "epoch": 1.0897097625329815, "grad_norm": 0.8737319010951705, "learning_rate": 0.00019998900540091713, "loss": 3.283431053161621, "step": 1859, "token_acc": 0.26906003232567494 }, { "epoch": 1.0902961008501906, "grad_norm": 1.4000082887324203, "learning_rate": 0.00019998886121367056, "loss": 3.2860045433044434, "step": 1860, "token_acc": 0.26787274709833064 }, { "epoch": 1.0908824391673997, "grad_norm": 0.7021338758272468, "learning_rate": 0.00019998871608716337, "loss": 3.3341658115386963, "step": 1861, "token_acc": 0.2641494583682047 }, { "epoch": 1.0914687774846086, "grad_norm": 0.9623430337957842, "learning_rate": 0.00019998857002139693, "loss": 3.266134262084961, "step": 1862, "token_acc": 0.27455271220134675 }, { "epoch": 1.0920551158018177, "grad_norm": 0.6703763160659446, "learning_rate": 0.00019998842301637262, "loss": 3.284419536590576, "step": 1863, "token_acc": 0.2699447357233952 }, { "epoch": 1.0926414541190266, "grad_norm": 0.7072274104346352, "learning_rate": 0.0001999882750720918, "loss": 3.2228569984436035, "step": 1864, "token_acc": 0.2781263660692391 }, { "epoch": 1.0932277924362357, "grad_norm": 0.7321361759974758, "learning_rate": 0.00019998812618855587, "loss": 3.2342801094055176, "step": 1865, "token_acc": 0.27553758028469605 }, { "epoch": 1.0938141307534448, "grad_norm": 0.8849821970747961, "learning_rate": 0.00019998797636576625, "loss": 3.2230300903320312, "step": 1866, "token_acc": 0.2776290101997139 }, { "epoch": 1.0944004690706537, "grad_norm": 0.944092279363582, "learning_rate": 0.0001999878256037243, "loss": 3.2624435424804688, "step": 1867, "token_acc": 0.27430302545008073 }, { "epoch": 1.0949868073878628, "grad_norm": 1.0826813034969316, "learning_rate": 0.00019998767390243147, "loss": 3.30016827583313, "step": 1868, "token_acc": 0.26709118682616967 }, { "epoch": 1.095573145705072, "grad_norm": 1.1165912813478263, "learning_rate": 0.00019998752126188917, "loss": 3.2919421195983887, "step": 1869, "token_acc": 0.2697934673588236 }, { "epoch": 1.0961594840222808, "grad_norm": 1.0671160764336696, "learning_rate": 0.00019998736768209887, "loss": 3.3012094497680664, "step": 1870, "token_acc": 0.2694465171167453 }, { "epoch": 1.09674582233949, "grad_norm": 1.3854577877137213, "learning_rate": 0.00019998721316306196, "loss": 3.2641139030456543, "step": 1871, "token_acc": 0.27333703783307733 }, { "epoch": 1.0973321606566988, "grad_norm": 0.816386087943341, "learning_rate": 0.00019998705770477994, "loss": 3.2627835273742676, "step": 1872, "token_acc": 0.27380241122280463 }, { "epoch": 1.097918498973908, "grad_norm": 1.089044996735678, "learning_rate": 0.00019998690130725426, "loss": 3.245826244354248, "step": 1873, "token_acc": 0.27344066893427044 }, { "epoch": 1.098504837291117, "grad_norm": 1.2041683248652024, "learning_rate": 0.00019998674397048632, "loss": 3.2560391426086426, "step": 1874, "token_acc": 0.2755694643171402 }, { "epoch": 1.099091175608326, "grad_norm": 1.0372118770152572, "learning_rate": 0.00019998658569447773, "loss": 3.272392749786377, "step": 1875, "token_acc": 0.2698370931562452 }, { "epoch": 1.099677513925535, "grad_norm": 1.175723248517185, "learning_rate": 0.00019998642647922984, "loss": 3.3034539222717285, "step": 1876, "token_acc": 0.26746391901179145 }, { "epoch": 1.1002638522427441, "grad_norm": 1.0801960673699682, "learning_rate": 0.00019998626632474422, "loss": 3.2207512855529785, "step": 1877, "token_acc": 0.2797358622447505 }, { "epoch": 1.100850190559953, "grad_norm": 1.1218411404944004, "learning_rate": 0.00019998610523102236, "loss": 3.2619869709014893, "step": 1878, "token_acc": 0.2740995549412434 }, { "epoch": 1.1014365288771621, "grad_norm": 0.978740176291759, "learning_rate": 0.00019998594319806578, "loss": 3.3516883850097656, "step": 1879, "token_acc": 0.25971205559600913 }, { "epoch": 1.1020228671943713, "grad_norm": 1.0325344995137953, "learning_rate": 0.000199985780225876, "loss": 3.2373228073120117, "step": 1880, "token_acc": 0.2749891017429188 }, { "epoch": 1.1026092055115801, "grad_norm": 1.0222006803207744, "learning_rate": 0.00019998561631445457, "loss": 3.3411693572998047, "step": 1881, "token_acc": 0.26362470444181724 }, { "epoch": 1.1031955438287893, "grad_norm": 1.264597044475318, "learning_rate": 0.00019998545146380296, "loss": 3.251077175140381, "step": 1882, "token_acc": 0.27242163349768794 }, { "epoch": 1.1037818821459981, "grad_norm": 0.8723940992647987, "learning_rate": 0.0001999852856739228, "loss": 3.277228832244873, "step": 1883, "token_acc": 0.27135951569103867 }, { "epoch": 1.1043682204632073, "grad_norm": 0.9850632801138791, "learning_rate": 0.0001999851189448156, "loss": 3.322584629058838, "step": 1884, "token_acc": 0.2658655524353838 }, { "epoch": 1.1049545587804164, "grad_norm": 0.9902402188049431, "learning_rate": 0.00019998495127648293, "loss": 3.2704763412475586, "step": 1885, "token_acc": 0.2714962568155683 }, { "epoch": 1.1055408970976253, "grad_norm": 0.9124318344783483, "learning_rate": 0.00019998478266892636, "loss": 3.260925769805908, "step": 1886, "token_acc": 0.273031196589223 }, { "epoch": 1.1061272354148344, "grad_norm": 0.9050821086194911, "learning_rate": 0.00019998461312214754, "loss": 3.261286735534668, "step": 1887, "token_acc": 0.27357934417540575 }, { "epoch": 1.1067135737320435, "grad_norm": 0.7104105119569273, "learning_rate": 0.000199984442636148, "loss": 3.2960314750671387, "step": 1888, "token_acc": 0.2684728940323172 }, { "epoch": 1.1072999120492524, "grad_norm": 0.8027407709505193, "learning_rate": 0.0001999842712109293, "loss": 3.2970781326293945, "step": 1889, "token_acc": 0.2678171615045574 }, { "epoch": 1.1078862503664615, "grad_norm": 0.7217541932961473, "learning_rate": 0.00019998409884649317, "loss": 3.2484469413757324, "step": 1890, "token_acc": 0.2740503407640718 }, { "epoch": 1.1084725886836704, "grad_norm": 0.5599878746408855, "learning_rate": 0.0001999839255428411, "loss": 3.2927956581115723, "step": 1891, "token_acc": 0.26801284754215277 }, { "epoch": 1.1090589270008795, "grad_norm": 0.8896501995553592, "learning_rate": 0.00019998375129997483, "loss": 3.266360282897949, "step": 1892, "token_acc": 0.27072063507773086 }, { "epoch": 1.1096452653180886, "grad_norm": 0.7384035138966728, "learning_rate": 0.00019998357611789592, "loss": 3.2611944675445557, "step": 1893, "token_acc": 0.2720865153258709 }, { "epoch": 1.1102316036352975, "grad_norm": 0.6556593809311072, "learning_rate": 0.00019998339999660605, "loss": 3.2644712924957275, "step": 1894, "token_acc": 0.2736169697406971 }, { "epoch": 1.1108179419525066, "grad_norm": 0.9243238387160311, "learning_rate": 0.00019998322293610684, "loss": 3.277118682861328, "step": 1895, "token_acc": 0.269430762896659 }, { "epoch": 1.1114042802697157, "grad_norm": 0.9078650178153008, "learning_rate": 0.00019998304493640002, "loss": 3.2808914184570312, "step": 1896, "token_acc": 0.27006933621704143 }, { "epoch": 1.1119906185869246, "grad_norm": 0.9420049031876414, "learning_rate": 0.0001999828659974872, "loss": 3.264310121536255, "step": 1897, "token_acc": 0.27259516087132096 }, { "epoch": 1.1125769569041337, "grad_norm": 1.1332686217221968, "learning_rate": 0.0001999826861193701, "loss": 3.2504005432128906, "step": 1898, "token_acc": 0.2756216605566724 }, { "epoch": 1.1131632952213426, "grad_norm": 0.9309216392145607, "learning_rate": 0.00019998250530205036, "loss": 3.2979202270507812, "step": 1899, "token_acc": 0.2698678803039699 }, { "epoch": 1.1137496335385517, "grad_norm": 0.879448792237718, "learning_rate": 0.00019998232354552972, "loss": 3.215097427368164, "step": 1900, "token_acc": 0.27841257569900785 }, { "epoch": 1.1143359718557608, "grad_norm": 0.7579504968972564, "learning_rate": 0.0001999821408498099, "loss": 3.303497791290283, "step": 1901, "token_acc": 0.26813595729057516 }, { "epoch": 1.1149223101729697, "grad_norm": 0.763066652633548, "learning_rate": 0.00019998195721489256, "loss": 3.2818799018859863, "step": 1902, "token_acc": 0.27061246618089946 }, { "epoch": 1.1155086484901788, "grad_norm": 1.0251203513603857, "learning_rate": 0.00019998177264077952, "loss": 3.235461711883545, "step": 1903, "token_acc": 0.2764125273168302 }, { "epoch": 1.116094986807388, "grad_norm": 0.9161793449526694, "learning_rate": 0.00019998158712747238, "loss": 3.3044650554656982, "step": 1904, "token_acc": 0.2677502962283379 }, { "epoch": 1.1166813251245968, "grad_norm": 0.5852962554527037, "learning_rate": 0.000199981400674973, "loss": 3.2016355991363525, "step": 1905, "token_acc": 0.2807474966253843 }, { "epoch": 1.117267663441806, "grad_norm": 0.7826824908010281, "learning_rate": 0.0001999812132832831, "loss": 3.2903404235839844, "step": 1906, "token_acc": 0.2689765607368161 }, { "epoch": 1.117854001759015, "grad_norm": 1.3109488063799364, "learning_rate": 0.00019998102495240438, "loss": 3.3185501098632812, "step": 1907, "token_acc": 0.26501113948812693 }, { "epoch": 1.118440340076224, "grad_norm": 0.7801529538690938, "learning_rate": 0.0001999808356823387, "loss": 3.2621328830718994, "step": 1908, "token_acc": 0.271211303397889 }, { "epoch": 1.119026678393433, "grad_norm": 0.9244284006318981, "learning_rate": 0.00019998064547308776, "loss": 3.225433349609375, "step": 1909, "token_acc": 0.2752866920349615 }, { "epoch": 1.119613016710642, "grad_norm": 0.8327282322235704, "learning_rate": 0.0001999804543246534, "loss": 3.2406206130981445, "step": 1910, "token_acc": 0.27458419847164844 }, { "epoch": 1.120199355027851, "grad_norm": 0.8573451591305861, "learning_rate": 0.0001999802622370374, "loss": 3.2883567810058594, "step": 1911, "token_acc": 0.2680903920669972 }, { "epoch": 1.1207856933450602, "grad_norm": 0.9516033291377468, "learning_rate": 0.00019998006921024156, "loss": 3.279902935028076, "step": 1912, "token_acc": 0.26893878170140073 }, { "epoch": 1.121372031662269, "grad_norm": 0.7209634110149035, "learning_rate": 0.0001999798752442677, "loss": 3.2574493885040283, "step": 1913, "token_acc": 0.27425602873268345 }, { "epoch": 1.1219583699794782, "grad_norm": 0.8473539207558439, "learning_rate": 0.00019997968033911762, "loss": 3.306807041168213, "step": 1914, "token_acc": 0.2668744631631088 }, { "epoch": 1.1225447082966873, "grad_norm": 0.9133499325320734, "learning_rate": 0.00019997948449479317, "loss": 3.2702248096466064, "step": 1915, "token_acc": 0.27176241939117085 }, { "epoch": 1.1231310466138962, "grad_norm": 1.2364189553174405, "learning_rate": 0.0001999792877112962, "loss": 3.271911144256592, "step": 1916, "token_acc": 0.26938964837458607 }, { "epoch": 1.1237173849311053, "grad_norm": 0.7910159191946837, "learning_rate": 0.00019997908998862853, "loss": 3.2572860717773438, "step": 1917, "token_acc": 0.2728228611084094 }, { "epoch": 1.1243037232483142, "grad_norm": 0.8390285877249307, "learning_rate": 0.00019997889132679204, "loss": 3.27970552444458, "step": 1918, "token_acc": 0.27095307252671735 }, { "epoch": 1.1248900615655233, "grad_norm": 0.7623328731020307, "learning_rate": 0.00019997869172578862, "loss": 3.295478582382202, "step": 1919, "token_acc": 0.2679587057931986 }, { "epoch": 1.1254763998827324, "grad_norm": 0.9580346972410974, "learning_rate": 0.00019997849118562005, "loss": 3.279165744781494, "step": 1920, "token_acc": 0.2711731792504322 }, { "epoch": 1.1260627381999413, "grad_norm": 1.3158502452665057, "learning_rate": 0.00019997828970628833, "loss": 3.2494115829467773, "step": 1921, "token_acc": 0.27161823030455445 }, { "epoch": 1.1266490765171504, "grad_norm": 0.6457485214399122, "learning_rate": 0.00019997808728779525, "loss": 3.2810144424438477, "step": 1922, "token_acc": 0.2719407215367957 }, { "epoch": 1.1272354148343595, "grad_norm": 0.7927644439403694, "learning_rate": 0.0001999778839301428, "loss": 3.2561545372009277, "step": 1923, "token_acc": 0.2740903478246925 }, { "epoch": 1.1278217531515684, "grad_norm": 1.1914709675638202, "learning_rate": 0.00019997767963333285, "loss": 3.2644879817962646, "step": 1924, "token_acc": 0.2732024267727836 }, { "epoch": 1.1284080914687775, "grad_norm": 0.74748013902585, "learning_rate": 0.00019997747439736734, "loss": 3.25892972946167, "step": 1925, "token_acc": 0.27457159469896436 }, { "epoch": 1.1289944297859864, "grad_norm": 0.7377013543754479, "learning_rate": 0.00019997726822224815, "loss": 3.27091121673584, "step": 1926, "token_acc": 0.27047361504245854 }, { "epoch": 1.1295807681031955, "grad_norm": 0.9968560683067745, "learning_rate": 0.00019997706110797724, "loss": 3.266421318054199, "step": 1927, "token_acc": 0.2703210360604249 }, { "epoch": 1.1301671064204046, "grad_norm": 0.9327762840593035, "learning_rate": 0.00019997685305455658, "loss": 3.2666382789611816, "step": 1928, "token_acc": 0.2724808558442495 }, { "epoch": 1.1307534447376135, "grad_norm": 1.089282866912289, "learning_rate": 0.00019997664406198813, "loss": 3.218949556350708, "step": 1929, "token_acc": 0.2790187156750647 }, { "epoch": 1.1313397830548226, "grad_norm": 0.8085210582791142, "learning_rate": 0.0001999764341302738, "loss": 3.2501697540283203, "step": 1930, "token_acc": 0.27364933396443303 }, { "epoch": 1.1319261213720317, "grad_norm": 0.6959949615508015, "learning_rate": 0.00019997622325941555, "loss": 3.2533717155456543, "step": 1931, "token_acc": 0.27409006175464495 }, { "epoch": 1.1325124596892406, "grad_norm": 0.8481027511717674, "learning_rate": 0.00019997601144941546, "loss": 3.2488253116607666, "step": 1932, "token_acc": 0.27500396786794934 }, { "epoch": 1.1330987980064497, "grad_norm": 0.841987039466926, "learning_rate": 0.00019997579870027545, "loss": 3.2770237922668457, "step": 1933, "token_acc": 0.2708072353565416 }, { "epoch": 1.1336851363236589, "grad_norm": 0.9206310826549635, "learning_rate": 0.00019997558501199753, "loss": 3.3043227195739746, "step": 1934, "token_acc": 0.26589753328979404 }, { "epoch": 1.1342714746408677, "grad_norm": 0.749045382110028, "learning_rate": 0.0001999753703845837, "loss": 3.241389751434326, "step": 1935, "token_acc": 0.2754146346646267 }, { "epoch": 1.1348578129580769, "grad_norm": 0.5879098678686154, "learning_rate": 0.00019997515481803602, "loss": 3.2266345024108887, "step": 1936, "token_acc": 0.27851853000304916 }, { "epoch": 1.1354441512752858, "grad_norm": 0.6078205697295972, "learning_rate": 0.00019997493831235642, "loss": 3.2653117179870605, "step": 1937, "token_acc": 0.27090985218280783 }, { "epoch": 1.1360304895924949, "grad_norm": 0.6744283889950183, "learning_rate": 0.00019997472086754703, "loss": 3.2614212036132812, "step": 1938, "token_acc": 0.2717488194407598 }, { "epoch": 1.136616827909704, "grad_norm": 0.7901297961529072, "learning_rate": 0.00019997450248360985, "loss": 3.300222396850586, "step": 1939, "token_acc": 0.2661325920398533 }, { "epoch": 1.1372031662269129, "grad_norm": 0.8473773438046425, "learning_rate": 0.00019997428316054694, "loss": 3.2864084243774414, "step": 1940, "token_acc": 0.2703568523467954 }, { "epoch": 1.137789504544122, "grad_norm": 1.1686955481581123, "learning_rate": 0.00019997406289836033, "loss": 3.2466187477111816, "step": 1941, "token_acc": 0.27399214843326036 }, { "epoch": 1.1383758428613309, "grad_norm": 0.9373845068040074, "learning_rate": 0.00019997384169705214, "loss": 3.2632014751434326, "step": 1942, "token_acc": 0.2723962530882955 }, { "epoch": 1.13896218117854, "grad_norm": 0.8803843651518309, "learning_rate": 0.00019997361955662442, "loss": 3.2998671531677246, "step": 1943, "token_acc": 0.26663300250174304 }, { "epoch": 1.139548519495749, "grad_norm": 0.8697430520871321, "learning_rate": 0.00019997339647707924, "loss": 3.315596103668213, "step": 1944, "token_acc": 0.2656780890695745 }, { "epoch": 1.140134857812958, "grad_norm": 0.9277301011220579, "learning_rate": 0.00019997317245841877, "loss": 3.2290682792663574, "step": 1945, "token_acc": 0.27639741113377964 }, { "epoch": 1.140721196130167, "grad_norm": 1.2182329091203932, "learning_rate": 0.000199972947500645, "loss": 3.2481350898742676, "step": 1946, "token_acc": 0.2749248382884388 }, { "epoch": 1.1413075344473762, "grad_norm": 0.9417842095689553, "learning_rate": 0.00019997272160376012, "loss": 3.309577703475952, "step": 1947, "token_acc": 0.2660076398665799 }, { "epoch": 1.141893872764585, "grad_norm": 0.8144209020152119, "learning_rate": 0.00019997249476776626, "loss": 3.2357430458068848, "step": 1948, "token_acc": 0.27612199380090463 }, { "epoch": 1.1424802110817942, "grad_norm": 0.695459287540676, "learning_rate": 0.0001999722669926655, "loss": 3.25854754447937, "step": 1949, "token_acc": 0.27435871082212676 }, { "epoch": 1.1430665493990033, "grad_norm": 0.8129316378146372, "learning_rate": 0.00019997203827846, "loss": 3.3170485496520996, "step": 1950, "token_acc": 0.2654045212951503 }, { "epoch": 1.1436528877162122, "grad_norm": 0.7584360415146316, "learning_rate": 0.00019997180862515196, "loss": 3.290923595428467, "step": 1951, "token_acc": 0.26751815149262753 }, { "epoch": 1.1442392260334213, "grad_norm": 0.6701358583432266, "learning_rate": 0.00019997157803274346, "loss": 3.2826781272888184, "step": 1952, "token_acc": 0.2691333738498118 }, { "epoch": 1.1448255643506302, "grad_norm": 0.6356742044277998, "learning_rate": 0.00019997134650123668, "loss": 3.253901720046997, "step": 1953, "token_acc": 0.27367806780997855 }, { "epoch": 1.1454119026678393, "grad_norm": 0.8124574061078009, "learning_rate": 0.00019997111403063383, "loss": 3.2431864738464355, "step": 1954, "token_acc": 0.2740222609343209 }, { "epoch": 1.1459982409850484, "grad_norm": 0.932957826122799, "learning_rate": 0.00019997088062093706, "loss": 3.2937915325164795, "step": 1955, "token_acc": 0.2693883749752535 }, { "epoch": 1.1465845793022573, "grad_norm": 0.8784728539545127, "learning_rate": 0.00019997064627214861, "loss": 3.301311492919922, "step": 1956, "token_acc": 0.2658231471674348 }, { "epoch": 1.1471709176194664, "grad_norm": 0.9398565733266478, "learning_rate": 0.00019997041098427065, "loss": 3.2685933113098145, "step": 1957, "token_acc": 0.2717892958881396 }, { "epoch": 1.1477572559366755, "grad_norm": 0.9989404646996672, "learning_rate": 0.00019997017475730539, "loss": 3.2379813194274902, "step": 1958, "token_acc": 0.2758472597261068 }, { "epoch": 1.1483435942538844, "grad_norm": 0.958986700205622, "learning_rate": 0.00019996993759125502, "loss": 3.2710471153259277, "step": 1959, "token_acc": 0.27277080243606366 }, { "epoch": 1.1489299325710935, "grad_norm": 0.6672534510044124, "learning_rate": 0.0001999696994861218, "loss": 3.2638556957244873, "step": 1960, "token_acc": 0.27444183698886954 }, { "epoch": 1.1495162708883027, "grad_norm": 0.8300351749240533, "learning_rate": 0.000199969460441908, "loss": 3.2085366249084473, "step": 1961, "token_acc": 0.2780671778079071 }, { "epoch": 1.1501026092055116, "grad_norm": 1.0218739127993308, "learning_rate": 0.00019996922045861578, "loss": 3.2892489433288574, "step": 1962, "token_acc": 0.2695970600838863 }, { "epoch": 1.1506889475227207, "grad_norm": 0.8623101638235369, "learning_rate": 0.0001999689795362475, "loss": 3.2662177085876465, "step": 1963, "token_acc": 0.27107829949305523 }, { "epoch": 1.1512752858399296, "grad_norm": 0.8477698554209213, "learning_rate": 0.00019996873767480535, "loss": 3.2148969173431396, "step": 1964, "token_acc": 0.278102336568739 }, { "epoch": 1.1518616241571387, "grad_norm": 0.8845595793807728, "learning_rate": 0.00019996849487429158, "loss": 3.2937536239624023, "step": 1965, "token_acc": 0.2692820363247805 }, { "epoch": 1.1524479624743478, "grad_norm": 0.985028635816655, "learning_rate": 0.00019996825113470856, "loss": 3.276862621307373, "step": 1966, "token_acc": 0.269620024570337 }, { "epoch": 1.1530343007915567, "grad_norm": 0.6553812430577929, "learning_rate": 0.0001999680064560585, "loss": 3.2651476860046387, "step": 1967, "token_acc": 0.2728559725768096 }, { "epoch": 1.1536206391087658, "grad_norm": 0.673768152013136, "learning_rate": 0.00019996776083834375, "loss": 3.2586464881896973, "step": 1968, "token_acc": 0.27199175735720266 }, { "epoch": 1.1542069774259747, "grad_norm": 0.7660411828513154, "learning_rate": 0.00019996751428156658, "loss": 3.212815284729004, "step": 1969, "token_acc": 0.2786290108724476 }, { "epoch": 1.1547933157431838, "grad_norm": 0.9964241024652095, "learning_rate": 0.0001999672667857293, "loss": 3.218221664428711, "step": 1970, "token_acc": 0.2782306018854242 }, { "epoch": 1.155379654060393, "grad_norm": 0.8397888760741828, "learning_rate": 0.00019996701835083428, "loss": 3.3010244369506836, "step": 1971, "token_acc": 0.26843564143276194 }, { "epoch": 1.1559659923776018, "grad_norm": 0.7689723436886481, "learning_rate": 0.00019996676897688384, "loss": 3.2293550968170166, "step": 1972, "token_acc": 0.27704683934769464 }, { "epoch": 1.156552330694811, "grad_norm": 0.853284027378518, "learning_rate": 0.0001999665186638803, "loss": 3.2223005294799805, "step": 1973, "token_acc": 0.27791917861510806 }, { "epoch": 1.15713866901202, "grad_norm": 0.9891626363442542, "learning_rate": 0.00019996626741182602, "loss": 3.2942628860473633, "step": 1974, "token_acc": 0.2664776430547771 }, { "epoch": 1.157725007329229, "grad_norm": 0.9271522061011388, "learning_rate": 0.00019996601522072338, "loss": 3.2600202560424805, "step": 1975, "token_acc": 0.2732699905345899 }, { "epoch": 1.158311345646438, "grad_norm": 0.8254289934400133, "learning_rate": 0.0001999657620905747, "loss": 3.2105512619018555, "step": 1976, "token_acc": 0.2811946145953173 }, { "epoch": 1.1588976839636471, "grad_norm": 0.8296627513688101, "learning_rate": 0.00019996550802138242, "loss": 3.297508955001831, "step": 1977, "token_acc": 0.2684389024014251 }, { "epoch": 1.159484022280856, "grad_norm": 0.7257055600701903, "learning_rate": 0.0001999652530131489, "loss": 3.283249855041504, "step": 1978, "token_acc": 0.269326168903832 }, { "epoch": 1.1600703605980651, "grad_norm": 0.7027186500898142, "learning_rate": 0.00019996499706587652, "loss": 3.2580084800720215, "step": 1979, "token_acc": 0.27335557797263393 }, { "epoch": 1.160656698915274, "grad_norm": 0.8093970817807803, "learning_rate": 0.0001999647401795677, "loss": 3.2579898834228516, "step": 1980, "token_acc": 0.2714008811482285 }, { "epoch": 1.1612430372324831, "grad_norm": 0.7827391216836376, "learning_rate": 0.00019996448235422488, "loss": 3.2746973037719727, "step": 1981, "token_acc": 0.27259540630511236 }, { "epoch": 1.1618293755496922, "grad_norm": 0.6506335267933542, "learning_rate": 0.0001999642235898504, "loss": 3.2467989921569824, "step": 1982, "token_acc": 0.2744970328556955 }, { "epoch": 1.1624157138669011, "grad_norm": 0.7348201980745489, "learning_rate": 0.00019996396388644676, "loss": 3.242039680480957, "step": 1983, "token_acc": 0.2756301775996537 }, { "epoch": 1.1630020521841102, "grad_norm": 0.7300972178012441, "learning_rate": 0.00019996370324401637, "loss": 3.247382402420044, "step": 1984, "token_acc": 0.2748797946231797 }, { "epoch": 1.1635883905013193, "grad_norm": 0.8353060349608965, "learning_rate": 0.00019996344166256172, "loss": 3.253366708755493, "step": 1985, "token_acc": 0.27241338966525835 }, { "epoch": 1.1641747288185282, "grad_norm": 0.8356436308209866, "learning_rate": 0.00019996317914208525, "loss": 3.27044677734375, "step": 1986, "token_acc": 0.27158714445984294 }, { "epoch": 1.1647610671357373, "grad_norm": 0.7822850471316256, "learning_rate": 0.00019996291568258939, "loss": 3.2056796550750732, "step": 1987, "token_acc": 0.2808533593901086 }, { "epoch": 1.1653474054529465, "grad_norm": 0.8682312917673789, "learning_rate": 0.00019996265128407662, "loss": 3.259547233581543, "step": 1988, "token_acc": 0.2729803630432055 }, { "epoch": 1.1659337437701554, "grad_norm": 0.8675749657265975, "learning_rate": 0.00019996238594654947, "loss": 3.2365829944610596, "step": 1989, "token_acc": 0.27469171142431853 }, { "epoch": 1.1665200820873645, "grad_norm": 0.8032126529475314, "learning_rate": 0.0001999621196700104, "loss": 3.284641742706299, "step": 1990, "token_acc": 0.26934748807034475 }, { "epoch": 1.1671064204045734, "grad_norm": 0.778041515573633, "learning_rate": 0.0001999618524544619, "loss": 3.266928195953369, "step": 1991, "token_acc": 0.27106657122405153 }, { "epoch": 1.1676927587217825, "grad_norm": 0.8690471278502755, "learning_rate": 0.00019996158429990652, "loss": 3.2374746799468994, "step": 1992, "token_acc": 0.2763543248320766 }, { "epoch": 1.1682790970389916, "grad_norm": 0.861630358486042, "learning_rate": 0.00019996131520634672, "loss": 3.2934985160827637, "step": 1993, "token_acc": 0.2671104362541348 }, { "epoch": 1.1688654353562005, "grad_norm": 0.9036689254364504, "learning_rate": 0.00019996104517378511, "loss": 3.294443130493164, "step": 1994, "token_acc": 0.26843497874555394 }, { "epoch": 1.1694517736734096, "grad_norm": 0.9412080658009019, "learning_rate": 0.00019996077420222417, "loss": 3.249305486679077, "step": 1995, "token_acc": 0.2716355183147056 }, { "epoch": 1.1700381119906185, "grad_norm": 0.8857930200724065, "learning_rate": 0.00019996050229166643, "loss": 3.2995262145996094, "step": 1996, "token_acc": 0.2678745764658588 }, { "epoch": 1.1706244503078276, "grad_norm": 0.6518930795464495, "learning_rate": 0.00019996022944211448, "loss": 3.2961931228637695, "step": 1997, "token_acc": 0.2659757341989117 }, { "epoch": 1.1712107886250367, "grad_norm": 0.7335991476032812, "learning_rate": 0.00019995995565357086, "loss": 3.253488063812256, "step": 1998, "token_acc": 0.27380656405787446 }, { "epoch": 1.1717971269422456, "grad_norm": 0.7292390556697382, "learning_rate": 0.00019995968092603817, "loss": 3.2600302696228027, "step": 1999, "token_acc": 0.27267906096340455 }, { "epoch": 1.1723834652594547, "grad_norm": 0.5177465826850679, "learning_rate": 0.00019995940525951896, "loss": 3.26132869720459, "step": 2000, "token_acc": 0.2734660728458835 }, { "epoch": 1.1729698035766638, "grad_norm": 0.718937689264216, "learning_rate": 0.0001999591286540159, "loss": 3.2502331733703613, "step": 2001, "token_acc": 0.27259292796207085 }, { "epoch": 1.1735561418938727, "grad_norm": 0.6157077969268541, "learning_rate": 0.00019995885110953146, "loss": 3.221062183380127, "step": 2002, "token_acc": 0.2772924220541152 }, { "epoch": 1.1741424802110818, "grad_norm": 0.8024661926483448, "learning_rate": 0.0001999585726260683, "loss": 3.2899162769317627, "step": 2003, "token_acc": 0.2689389087294471 }, { "epoch": 1.174728818528291, "grad_norm": 1.1519905683398246, "learning_rate": 0.00019995829320362907, "loss": 3.257702350616455, "step": 2004, "token_acc": 0.27184555577518615 }, { "epoch": 1.1753151568454998, "grad_norm": 0.8024844038706597, "learning_rate": 0.00019995801284221638, "loss": 3.268692970275879, "step": 2005, "token_acc": 0.2712818958787032 }, { "epoch": 1.175901495162709, "grad_norm": 0.6693598356277342, "learning_rate": 0.00019995773154183284, "loss": 3.2361884117126465, "step": 2006, "token_acc": 0.27483449756932576 }, { "epoch": 1.1764878334799178, "grad_norm": 0.670025167376958, "learning_rate": 0.00019995744930248114, "loss": 3.2938122749328613, "step": 2007, "token_acc": 0.26667725279117777 }, { "epoch": 1.177074171797127, "grad_norm": 0.8540840861124067, "learning_rate": 0.00019995716612416383, "loss": 3.282114028930664, "step": 2008, "token_acc": 0.26711221272012353 }, { "epoch": 1.177660510114336, "grad_norm": 0.7931607422309142, "learning_rate": 0.0001999568820068837, "loss": 3.236847162246704, "step": 2009, "token_acc": 0.27360249839761763 }, { "epoch": 1.178246848431545, "grad_norm": 0.8554516183858673, "learning_rate": 0.00019995659695064332, "loss": 3.2495715618133545, "step": 2010, "token_acc": 0.27311921905757475 }, { "epoch": 1.178833186748754, "grad_norm": 0.7745890448428505, "learning_rate": 0.00019995631095544542, "loss": 3.195685863494873, "step": 2011, "token_acc": 0.28077329904283893 }, { "epoch": 1.1794195250659631, "grad_norm": 0.8852564516137074, "learning_rate": 0.00019995602402129268, "loss": 3.289680004119873, "step": 2012, "token_acc": 0.2668665512427849 }, { "epoch": 1.180005863383172, "grad_norm": 0.9622267091569193, "learning_rate": 0.00019995573614818777, "loss": 3.2482614517211914, "step": 2013, "token_acc": 0.27555830661858915 }, { "epoch": 1.1805922017003811, "grad_norm": 0.9613379214541318, "learning_rate": 0.00019995544733613342, "loss": 3.242762565612793, "step": 2014, "token_acc": 0.27423466367819177 }, { "epoch": 1.1811785400175903, "grad_norm": 0.9445665991133994, "learning_rate": 0.0001999551575851323, "loss": 3.233274459838867, "step": 2015, "token_acc": 0.2763170836091241 }, { "epoch": 1.1817648783347992, "grad_norm": 0.8930833042601309, "learning_rate": 0.00019995486689518722, "loss": 3.261763572692871, "step": 2016, "token_acc": 0.2726266801184626 }, { "epoch": 1.1823512166520083, "grad_norm": 0.7972642302546983, "learning_rate": 0.00019995457526630084, "loss": 3.2242867946624756, "step": 2017, "token_acc": 0.27707603005088915 }, { "epoch": 1.1829375549692172, "grad_norm": 0.7404690405877784, "learning_rate": 0.0001999542826984759, "loss": 3.2541773319244385, "step": 2018, "token_acc": 0.2733113797783366 }, { "epoch": 1.1835238932864263, "grad_norm": 0.8161207325155335, "learning_rate": 0.00019995398919171517, "loss": 3.2292823791503906, "step": 2019, "token_acc": 0.2751745130082022 }, { "epoch": 1.1841102316036354, "grad_norm": 0.7570681518435783, "learning_rate": 0.0001999536947460214, "loss": 3.2853622436523438, "step": 2020, "token_acc": 0.26895290852946085 }, { "epoch": 1.1846965699208443, "grad_norm": 0.6825551127485251, "learning_rate": 0.0001999533993613974, "loss": 3.254007339477539, "step": 2021, "token_acc": 0.27249751875006395 }, { "epoch": 1.1852829082380534, "grad_norm": 0.74254226673996, "learning_rate": 0.00019995310303784584, "loss": 3.2557644844055176, "step": 2022, "token_acc": 0.27399547794818707 }, { "epoch": 1.1858692465552623, "grad_norm": 0.6435715481447366, "learning_rate": 0.0001999528057753696, "loss": 3.2203073501586914, "step": 2023, "token_acc": 0.278271369756778 }, { "epoch": 1.1864555848724714, "grad_norm": 0.6747413634798002, "learning_rate": 0.00019995250757397142, "loss": 3.277912139892578, "step": 2024, "token_acc": 0.2721270641938896 }, { "epoch": 1.1870419231896805, "grad_norm": 0.7371871656659613, "learning_rate": 0.0001999522084336541, "loss": 3.258727550506592, "step": 2025, "token_acc": 0.27213489184468626 }, { "epoch": 1.1876282615068894, "grad_norm": 0.7492219471380487, "learning_rate": 0.00019995190835442051, "loss": 3.2612719535827637, "step": 2026, "token_acc": 0.27125480802989754 }, { "epoch": 1.1882145998240985, "grad_norm": 0.8117697901910712, "learning_rate": 0.00019995160733627342, "loss": 3.2734169960021973, "step": 2027, "token_acc": 0.27141197086658353 }, { "epoch": 1.1888009381413076, "grad_norm": 0.7554405098347979, "learning_rate": 0.00019995130537921565, "loss": 3.2358875274658203, "step": 2028, "token_acc": 0.2743048526894978 }, { "epoch": 1.1893872764585165, "grad_norm": 0.7519687425779817, "learning_rate": 0.00019995100248325007, "loss": 3.250917673110962, "step": 2029, "token_acc": 0.27256431749306853 }, { "epoch": 1.1899736147757256, "grad_norm": 0.7660760097585455, "learning_rate": 0.0001999506986483795, "loss": 3.264845371246338, "step": 2030, "token_acc": 0.27109547774583653 }, { "epoch": 1.1905599530929347, "grad_norm": 0.7227019360396504, "learning_rate": 0.0001999503938746068, "loss": 3.224827766418457, "step": 2031, "token_acc": 0.27476198214511416 }, { "epoch": 1.1911462914101436, "grad_norm": 0.7096499179706384, "learning_rate": 0.00019995008816193485, "loss": 3.237521171569824, "step": 2032, "token_acc": 0.27524512826837694 }, { "epoch": 1.1917326297273527, "grad_norm": 0.6703253516677996, "learning_rate": 0.00019994978151036648, "loss": 3.3084309101104736, "step": 2033, "token_acc": 0.26603097552728905 }, { "epoch": 1.1923189680445616, "grad_norm": 0.6022497359643784, "learning_rate": 0.0001999494739199046, "loss": 3.210780143737793, "step": 2034, "token_acc": 0.27962328185095997 }, { "epoch": 1.1929053063617707, "grad_norm": 1.0656626725126377, "learning_rate": 0.0001999491653905521, "loss": 3.2924139499664307, "step": 2035, "token_acc": 0.268935212925878 }, { "epoch": 1.1934916446789798, "grad_norm": 1.014876992677786, "learning_rate": 0.0001999488559223119, "loss": 3.2329001426696777, "step": 2036, "token_acc": 0.27664740420749845 }, { "epoch": 1.1940779829961887, "grad_norm": 0.7789300777988992, "learning_rate": 0.00019994854551518682, "loss": 3.264383316040039, "step": 2037, "token_acc": 0.2710244596791369 }, { "epoch": 1.1946643213133978, "grad_norm": 0.6606641768237064, "learning_rate": 0.0001999482341691799, "loss": 3.206993579864502, "step": 2038, "token_acc": 0.2794790145330045 }, { "epoch": 1.195250659630607, "grad_norm": 0.7800734140061765, "learning_rate": 0.000199947921884294, "loss": 3.2454488277435303, "step": 2039, "token_acc": 0.27575220119487787 }, { "epoch": 1.1958369979478158, "grad_norm": 0.7206048992364572, "learning_rate": 0.00019994760866053198, "loss": 3.2386507987976074, "step": 2040, "token_acc": 0.2736122628864098 }, { "epoch": 1.196423336265025, "grad_norm": 0.614807989212569, "learning_rate": 0.00019994729449789692, "loss": 3.2631406784057617, "step": 2041, "token_acc": 0.27201316675394627 }, { "epoch": 1.197009674582234, "grad_norm": 0.7670226964696416, "learning_rate": 0.00019994697939639173, "loss": 3.238020896911621, "step": 2042, "token_acc": 0.27551113864719984 }, { "epoch": 1.197596012899443, "grad_norm": 0.7732689235303511, "learning_rate": 0.0001999466633560193, "loss": 3.283295154571533, "step": 2043, "token_acc": 0.26933461658516283 }, { "epoch": 1.198182351216652, "grad_norm": 0.7873626704170653, "learning_rate": 0.0001999463463767827, "loss": 3.217207670211792, "step": 2044, "token_acc": 0.2780436341932072 }, { "epoch": 1.198768689533861, "grad_norm": 0.6780692112504618, "learning_rate": 0.0001999460284586848, "loss": 3.267735242843628, "step": 2045, "token_acc": 0.2701111054956922 }, { "epoch": 1.19935502785107, "grad_norm": 0.6411496423631139, "learning_rate": 0.00019994570960172868, "loss": 3.235504627227783, "step": 2046, "token_acc": 0.2770185407894702 }, { "epoch": 1.1999413661682792, "grad_norm": 0.6949354759461744, "learning_rate": 0.00019994538980591726, "loss": 3.192155361175537, "step": 2047, "token_acc": 0.2809576303059385 }, { "epoch": 1.200527704485488, "grad_norm": 0.6618181695803856, "learning_rate": 0.0001999450690712536, "loss": 3.3199121952056885, "step": 2048, "token_acc": 0.26607390054307317 }, { "epoch": 1.2011140428026972, "grad_norm": 0.5337850893424061, "learning_rate": 0.0001999447473977407, "loss": 3.2487683296203613, "step": 2049, "token_acc": 0.27245741332192736 }, { "epoch": 1.201700381119906, "grad_norm": 0.6664085775810086, "learning_rate": 0.0001999444247853816, "loss": 3.2674710750579834, "step": 2050, "token_acc": 0.2721702610611665 }, { "epoch": 1.2022867194371152, "grad_norm": 0.823499590078672, "learning_rate": 0.00019994410123417924, "loss": 3.2266602516174316, "step": 2051, "token_acc": 0.27755598656219616 }, { "epoch": 1.2028730577543243, "grad_norm": 0.8721375598893872, "learning_rate": 0.00019994377674413676, "loss": 3.2453181743621826, "step": 2052, "token_acc": 0.2741025907813963 }, { "epoch": 1.2034593960715332, "grad_norm": 0.9244862466117538, "learning_rate": 0.00019994345131525717, "loss": 3.2178244590759277, "step": 2053, "token_acc": 0.2791771509525723 }, { "epoch": 1.2040457343887423, "grad_norm": 1.1196099245212572, "learning_rate": 0.00019994312494754356, "loss": 3.256829261779785, "step": 2054, "token_acc": 0.2720128265783525 }, { "epoch": 1.2046320727059514, "grad_norm": 0.8641842542700614, "learning_rate": 0.00019994279764099892, "loss": 3.1919198036193848, "step": 2055, "token_acc": 0.2803798969817114 }, { "epoch": 1.2052184110231603, "grad_norm": 0.7219574640915918, "learning_rate": 0.0001999424693956264, "loss": 3.264314651489258, "step": 2056, "token_acc": 0.2727636143242163 }, { "epoch": 1.2058047493403694, "grad_norm": 0.9909495877523862, "learning_rate": 0.00019994214021142902, "loss": 3.2319424152374268, "step": 2057, "token_acc": 0.2761497505554857 }, { "epoch": 1.2063910876575785, "grad_norm": 1.0646397989858591, "learning_rate": 0.00019994181008840996, "loss": 3.298980474472046, "step": 2058, "token_acc": 0.2668688002601858 }, { "epoch": 1.2069774259747874, "grad_norm": 0.8597897116920034, "learning_rate": 0.00019994147902657224, "loss": 3.2467713356018066, "step": 2059, "token_acc": 0.27467695235883466 }, { "epoch": 1.2075637642919965, "grad_norm": 1.0475000077040948, "learning_rate": 0.00019994114702591898, "loss": 3.238417625427246, "step": 2060, "token_acc": 0.2745846389509174 }, { "epoch": 1.2081501026092054, "grad_norm": 0.8130668100084034, "learning_rate": 0.00019994081408645332, "loss": 3.274354934692383, "step": 2061, "token_acc": 0.26963046020117226 }, { "epoch": 1.2087364409264145, "grad_norm": 0.7261892841799678, "learning_rate": 0.0001999404802081784, "loss": 3.253032684326172, "step": 2062, "token_acc": 0.27494122754998307 }, { "epoch": 1.2093227792436236, "grad_norm": 0.7616039116543535, "learning_rate": 0.00019994014539109737, "loss": 3.2325727939605713, "step": 2063, "token_acc": 0.2752068788984797 }, { "epoch": 1.2099091175608325, "grad_norm": 0.9753137276825926, "learning_rate": 0.0001999398096352133, "loss": 3.176018476486206, "step": 2064, "token_acc": 0.28387230460402774 }, { "epoch": 1.2104954558780416, "grad_norm": 0.9444559467709397, "learning_rate": 0.00019993947294052943, "loss": 3.261976718902588, "step": 2065, "token_acc": 0.27122822830783916 }, { "epoch": 1.2110817941952507, "grad_norm": 0.6061907031630014, "learning_rate": 0.00019993913530704882, "loss": 3.2700295448303223, "step": 2066, "token_acc": 0.2695068367421966 }, { "epoch": 1.2116681325124596, "grad_norm": 0.7702134973146593, "learning_rate": 0.00019993879673477474, "loss": 3.2154996395111084, "step": 2067, "token_acc": 0.27732970636255355 }, { "epoch": 1.2122544708296688, "grad_norm": 0.9520840502911053, "learning_rate": 0.00019993845722371032, "loss": 3.248842239379883, "step": 2068, "token_acc": 0.27322229835199086 }, { "epoch": 1.2128408091468779, "grad_norm": 0.8658794283548628, "learning_rate": 0.0001999381167738588, "loss": 3.2003722190856934, "step": 2069, "token_acc": 0.2797433590113028 }, { "epoch": 1.2134271474640868, "grad_norm": 0.9016477761825271, "learning_rate": 0.0001999377753852233, "loss": 3.217028856277466, "step": 2070, "token_acc": 0.2782054450615919 }, { "epoch": 1.2140134857812959, "grad_norm": 0.8036189779348722, "learning_rate": 0.00019993743305780708, "loss": 3.295497417449951, "step": 2071, "token_acc": 0.26756264710507865 }, { "epoch": 1.2145998240985048, "grad_norm": 0.6545245660927652, "learning_rate": 0.00019993708979161335, "loss": 3.2754857540130615, "step": 2072, "token_acc": 0.26995701387291304 }, { "epoch": 1.2151861624157139, "grad_norm": 0.5805237574808724, "learning_rate": 0.00019993674558664533, "loss": 3.259950876235962, "step": 2073, "token_acc": 0.27178820685442473 }, { "epoch": 1.215772500732923, "grad_norm": 0.5642645501949415, "learning_rate": 0.00019993640044290624, "loss": 3.2452166080474854, "step": 2074, "token_acc": 0.27498536915921434 }, { "epoch": 1.2163588390501319, "grad_norm": 0.4813788786612914, "learning_rate": 0.00019993605436039932, "loss": 3.238658905029297, "step": 2075, "token_acc": 0.2766798943202436 }, { "epoch": 1.216945177367341, "grad_norm": 0.5817760126336858, "learning_rate": 0.00019993570733912788, "loss": 3.200277805328369, "step": 2076, "token_acc": 0.27990723225622555 }, { "epoch": 1.2175315156845499, "grad_norm": 0.5940748385262843, "learning_rate": 0.00019993535937909508, "loss": 3.214750051498413, "step": 2077, "token_acc": 0.27732758284727005 }, { "epoch": 1.218117854001759, "grad_norm": 0.5495133965349347, "learning_rate": 0.0001999350104803043, "loss": 3.2320947647094727, "step": 2078, "token_acc": 0.2753966405259208 }, { "epoch": 1.218704192318968, "grad_norm": 0.6913603813879012, "learning_rate": 0.00019993466064275873, "loss": 3.2195510864257812, "step": 2079, "token_acc": 0.27786837328082215 }, { "epoch": 1.219290530636177, "grad_norm": 0.9379586266692725, "learning_rate": 0.0001999343098664617, "loss": 3.2361984252929688, "step": 2080, "token_acc": 0.2748539705019098 }, { "epoch": 1.219876868953386, "grad_norm": 0.9134461505820622, "learning_rate": 0.00019993395815141648, "loss": 3.2189457416534424, "step": 2081, "token_acc": 0.2780203900779996 }, { "epoch": 1.2204632072705952, "grad_norm": 0.7060965669344349, "learning_rate": 0.0001999336054976264, "loss": 3.271450996398926, "step": 2082, "token_acc": 0.2709042554596282 }, { "epoch": 1.221049545587804, "grad_norm": 0.6668726382203133, "learning_rate": 0.00019993325190509472, "loss": 3.25471830368042, "step": 2083, "token_acc": 0.2727644723844672 }, { "epoch": 1.2216358839050132, "grad_norm": 0.7428538402139995, "learning_rate": 0.00019993289737382482, "loss": 3.2256226539611816, "step": 2084, "token_acc": 0.27700972329217766 }, { "epoch": 1.2222222222222223, "grad_norm": 0.7248845749148265, "learning_rate": 0.00019993254190382004, "loss": 3.270136594772339, "step": 2085, "token_acc": 0.269031450338762 }, { "epoch": 1.2228085605394312, "grad_norm": 0.8480222408794976, "learning_rate": 0.00019993218549508364, "loss": 3.234304189682007, "step": 2086, "token_acc": 0.2751677675797537 }, { "epoch": 1.2233948988566403, "grad_norm": 0.7953781322910523, "learning_rate": 0.00019993182814761906, "loss": 3.258174419403076, "step": 2087, "token_acc": 0.27418361734209395 }, { "epoch": 1.2239812371738492, "grad_norm": 0.8043188982441075, "learning_rate": 0.0001999314698614296, "loss": 3.2224857807159424, "step": 2088, "token_acc": 0.2771479979646542 }, { "epoch": 1.2245675754910583, "grad_norm": 0.8763197021257622, "learning_rate": 0.00019993111063651867, "loss": 3.238600254058838, "step": 2089, "token_acc": 0.2759119171410032 }, { "epoch": 1.2251539138082674, "grad_norm": 0.7835168773415344, "learning_rate": 0.00019993075047288955, "loss": 3.236215591430664, "step": 2090, "token_acc": 0.2735578123699326 }, { "epoch": 1.2257402521254763, "grad_norm": 0.6740945104488825, "learning_rate": 0.00019993038937054573, "loss": 3.2422947883605957, "step": 2091, "token_acc": 0.2759259456409492 }, { "epoch": 1.2263265904426854, "grad_norm": 0.5212942601727388, "learning_rate": 0.00019993002732949055, "loss": 3.2242431640625, "step": 2092, "token_acc": 0.27570878172601965 }, { "epoch": 1.2269129287598945, "grad_norm": 0.6049830425773788, "learning_rate": 0.00019992966434972741, "loss": 3.2530179023742676, "step": 2093, "token_acc": 0.27245231121600905 }, { "epoch": 1.2274992670771034, "grad_norm": 0.6753899437272635, "learning_rate": 0.00019992930043125976, "loss": 3.265923500061035, "step": 2094, "token_acc": 0.2701323961830084 }, { "epoch": 1.2280856053943126, "grad_norm": 0.456299638698701, "learning_rate": 0.00019992893557409098, "loss": 3.2533345222473145, "step": 2095, "token_acc": 0.27371421752562286 }, { "epoch": 1.2286719437115217, "grad_norm": 0.5209591167763342, "learning_rate": 0.0001999285697782245, "loss": 3.207162618637085, "step": 2096, "token_acc": 0.2784138670686107 }, { "epoch": 1.2292582820287306, "grad_norm": 0.6513276798334263, "learning_rate": 0.00019992820304366374, "loss": 3.265101909637451, "step": 2097, "token_acc": 0.2713824629637132 }, { "epoch": 1.2298446203459397, "grad_norm": 0.8913523771844125, "learning_rate": 0.0001999278353704122, "loss": 3.2463419437408447, "step": 2098, "token_acc": 0.27331340158766665 }, { "epoch": 1.2304309586631486, "grad_norm": 1.1717570085462516, "learning_rate": 0.00019992746675847326, "loss": 3.2408082485198975, "step": 2099, "token_acc": 0.27433389028481053 }, { "epoch": 1.2310172969803577, "grad_norm": 0.7572878790321899, "learning_rate": 0.00019992709720785046, "loss": 3.2798378467559814, "step": 2100, "token_acc": 0.2700344377649185 }, { "epoch": 1.2316036352975668, "grad_norm": 0.8730022204651231, "learning_rate": 0.00019992672671854722, "loss": 3.2689807415008545, "step": 2101, "token_acc": 0.2717733871682256 }, { "epoch": 1.2321899736147757, "grad_norm": 1.1999072009775023, "learning_rate": 0.00019992635529056708, "loss": 3.3413915634155273, "step": 2102, "token_acc": 0.2625287819785024 }, { "epoch": 1.2327763119319848, "grad_norm": 0.7171919039153778, "learning_rate": 0.00019992598292391343, "loss": 3.2402501106262207, "step": 2103, "token_acc": 0.2761459413290932 }, { "epoch": 1.2333626502491937, "grad_norm": 0.8064018182677524, "learning_rate": 0.00019992560961858984, "loss": 3.262726306915283, "step": 2104, "token_acc": 0.271804857497036 }, { "epoch": 1.2339489885664028, "grad_norm": 0.8459765717445233, "learning_rate": 0.0001999252353745998, "loss": 3.27354097366333, "step": 2105, "token_acc": 0.2695342313298159 }, { "epoch": 1.234535326883612, "grad_norm": 0.836947634583542, "learning_rate": 0.00019992486019194684, "loss": 3.260399580001831, "step": 2106, "token_acc": 0.27135008623470236 }, { "epoch": 1.2351216652008208, "grad_norm": 0.7602294558882761, "learning_rate": 0.0001999244840706344, "loss": 3.2782716751098633, "step": 2107, "token_acc": 0.2698450443930367 }, { "epoch": 1.23570800351803, "grad_norm": 0.7814186784810427, "learning_rate": 0.00019992410701066615, "loss": 3.2937588691711426, "step": 2108, "token_acc": 0.26817424134866674 }, { "epoch": 1.236294341835239, "grad_norm": 0.6259638405837912, "learning_rate": 0.00019992372901204557, "loss": 3.2281618118286133, "step": 2109, "token_acc": 0.27528459848538006 }, { "epoch": 1.236880680152448, "grad_norm": 0.483736593563296, "learning_rate": 0.00019992335007477618, "loss": 3.2342374324798584, "step": 2110, "token_acc": 0.27565841476256625 }, { "epoch": 1.237467018469657, "grad_norm": 0.630266095374544, "learning_rate": 0.00019992297019886154, "loss": 3.2631545066833496, "step": 2111, "token_acc": 0.27094199318790463 }, { "epoch": 1.2380533567868661, "grad_norm": 0.5639592873036668, "learning_rate": 0.0001999225893843053, "loss": 3.198659896850586, "step": 2112, "token_acc": 0.2802096105336322 }, { "epoch": 1.238639695104075, "grad_norm": 0.6632747779578492, "learning_rate": 0.00019992220763111093, "loss": 3.2554709911346436, "step": 2113, "token_acc": 0.27210225544652716 }, { "epoch": 1.2392260334212841, "grad_norm": 0.7272417788706874, "learning_rate": 0.0001999218249392821, "loss": 3.237191677093506, "step": 2114, "token_acc": 0.27372554095743534 }, { "epoch": 1.239812371738493, "grad_norm": 0.8577853457392106, "learning_rate": 0.00019992144130882234, "loss": 3.253605365753174, "step": 2115, "token_acc": 0.27277816302086616 }, { "epoch": 1.2403987100557021, "grad_norm": 0.8920984218240926, "learning_rate": 0.00019992105673973532, "loss": 3.2642173767089844, "step": 2116, "token_acc": 0.2720777279521674 }, { "epoch": 1.2409850483729112, "grad_norm": 0.6724170158262036, "learning_rate": 0.0001999206712320246, "loss": 3.2222015857696533, "step": 2117, "token_acc": 0.2753767035227565 }, { "epoch": 1.2415713866901201, "grad_norm": 0.6083123287550836, "learning_rate": 0.0001999202847856938, "loss": 3.2815628051757812, "step": 2118, "token_acc": 0.26882221370623344 }, { "epoch": 1.2421577250073292, "grad_norm": 0.7345078030171832, "learning_rate": 0.0001999198974007466, "loss": 3.2437543869018555, "step": 2119, "token_acc": 0.2739233463562791 }, { "epoch": 1.2427440633245384, "grad_norm": 0.7783908533656655, "learning_rate": 0.00019991950907718658, "loss": 3.2664730548858643, "step": 2120, "token_acc": 0.27040531532654466 }, { "epoch": 1.2433304016417472, "grad_norm": 0.9970579713164444, "learning_rate": 0.00019991911981501743, "loss": 3.259303569793701, "step": 2121, "token_acc": 0.27222284366033406 }, { "epoch": 1.2439167399589564, "grad_norm": 1.0198371246734628, "learning_rate": 0.0001999187296142428, "loss": 3.2523155212402344, "step": 2122, "token_acc": 0.27243841255547696 }, { "epoch": 1.2445030782761655, "grad_norm": 0.7793279231356892, "learning_rate": 0.00019991833847486635, "loss": 3.2021541595458984, "step": 2123, "token_acc": 0.27830900598079394 }, { "epoch": 1.2450894165933744, "grad_norm": 0.6446590922413522, "learning_rate": 0.00019991794639689174, "loss": 3.266526699066162, "step": 2124, "token_acc": 0.27196870191286004 }, { "epoch": 1.2456757549105835, "grad_norm": 0.594158856943162, "learning_rate": 0.00019991755338032266, "loss": 3.2651641368865967, "step": 2125, "token_acc": 0.27245110411774415 }, { "epoch": 1.2462620932277924, "grad_norm": 0.5342609035495294, "learning_rate": 0.0001999171594251628, "loss": 3.302712917327881, "step": 2126, "token_acc": 0.26477727599302064 }, { "epoch": 1.2468484315450015, "grad_norm": 0.4775861843555774, "learning_rate": 0.0001999167645314159, "loss": 3.281754493713379, "step": 2127, "token_acc": 0.2664507966795083 }, { "epoch": 1.2474347698622106, "grad_norm": 0.5566738635080677, "learning_rate": 0.0001999163686990856, "loss": 3.23808217048645, "step": 2128, "token_acc": 0.2747500603212236 }, { "epoch": 1.2480211081794195, "grad_norm": 0.45640086438844707, "learning_rate": 0.00019991597192817566, "loss": 3.2125957012176514, "step": 2129, "token_acc": 0.27919218914341043 }, { "epoch": 1.2486074464966286, "grad_norm": 0.5992324717617522, "learning_rate": 0.00019991557421868982, "loss": 3.2420685291290283, "step": 2130, "token_acc": 0.2736017205457356 }, { "epoch": 1.2491937848138375, "grad_norm": 0.5895001888250889, "learning_rate": 0.0001999151755706318, "loss": 3.243584156036377, "step": 2131, "token_acc": 0.27488625013871937 }, { "epoch": 1.2497801231310466, "grad_norm": 0.729691625239649, "learning_rate": 0.00019991477598400533, "loss": 3.2531533241271973, "step": 2132, "token_acc": 0.2725829221224955 }, { "epoch": 1.2503664614482557, "grad_norm": 0.8477726169408945, "learning_rate": 0.0001999143754588142, "loss": 3.272512674331665, "step": 2133, "token_acc": 0.2689022057886013 }, { "epoch": 1.2509527997654648, "grad_norm": 1.065825354563309, "learning_rate": 0.00019991397399506214, "loss": 3.281550884246826, "step": 2134, "token_acc": 0.2683666234096352 }, { "epoch": 1.2515391380826737, "grad_norm": 0.8596864082259226, "learning_rate": 0.00019991357159275291, "loss": 3.239239454269409, "step": 2135, "token_acc": 0.27454686045387716 }, { "epoch": 1.2521254763998828, "grad_norm": 0.7655384878888009, "learning_rate": 0.0001999131682518903, "loss": 3.2360548973083496, "step": 2136, "token_acc": 0.275677134795408 }, { "epoch": 1.2527118147170917, "grad_norm": 0.5258642112913008, "learning_rate": 0.00019991276397247814, "loss": 3.2244338989257812, "step": 2137, "token_acc": 0.2766056994735059 }, { "epoch": 1.2532981530343008, "grad_norm": 0.6186875016615384, "learning_rate": 0.00019991235875452018, "loss": 3.2318358421325684, "step": 2138, "token_acc": 0.2740722568241444 }, { "epoch": 1.25388449135151, "grad_norm": 0.7519474609825382, "learning_rate": 0.00019991195259802026, "loss": 3.216973304748535, "step": 2139, "token_acc": 0.2766929594528477 }, { "epoch": 1.2544708296687188, "grad_norm": 0.665647492742681, "learning_rate": 0.00019991154550298216, "loss": 3.2894692420959473, "step": 2140, "token_acc": 0.2688509385961731 }, { "epoch": 1.255057167985928, "grad_norm": 0.7019323356323866, "learning_rate": 0.00019991113746940973, "loss": 3.2308433055877686, "step": 2141, "token_acc": 0.27640619617721846 }, { "epoch": 1.2556435063031368, "grad_norm": 0.6241000967432053, "learning_rate": 0.00019991072849730678, "loss": 3.2318615913391113, "step": 2142, "token_acc": 0.2759584386046786 }, { "epoch": 1.256229844620346, "grad_norm": 0.6953932918913984, "learning_rate": 0.0001999103185866772, "loss": 3.2314140796661377, "step": 2143, "token_acc": 0.2751824911822845 }, { "epoch": 1.256816182937555, "grad_norm": 0.7471169400723318, "learning_rate": 0.00019990990773752478, "loss": 3.3068342208862305, "step": 2144, "token_acc": 0.26609806696390076 }, { "epoch": 1.257402521254764, "grad_norm": 0.6696148307418435, "learning_rate": 0.0001999094959498534, "loss": 3.2601871490478516, "step": 2145, "token_acc": 0.2703857538432159 }, { "epoch": 1.257988859571973, "grad_norm": 0.7970819925096037, "learning_rate": 0.00019990908322366696, "loss": 3.2590270042419434, "step": 2146, "token_acc": 0.2714718917658036 }, { "epoch": 1.258575197889182, "grad_norm": 0.6530741985561475, "learning_rate": 0.00019990866955896933, "loss": 3.1904618740081787, "step": 2147, "token_acc": 0.2813886848666039 }, { "epoch": 1.259161536206391, "grad_norm": 0.6327277126307087, "learning_rate": 0.00019990825495576434, "loss": 3.2751193046569824, "step": 2148, "token_acc": 0.26960616068318705 }, { "epoch": 1.2597478745236002, "grad_norm": 0.6823775712105917, "learning_rate": 0.00019990783941405593, "loss": 3.1972270011901855, "step": 2149, "token_acc": 0.2791242397366087 }, { "epoch": 1.2603342128408093, "grad_norm": 0.5467204222176628, "learning_rate": 0.000199907422933848, "loss": 3.2215945720672607, "step": 2150, "token_acc": 0.27730997797339524 }, { "epoch": 1.2609205511580182, "grad_norm": 0.6335703896690619, "learning_rate": 0.00019990700551514445, "loss": 3.224214553833008, "step": 2151, "token_acc": 0.2768862396189084 }, { "epoch": 1.2615068894752273, "grad_norm": 0.5949194678482058, "learning_rate": 0.00019990658715794923, "loss": 3.2176411151885986, "step": 2152, "token_acc": 0.27845557104300117 }, { "epoch": 1.2620932277924362, "grad_norm": 0.6106785645344613, "learning_rate": 0.00019990616786226624, "loss": 3.22220516204834, "step": 2153, "token_acc": 0.2765824224342079 }, { "epoch": 1.2626795661096453, "grad_norm": 0.6497760827356881, "learning_rate": 0.00019990574762809943, "loss": 3.235352039337158, "step": 2154, "token_acc": 0.2742665407610396 }, { "epoch": 1.2632659044268544, "grad_norm": 0.7023374739174322, "learning_rate": 0.00019990532645545274, "loss": 3.2108068466186523, "step": 2155, "token_acc": 0.27878084179970974 }, { "epoch": 1.2638522427440633, "grad_norm": 0.7291585526232633, "learning_rate": 0.00019990490434433012, "loss": 3.233272075653076, "step": 2156, "token_acc": 0.27409529052496 }, { "epoch": 1.2644385810612724, "grad_norm": 0.7176754275713474, "learning_rate": 0.00019990448129473558, "loss": 3.298462390899658, "step": 2157, "token_acc": 0.266765168235606 }, { "epoch": 1.2650249193784813, "grad_norm": 0.712373129786324, "learning_rate": 0.00019990405730667304, "loss": 3.269559383392334, "step": 2158, "token_acc": 0.2699186564516341 }, { "epoch": 1.2656112576956904, "grad_norm": 0.6114463624677939, "learning_rate": 0.0001999036323801465, "loss": 3.2140755653381348, "step": 2159, "token_acc": 0.2767617469399277 }, { "epoch": 1.2661975960128995, "grad_norm": 0.5830773296686367, "learning_rate": 0.00019990320651515998, "loss": 3.2144436836242676, "step": 2160, "token_acc": 0.2772574890816264 }, { "epoch": 1.2667839343301086, "grad_norm": 0.7750272980897177, "learning_rate": 0.00019990277971171746, "loss": 3.235363721847534, "step": 2161, "token_acc": 0.27473418425258866 }, { "epoch": 1.2673702726473175, "grad_norm": 0.7512707441949172, "learning_rate": 0.0001999023519698229, "loss": 3.2060887813568115, "step": 2162, "token_acc": 0.27986954920835305 }, { "epoch": 1.2679566109645266, "grad_norm": 0.6293976369808847, "learning_rate": 0.00019990192328948037, "loss": 3.269015312194824, "step": 2163, "token_acc": 0.2693025795368286 }, { "epoch": 1.2685429492817355, "grad_norm": 0.7412463955948765, "learning_rate": 0.00019990149367069392, "loss": 3.199249744415283, "step": 2164, "token_acc": 0.2781958390606435 }, { "epoch": 1.2691292875989446, "grad_norm": 0.5633395651180294, "learning_rate": 0.00019990106311346755, "loss": 3.1954383850097656, "step": 2165, "token_acc": 0.2825022938820282 }, { "epoch": 1.2697156259161537, "grad_norm": 0.663621156911283, "learning_rate": 0.00019990063161780532, "loss": 3.2284841537475586, "step": 2166, "token_acc": 0.2753045252798392 }, { "epoch": 1.2703019642333626, "grad_norm": 0.8266267092526703, "learning_rate": 0.00019990019918371123, "loss": 3.248342514038086, "step": 2167, "token_acc": 0.2741044207317073 }, { "epoch": 1.2708883025505717, "grad_norm": 0.7546182676163214, "learning_rate": 0.00019989976581118944, "loss": 3.263092517852783, "step": 2168, "token_acc": 0.2699587672382286 }, { "epoch": 1.2714746408677806, "grad_norm": 0.7404262312100173, "learning_rate": 0.00019989933150024394, "loss": 3.2622618675231934, "step": 2169, "token_acc": 0.27007631612986677 }, { "epoch": 1.2720609791849897, "grad_norm": 0.5533614220311185, "learning_rate": 0.00019989889625087883, "loss": 3.2398970127105713, "step": 2170, "token_acc": 0.27420291627181853 }, { "epoch": 1.2726473175021988, "grad_norm": 0.5833857254368834, "learning_rate": 0.00019989846006309822, "loss": 3.273425340652466, "step": 2171, "token_acc": 0.2690484607187183 }, { "epoch": 1.2732336558194077, "grad_norm": 0.5783378198681538, "learning_rate": 0.00019989802293690615, "loss": 3.267561674118042, "step": 2172, "token_acc": 0.27068126050831265 }, { "epoch": 1.2738199941366168, "grad_norm": 0.6052277147591019, "learning_rate": 0.00019989758487230682, "loss": 3.2454724311828613, "step": 2173, "token_acc": 0.274316649888388 }, { "epoch": 1.2744063324538257, "grad_norm": 0.7487010540600042, "learning_rate": 0.00019989714586930428, "loss": 3.2361464500427246, "step": 2174, "token_acc": 0.27505867716321397 }, { "epoch": 1.2749926707710348, "grad_norm": 0.683503620306193, "learning_rate": 0.00019989670592790267, "loss": 3.2607080936431885, "step": 2175, "token_acc": 0.27137833854745713 }, { "epoch": 1.275579009088244, "grad_norm": 0.4819795493747239, "learning_rate": 0.00019989626504810613, "loss": 3.2419040203094482, "step": 2176, "token_acc": 0.2741549246443665 }, { "epoch": 1.276165347405453, "grad_norm": 0.5235147978251798, "learning_rate": 0.00019989582322991876, "loss": 3.235147476196289, "step": 2177, "token_acc": 0.2746303321991518 }, { "epoch": 1.276751685722662, "grad_norm": 0.4527633688792298, "learning_rate": 0.00019989538047334476, "loss": 3.2353568077087402, "step": 2178, "token_acc": 0.27443259686864074 }, { "epoch": 1.277338024039871, "grad_norm": 0.6131312861860545, "learning_rate": 0.00019989493677838825, "loss": 3.222853183746338, "step": 2179, "token_acc": 0.2749989909725679 }, { "epoch": 1.27792436235708, "grad_norm": 0.5677673801830521, "learning_rate": 0.00019989449214505346, "loss": 3.2522151470184326, "step": 2180, "token_acc": 0.27095981002827463 }, { "epoch": 1.278510700674289, "grad_norm": 0.5601336655207524, "learning_rate": 0.00019989404657334449, "loss": 3.2426445484161377, "step": 2181, "token_acc": 0.2739189555273256 }, { "epoch": 1.2790970389914982, "grad_norm": 0.5945498272627211, "learning_rate": 0.00019989360006326558, "loss": 3.270915985107422, "step": 2182, "token_acc": 0.27023450880052824 }, { "epoch": 1.279683377308707, "grad_norm": 0.6923372350605109, "learning_rate": 0.00019989315261482093, "loss": 3.2347657680511475, "step": 2183, "token_acc": 0.2753687582340937 }, { "epoch": 1.2802697156259162, "grad_norm": 0.8223412916588332, "learning_rate": 0.00019989270422801468, "loss": 3.2032394409179688, "step": 2184, "token_acc": 0.27703133047923395 }, { "epoch": 1.280856053943125, "grad_norm": 1.052744822789687, "learning_rate": 0.0001998922549028511, "loss": 3.2600834369659424, "step": 2185, "token_acc": 0.2696017944204055 }, { "epoch": 1.2814423922603342, "grad_norm": 0.9838125937422946, "learning_rate": 0.00019989180463933436, "loss": 3.20468807220459, "step": 2186, "token_acc": 0.27884265872101655 }, { "epoch": 1.2820287305775433, "grad_norm": 0.8908789010185257, "learning_rate": 0.00019989135343746877, "loss": 3.2697415351867676, "step": 2187, "token_acc": 0.2686644022393985 }, { "epoch": 1.2826150688947524, "grad_norm": 1.0217860724860865, "learning_rate": 0.0001998909012972585, "loss": 3.2619426250457764, "step": 2188, "token_acc": 0.270508053897804 }, { "epoch": 1.2832014072119613, "grad_norm": 0.6154334343656209, "learning_rate": 0.0001998904482187078, "loss": 3.211843967437744, "step": 2189, "token_acc": 0.2774608888627537 }, { "epoch": 1.2837877455291704, "grad_norm": 0.6684831294288789, "learning_rate": 0.00019988999420182096, "loss": 3.234035015106201, "step": 2190, "token_acc": 0.2760094847890219 }, { "epoch": 1.2843740838463793, "grad_norm": 0.7719757671130388, "learning_rate": 0.0001998895392466022, "loss": 3.232257127761841, "step": 2191, "token_acc": 0.27577705805578384 }, { "epoch": 1.2849604221635884, "grad_norm": 0.723670775455715, "learning_rate": 0.00019988908335305587, "loss": 3.240888833999634, "step": 2192, "token_acc": 0.2752683200188974 }, { "epoch": 1.2855467604807975, "grad_norm": 0.8035035576830801, "learning_rate": 0.00019988862652118615, "loss": 3.2716798782348633, "step": 2193, "token_acc": 0.26834522381463477 }, { "epoch": 1.2861330987980064, "grad_norm": 0.6514107213947139, "learning_rate": 0.00019988816875099742, "loss": 3.2054567337036133, "step": 2194, "token_acc": 0.2789974576578349 }, { "epoch": 1.2867194371152155, "grad_norm": 0.6919988548748391, "learning_rate": 0.00019988771004249395, "loss": 3.234520196914673, "step": 2195, "token_acc": 0.2748395634490793 }, { "epoch": 1.2873057754324244, "grad_norm": 0.8253730982703739, "learning_rate": 0.00019988725039568004, "loss": 3.30932354927063, "step": 2196, "token_acc": 0.26580008168628916 }, { "epoch": 1.2878921137496335, "grad_norm": 0.6938011649814418, "learning_rate": 0.00019988678981056, "loss": 3.2249906063079834, "step": 2197, "token_acc": 0.2765212639921562 }, { "epoch": 1.2884784520668426, "grad_norm": 0.534500212770452, "learning_rate": 0.00019988632828713817, "loss": 3.259059429168701, "step": 2198, "token_acc": 0.27322731297967445 }, { "epoch": 1.2890647903840515, "grad_norm": 0.5421164903860508, "learning_rate": 0.00019988586582541892, "loss": 3.2708330154418945, "step": 2199, "token_acc": 0.27159299246282337 }, { "epoch": 1.2896511287012606, "grad_norm": 0.7825625360605715, "learning_rate": 0.00019988540242540652, "loss": 3.178575277328491, "step": 2200, "token_acc": 0.2823877524765213 }, { "epoch": 1.2902374670184695, "grad_norm": 0.7519683535990888, "learning_rate": 0.0001998849380871054, "loss": 3.2659499645233154, "step": 2201, "token_acc": 0.271232045986735 }, { "epoch": 1.2908238053356786, "grad_norm": 0.6429486171573856, "learning_rate": 0.00019988447281051982, "loss": 3.2065045833587646, "step": 2202, "token_acc": 0.2772391075439612 }, { "epoch": 1.2914101436528878, "grad_norm": 0.6309962708672037, "learning_rate": 0.00019988400659565425, "loss": 3.1658382415771484, "step": 2203, "token_acc": 0.28441528545119704 }, { "epoch": 1.2919964819700969, "grad_norm": 0.6632401044365421, "learning_rate": 0.00019988353944251302, "loss": 3.262974739074707, "step": 2204, "token_acc": 0.27275845896147405 }, { "epoch": 1.2925828202873058, "grad_norm": 0.7247508636439692, "learning_rate": 0.00019988307135110055, "loss": 3.2054362297058105, "step": 2205, "token_acc": 0.2795609575947167 }, { "epoch": 1.2931691586045149, "grad_norm": 0.7405059386418791, "learning_rate": 0.00019988260232142122, "loss": 3.1694211959838867, "step": 2206, "token_acc": 0.28214910316292363 }, { "epoch": 1.2937554969217238, "grad_norm": 0.6922066938209215, "learning_rate": 0.0001998821323534794, "loss": 3.2345194816589355, "step": 2207, "token_acc": 0.2743523647891383 }, { "epoch": 1.2943418352389329, "grad_norm": 0.7606908562672414, "learning_rate": 0.00019988166144727958, "loss": 3.233335018157959, "step": 2208, "token_acc": 0.27613305123384374 }, { "epoch": 1.294928173556142, "grad_norm": 0.6492810415886225, "learning_rate": 0.00019988118960282615, "loss": 3.2524380683898926, "step": 2209, "token_acc": 0.27368770134546144 }, { "epoch": 1.2955145118733509, "grad_norm": 0.5637800516502484, "learning_rate": 0.0001998807168201235, "loss": 3.2404277324676514, "step": 2210, "token_acc": 0.27279534934691513 }, { "epoch": 1.29610085019056, "grad_norm": 0.6166664962410506, "learning_rate": 0.0001998802430991761, "loss": 3.2613275051116943, "step": 2211, "token_acc": 0.2733495702928017 }, { "epoch": 1.2966871885077689, "grad_norm": 0.676391139608334, "learning_rate": 0.00019987976843998843, "loss": 3.2212557792663574, "step": 2212, "token_acc": 0.27698590788507216 }, { "epoch": 1.297273526824978, "grad_norm": 0.6862026576124307, "learning_rate": 0.00019987929284256492, "loss": 3.2168734073638916, "step": 2213, "token_acc": 0.2778010260225353 }, { "epoch": 1.297859865142187, "grad_norm": 0.7131653085970625, "learning_rate": 0.00019987881630691005, "loss": 3.236105442047119, "step": 2214, "token_acc": 0.27362127759839877 }, { "epoch": 1.2984462034593962, "grad_norm": 0.7137339920348341, "learning_rate": 0.00019987833883302827, "loss": 3.2486140727996826, "step": 2215, "token_acc": 0.27384627721422555 }, { "epoch": 1.299032541776605, "grad_norm": 0.7174074078760778, "learning_rate": 0.00019987786042092412, "loss": 3.2277421951293945, "step": 2216, "token_acc": 0.2757485193479669 }, { "epoch": 1.2996188800938142, "grad_norm": 0.5986700207760487, "learning_rate": 0.00019987738107060202, "loss": 3.2506535053253174, "step": 2217, "token_acc": 0.27179324956204826 }, { "epoch": 1.300205218411023, "grad_norm": 0.9365846432662207, "learning_rate": 0.00019987690078206654, "loss": 3.238294839859009, "step": 2218, "token_acc": 0.2738704964148108 }, { "epoch": 1.3007915567282322, "grad_norm": 0.8365571196498061, "learning_rate": 0.00019987641955532216, "loss": 3.2023885250091553, "step": 2219, "token_acc": 0.27832308986219834 }, { "epoch": 1.3013778950454413, "grad_norm": 0.5952536669010751, "learning_rate": 0.00019987593739037338, "loss": 3.202956199645996, "step": 2220, "token_acc": 0.27807432958894485 }, { "epoch": 1.3019642333626502, "grad_norm": 0.6546631752159368, "learning_rate": 0.00019987545428722477, "loss": 3.2611093521118164, "step": 2221, "token_acc": 0.27222647648774906 }, { "epoch": 1.3025505716798593, "grad_norm": 0.5203480196727166, "learning_rate": 0.00019987497024588085, "loss": 3.208035945892334, "step": 2222, "token_acc": 0.27859294453760636 }, { "epoch": 1.3031369099970682, "grad_norm": 0.691001425135421, "learning_rate": 0.00019987448526634615, "loss": 3.2042624950408936, "step": 2223, "token_acc": 0.2787840675272461 }, { "epoch": 1.3037232483142773, "grad_norm": 0.6866352526769937, "learning_rate": 0.00019987399934862524, "loss": 3.217952251434326, "step": 2224, "token_acc": 0.27749061270317077 }, { "epoch": 1.3043095866314864, "grad_norm": 0.6644913935534694, "learning_rate": 0.00019987351249272273, "loss": 3.2209808826446533, "step": 2225, "token_acc": 0.27552274296424983 }, { "epoch": 1.3048959249486953, "grad_norm": 0.4563799464246214, "learning_rate": 0.00019987302469864313, "loss": 3.2819745540618896, "step": 2226, "token_acc": 0.2690673817288386 }, { "epoch": 1.3054822632659044, "grad_norm": 0.6332059547099614, "learning_rate": 0.00019987253596639103, "loss": 3.245044231414795, "step": 2227, "token_acc": 0.27424212759452116 }, { "epoch": 1.3060686015831133, "grad_norm": 0.6401487888261346, "learning_rate": 0.00019987204629597107, "loss": 3.211263656616211, "step": 2228, "token_acc": 0.27881904752375686 }, { "epoch": 1.3066549399003224, "grad_norm": 0.5456118423587977, "learning_rate": 0.00019987155568738777, "loss": 3.1943864822387695, "step": 2229, "token_acc": 0.28030357454407373 }, { "epoch": 1.3072412782175316, "grad_norm": 0.5887965883742874, "learning_rate": 0.00019987106414064582, "loss": 3.211409568786621, "step": 2230, "token_acc": 0.2771836961832181 }, { "epoch": 1.3078276165347407, "grad_norm": 0.7659352395773921, "learning_rate": 0.00019987057165574975, "loss": 3.222933769226074, "step": 2231, "token_acc": 0.2776629356776802 }, { "epoch": 1.3084139548519496, "grad_norm": 0.8456032469825049, "learning_rate": 0.0001998700782327043, "loss": 3.2683181762695312, "step": 2232, "token_acc": 0.2706966732297522 }, { "epoch": 1.3090002931691587, "grad_norm": 0.9133910492023074, "learning_rate": 0.00019986958387151402, "loss": 3.2389771938323975, "step": 2233, "token_acc": 0.27440895604793214 }, { "epoch": 1.3095866314863676, "grad_norm": 0.8930941498191963, "learning_rate": 0.00019986908857218353, "loss": 3.2202579975128174, "step": 2234, "token_acc": 0.2770410053726919 }, { "epoch": 1.3101729698035767, "grad_norm": 0.5511377077731109, "learning_rate": 0.00019986859233471756, "loss": 3.2436373233795166, "step": 2235, "token_acc": 0.27343714568499344 }, { "epoch": 1.3107593081207858, "grad_norm": 0.6617750902178912, "learning_rate": 0.00019986809515912078, "loss": 3.2722220420837402, "step": 2236, "token_acc": 0.2708851563178634 }, { "epoch": 1.3113456464379947, "grad_norm": 1.009080456548298, "learning_rate": 0.00019986759704539777, "loss": 3.2702579498291016, "step": 2237, "token_acc": 0.26745342785637244 }, { "epoch": 1.3119319847552038, "grad_norm": 0.8767344557309221, "learning_rate": 0.0001998670979935533, "loss": 3.2472572326660156, "step": 2238, "token_acc": 0.27203619909502263 }, { "epoch": 1.3125183230724127, "grad_norm": 0.6228109132234975, "learning_rate": 0.00019986659800359197, "loss": 3.261760711669922, "step": 2239, "token_acc": 0.2709176576406974 }, { "epoch": 1.3131046613896218, "grad_norm": 0.765110871533862, "learning_rate": 0.00019986609707551856, "loss": 3.208376407623291, "step": 2240, "token_acc": 0.2765927422911097 }, { "epoch": 1.313690999706831, "grad_norm": 0.8750214227254173, "learning_rate": 0.0001998655952093377, "loss": 3.183520793914795, "step": 2241, "token_acc": 0.28137957899160954 }, { "epoch": 1.31427733802404, "grad_norm": 0.80459460675569, "learning_rate": 0.00019986509240505417, "loss": 3.2631258964538574, "step": 2242, "token_acc": 0.2708663223753606 }, { "epoch": 1.314863676341249, "grad_norm": 0.5970988188248154, "learning_rate": 0.00019986458866267268, "loss": 3.2179551124572754, "step": 2243, "token_acc": 0.27717829783674536 }, { "epoch": 1.315450014658458, "grad_norm": 0.6176680112576002, "learning_rate": 0.00019986408398219791, "loss": 3.232510566711426, "step": 2244, "token_acc": 0.2741283894483491 }, { "epoch": 1.316036352975667, "grad_norm": 0.700601417333807, "learning_rate": 0.00019986357836363467, "loss": 3.192729949951172, "step": 2245, "token_acc": 0.2806099774578204 }, { "epoch": 1.316622691292876, "grad_norm": 0.5887891281797769, "learning_rate": 0.00019986307180698768, "loss": 3.2269630432128906, "step": 2246, "token_acc": 0.2763050440454765 }, { "epoch": 1.3172090296100851, "grad_norm": 0.689922146390308, "learning_rate": 0.0001998625643122617, "loss": 3.225935935974121, "step": 2247, "token_acc": 0.27600047096377506 }, { "epoch": 1.317795367927294, "grad_norm": 0.6515404401005768, "learning_rate": 0.00019986205587946146, "loss": 3.2339959144592285, "step": 2248, "token_acc": 0.27438105406292734 }, { "epoch": 1.3183817062445031, "grad_norm": 0.5004332671999709, "learning_rate": 0.00019986154650859182, "loss": 3.197260856628418, "step": 2249, "token_acc": 0.27783522204887656 }, { "epoch": 1.318968044561712, "grad_norm": 0.5869524509361221, "learning_rate": 0.0001998610361996575, "loss": 3.2309985160827637, "step": 2250, "token_acc": 0.2741105999591882 }, { "epoch": 1.3195543828789211, "grad_norm": 0.7122138470828963, "learning_rate": 0.00019986052495266326, "loss": 3.1646969318389893, "step": 2251, "token_acc": 0.28456891106107973 }, { "epoch": 1.3201407211961302, "grad_norm": 0.6227812834177969, "learning_rate": 0.000199860012767614, "loss": 3.279928684234619, "step": 2252, "token_acc": 0.26896610901360735 }, { "epoch": 1.3207270595133391, "grad_norm": 0.6328106520193089, "learning_rate": 0.0001998594996445145, "loss": 3.2405104637145996, "step": 2253, "token_acc": 0.27473872467040533 }, { "epoch": 1.3213133978305482, "grad_norm": 0.6028168797756295, "learning_rate": 0.00019985898558336953, "loss": 3.237025499343872, "step": 2254, "token_acc": 0.27497195012795783 }, { "epoch": 1.3218997361477571, "grad_norm": 0.6781529378114041, "learning_rate": 0.00019985847058418395, "loss": 3.2425265312194824, "step": 2255, "token_acc": 0.2728344552643618 }, { "epoch": 1.3224860744649662, "grad_norm": 0.6709203509667832, "learning_rate": 0.00019985795464696262, "loss": 3.250643014907837, "step": 2256, "token_acc": 0.27371561223096397 }, { "epoch": 1.3230724127821754, "grad_norm": 0.6159761443485521, "learning_rate": 0.00019985743777171036, "loss": 3.1767172813415527, "step": 2257, "token_acc": 0.28017775419513163 }, { "epoch": 1.3236587510993845, "grad_norm": 0.827291773636289, "learning_rate": 0.00019985691995843202, "loss": 3.199859857559204, "step": 2258, "token_acc": 0.27957185621610464 }, { "epoch": 1.3242450894165934, "grad_norm": 0.5843705585078939, "learning_rate": 0.0001998564012071325, "loss": 3.253561496734619, "step": 2259, "token_acc": 0.27299517032981224 }, { "epoch": 1.3248314277338025, "grad_norm": 0.7190081360302373, "learning_rate": 0.0001998558815178166, "loss": 3.2165870666503906, "step": 2260, "token_acc": 0.27615683998320906 }, { "epoch": 1.3254177660510114, "grad_norm": 0.8784520031631571, "learning_rate": 0.00019985536089048932, "loss": 3.26008939743042, "step": 2261, "token_acc": 0.27026334489490245 }, { "epoch": 1.3260041043682205, "grad_norm": 0.7390322447560675, "learning_rate": 0.0001998548393251554, "loss": 3.2137928009033203, "step": 2262, "token_acc": 0.2756235250566841 }, { "epoch": 1.3265904426854296, "grad_norm": 0.5443228917546807, "learning_rate": 0.00019985431682181988, "loss": 3.201265335083008, "step": 2263, "token_acc": 0.2792324227966235 }, { "epoch": 1.3271767810026385, "grad_norm": 0.5575561333066834, "learning_rate": 0.0001998537933804876, "loss": 3.19649076461792, "step": 2264, "token_acc": 0.28025750505624664 }, { "epoch": 1.3277631193198476, "grad_norm": 0.48236433167077003, "learning_rate": 0.00019985326900116347, "loss": 3.2390646934509277, "step": 2265, "token_acc": 0.27410909127562105 }, { "epoch": 1.3283494576370565, "grad_norm": 0.5218286509775047, "learning_rate": 0.00019985274368385244, "loss": 3.232114553451538, "step": 2266, "token_acc": 0.2740335892048921 }, { "epoch": 1.3289357959542656, "grad_norm": 0.5376447609392558, "learning_rate": 0.00019985221742855943, "loss": 3.213789939880371, "step": 2267, "token_acc": 0.2782256401797864 }, { "epoch": 1.3295221342714747, "grad_norm": 0.5982102119809181, "learning_rate": 0.0001998516902352894, "loss": 3.208268404006958, "step": 2268, "token_acc": 0.27627221298694604 }, { "epoch": 1.3301084725886836, "grad_norm": 0.5571299573171761, "learning_rate": 0.0001998511621040473, "loss": 3.2628281116485596, "step": 2269, "token_acc": 0.27125648071476416 }, { "epoch": 1.3306948109058927, "grad_norm": 0.6002613267922414, "learning_rate": 0.0001998506330348381, "loss": 3.192324161529541, "step": 2270, "token_acc": 0.2790401160982689 }, { "epoch": 1.3312811492231018, "grad_norm": 0.6672970821280866, "learning_rate": 0.00019985010302766673, "loss": 3.2170486450195312, "step": 2271, "token_acc": 0.2753741905854105 }, { "epoch": 1.3318674875403107, "grad_norm": 0.6512075256222587, "learning_rate": 0.00019984957208253815, "loss": 3.2053141593933105, "step": 2272, "token_acc": 0.27761073667795305 }, { "epoch": 1.3324538258575198, "grad_norm": 0.526392866128523, "learning_rate": 0.00019984904019945744, "loss": 3.2353649139404297, "step": 2273, "token_acc": 0.27385611882231153 }, { "epoch": 1.333040164174729, "grad_norm": 0.5005279643343734, "learning_rate": 0.00019984850737842953, "loss": 3.2453670501708984, "step": 2274, "token_acc": 0.2743985618357743 }, { "epoch": 1.3336265024919378, "grad_norm": 0.5653080210755295, "learning_rate": 0.00019984797361945943, "loss": 3.2648744583129883, "step": 2275, "token_acc": 0.2692078742081471 }, { "epoch": 1.334212840809147, "grad_norm": 0.6128537822142958, "learning_rate": 0.0001998474389225522, "loss": 3.2634871006011963, "step": 2276, "token_acc": 0.2705781708585041 }, { "epoch": 1.3347991791263558, "grad_norm": 0.543765648879881, "learning_rate": 0.0001998469032877128, "loss": 3.234138250350952, "step": 2277, "token_acc": 0.27253592501906637 }, { "epoch": 1.335385517443565, "grad_norm": 0.6509227426809466, "learning_rate": 0.0001998463667149463, "loss": 3.158964157104492, "step": 2278, "token_acc": 0.28470620575905436 }, { "epoch": 1.335971855760774, "grad_norm": 0.6258121495301059, "learning_rate": 0.00019984582920425773, "loss": 3.2377443313598633, "step": 2279, "token_acc": 0.272644602264654 }, { "epoch": 1.336558194077983, "grad_norm": 0.5524376756774653, "learning_rate": 0.00019984529075565212, "loss": 3.2220206260681152, "step": 2280, "token_acc": 0.27683494164872263 }, { "epoch": 1.337144532395192, "grad_norm": 0.594201190056979, "learning_rate": 0.00019984475136913457, "loss": 3.2362303733825684, "step": 2281, "token_acc": 0.2747722914353091 }, { "epoch": 1.337730870712401, "grad_norm": 0.6699420353678947, "learning_rate": 0.00019984421104471013, "loss": 3.1627087593078613, "step": 2282, "token_acc": 0.28453556234677063 }, { "epoch": 1.33831720902961, "grad_norm": 0.5502641986777778, "learning_rate": 0.00019984366978238383, "loss": 3.1949961185455322, "step": 2283, "token_acc": 0.27853903232527205 }, { "epoch": 1.3389035473468192, "grad_norm": 0.543135943297599, "learning_rate": 0.0001998431275821608, "loss": 3.2371275424957275, "step": 2284, "token_acc": 0.2733758035044426 }, { "epoch": 1.3394898856640283, "grad_norm": 0.6619079985080465, "learning_rate": 0.00019984258444404615, "loss": 3.233591079711914, "step": 2285, "token_acc": 0.2740162125326743 }, { "epoch": 1.3400762239812372, "grad_norm": 0.5913094287642736, "learning_rate": 0.00019984204036804494, "loss": 3.328991174697876, "step": 2286, "token_acc": 0.26215950066267335 }, { "epoch": 1.3406625622984463, "grad_norm": 0.6062428982943917, "learning_rate": 0.0001998414953541623, "loss": 3.1826672554016113, "step": 2287, "token_acc": 0.28090535346045103 }, { "epoch": 1.3412489006156552, "grad_norm": 0.7284424605106146, "learning_rate": 0.00019984094940240338, "loss": 3.25720477104187, "step": 2288, "token_acc": 0.26918541280909797 }, { "epoch": 1.3418352389328643, "grad_norm": 0.7718300280624945, "learning_rate": 0.00019984040251277329, "loss": 3.2442948818206787, "step": 2289, "token_acc": 0.27410718749259994 }, { "epoch": 1.3424215772500734, "grad_norm": 0.807390073401052, "learning_rate": 0.00019983985468527708, "loss": 3.2178585529327393, "step": 2290, "token_acc": 0.2757340080535077 }, { "epoch": 1.3430079155672823, "grad_norm": 0.6404755554627158, "learning_rate": 0.00019983930591992, "loss": 3.2205185890197754, "step": 2291, "token_acc": 0.27700200160759036 }, { "epoch": 1.3435942538844914, "grad_norm": 0.553621751543449, "learning_rate": 0.00019983875621670723, "loss": 3.2219204902648926, "step": 2292, "token_acc": 0.2742485572707437 }, { "epoch": 1.3441805922017003, "grad_norm": 0.7061550096923472, "learning_rate": 0.0001998382055756438, "loss": 3.2496695518493652, "step": 2293, "token_acc": 0.2730970952124798 }, { "epoch": 1.3447669305189094, "grad_norm": 0.6849586960660874, "learning_rate": 0.000199837653996735, "loss": 3.274606466293335, "step": 2294, "token_acc": 0.26955158341042634 }, { "epoch": 1.3453532688361185, "grad_norm": 0.7289622826884206, "learning_rate": 0.00019983710147998597, "loss": 3.163226366043091, "step": 2295, "token_acc": 0.28403002514659875 }, { "epoch": 1.3459396071533274, "grad_norm": 0.7688357287609365, "learning_rate": 0.00019983654802540194, "loss": 3.2314798831939697, "step": 2296, "token_acc": 0.2742084156658738 }, { "epoch": 1.3465259454705365, "grad_norm": 0.5642437501537321, "learning_rate": 0.00019983599363298802, "loss": 3.2372398376464844, "step": 2297, "token_acc": 0.27441326111931724 }, { "epoch": 1.3471122837877456, "grad_norm": 0.6241511696739673, "learning_rate": 0.00019983543830274952, "loss": 3.2144174575805664, "step": 2298, "token_acc": 0.2769318829278027 }, { "epoch": 1.3476986221049545, "grad_norm": 0.5972181630469828, "learning_rate": 0.00019983488203469157, "loss": 3.274454116821289, "step": 2299, "token_acc": 0.26843078732784215 }, { "epoch": 1.3482849604221636, "grad_norm": 0.5406282947798213, "learning_rate": 0.00019983432482881944, "loss": 3.2105770111083984, "step": 2300, "token_acc": 0.27812797117207977 }, { "epoch": 1.3488712987393727, "grad_norm": 0.5522460594731188, "learning_rate": 0.00019983376668513835, "loss": 3.185898780822754, "step": 2301, "token_acc": 0.2811006360565548 }, { "epoch": 1.3494576370565816, "grad_norm": 0.520622267451164, "learning_rate": 0.00019983320760365357, "loss": 3.185917615890503, "step": 2302, "token_acc": 0.278965231810255 }, { "epoch": 1.3500439753737907, "grad_norm": 0.6353993616355507, "learning_rate": 0.00019983264758437032, "loss": 3.2158405780792236, "step": 2303, "token_acc": 0.27638747397794217 }, { "epoch": 1.3506303136909996, "grad_norm": 0.8916374222969062, "learning_rate": 0.00019983208662729385, "loss": 3.200443744659424, "step": 2304, "token_acc": 0.2782498921532562 }, { "epoch": 1.3512166520082087, "grad_norm": 0.8026032583630455, "learning_rate": 0.00019983152473242947, "loss": 3.260380268096924, "step": 2305, "token_acc": 0.27103291479357206 }, { "epoch": 1.3518029903254178, "grad_norm": 0.6341304989208912, "learning_rate": 0.00019983096189978244, "loss": 3.1936960220336914, "step": 2306, "token_acc": 0.28008496663886573 }, { "epoch": 1.3523893286426267, "grad_norm": 0.584122256168139, "learning_rate": 0.00019983039812935806, "loss": 3.2230868339538574, "step": 2307, "token_acc": 0.2768247829597217 }, { "epoch": 1.3529756669598358, "grad_norm": 0.6495324025482342, "learning_rate": 0.0001998298334211616, "loss": 3.30298113822937, "step": 2308, "token_acc": 0.2657771204253706 }, { "epoch": 1.3535620052770447, "grad_norm": 0.5313045079574161, "learning_rate": 0.00019982926777519836, "loss": 3.273165702819824, "step": 2309, "token_acc": 0.26882349484167056 }, { "epoch": 1.3541483435942538, "grad_norm": 0.5889348974434815, "learning_rate": 0.0001998287011914737, "loss": 3.207526922225952, "step": 2310, "token_acc": 0.2781034070731651 }, { "epoch": 1.354734681911463, "grad_norm": 0.6029770858900678, "learning_rate": 0.00019982813366999295, "loss": 3.217254638671875, "step": 2311, "token_acc": 0.2779865550355765 }, { "epoch": 1.355321020228672, "grad_norm": 0.4482869287343374, "learning_rate": 0.00019982756521076133, "loss": 3.1985254287719727, "step": 2312, "token_acc": 0.27998129403376787 }, { "epoch": 1.355907358545881, "grad_norm": 0.5913790783237538, "learning_rate": 0.00019982699581378428, "loss": 3.203127145767212, "step": 2313, "token_acc": 0.2761919782722597 }, { "epoch": 1.35649369686309, "grad_norm": 0.5958282437330858, "learning_rate": 0.0001998264254790671, "loss": 3.253300189971924, "step": 2314, "token_acc": 0.2701720766009949 }, { "epoch": 1.357080035180299, "grad_norm": 0.4361716900632453, "learning_rate": 0.0001998258542066152, "loss": 3.1949198246002197, "step": 2315, "token_acc": 0.27966022329485196 }, { "epoch": 1.357666373497508, "grad_norm": 0.5560691823152235, "learning_rate": 0.00019982528199643393, "loss": 3.215174674987793, "step": 2316, "token_acc": 0.2759738021371941 }, { "epoch": 1.3582527118147172, "grad_norm": 0.6014850157988356, "learning_rate": 0.00019982470884852865, "loss": 3.2045233249664307, "step": 2317, "token_acc": 0.276458151314626 }, { "epoch": 1.358839050131926, "grad_norm": 0.5856864457684887, "learning_rate": 0.00019982413476290472, "loss": 3.2175469398498535, "step": 2318, "token_acc": 0.27388324415318943 }, { "epoch": 1.3594253884491352, "grad_norm": 0.5750138873305165, "learning_rate": 0.00019982355973956756, "loss": 3.2004001140594482, "step": 2319, "token_acc": 0.27929333692045555 }, { "epoch": 1.360011726766344, "grad_norm": 0.8072528462993173, "learning_rate": 0.00019982298377852257, "loss": 3.1702215671539307, "step": 2320, "token_acc": 0.28318932990754014 }, { "epoch": 1.3605980650835532, "grad_norm": 0.7485061521492183, "learning_rate": 0.00019982240687977518, "loss": 3.254049301147461, "step": 2321, "token_acc": 0.27151328816823866 }, { "epoch": 1.3611844034007623, "grad_norm": 0.737655221888409, "learning_rate": 0.00019982182904333077, "loss": 3.223850727081299, "step": 2322, "token_acc": 0.27522641618527494 }, { "epoch": 1.3617707417179712, "grad_norm": 0.6708598417714791, "learning_rate": 0.0001998212502691948, "loss": 3.2333619594573975, "step": 2323, "token_acc": 0.27585207675678725 }, { "epoch": 1.3623570800351803, "grad_norm": 0.5251337659452316, "learning_rate": 0.00019982067055737267, "loss": 3.2231106758117676, "step": 2324, "token_acc": 0.275502283707791 }, { "epoch": 1.3629434183523892, "grad_norm": 0.5057244774599616, "learning_rate": 0.00019982008990786988, "loss": 3.2434182167053223, "step": 2325, "token_acc": 0.2733519512015874 }, { "epoch": 1.3635297566695983, "grad_norm": 0.6742115577405382, "learning_rate": 0.00019981950832069186, "loss": 3.2518272399902344, "step": 2326, "token_acc": 0.2722143936349346 }, { "epoch": 1.3641160949868074, "grad_norm": 0.7967552228871582, "learning_rate": 0.00019981892579584406, "loss": 3.2046470642089844, "step": 2327, "token_acc": 0.2771851183075018 }, { "epoch": 1.3647024333040165, "grad_norm": 0.6365444965383319, "learning_rate": 0.00019981834233333197, "loss": 3.259977340698242, "step": 2328, "token_acc": 0.26872736003123754 }, { "epoch": 1.3652887716212254, "grad_norm": 0.5333450647854192, "learning_rate": 0.00019981775793316103, "loss": 3.192427396774292, "step": 2329, "token_acc": 0.279133975670701 }, { "epoch": 1.3658751099384345, "grad_norm": 0.5252276987943667, "learning_rate": 0.00019981717259533676, "loss": 3.23946475982666, "step": 2330, "token_acc": 0.27350827744334877 }, { "epoch": 1.3664614482556434, "grad_norm": 0.5863381450647908, "learning_rate": 0.0001998165863198647, "loss": 3.1982219219207764, "step": 2331, "token_acc": 0.27799400090831317 }, { "epoch": 1.3670477865728525, "grad_norm": 0.6108867726281499, "learning_rate": 0.00019981599910675026, "loss": 3.2414894104003906, "step": 2332, "token_acc": 0.2736137221125383 }, { "epoch": 1.3676341248900616, "grad_norm": 0.7113411580867973, "learning_rate": 0.00019981541095599905, "loss": 3.242356300354004, "step": 2333, "token_acc": 0.27478681788759746 }, { "epoch": 1.3682204632072705, "grad_norm": 0.5791722020006749, "learning_rate": 0.00019981482186761656, "loss": 3.2137610912323, "step": 2334, "token_acc": 0.27603771301583113 }, { "epoch": 1.3688068015244796, "grad_norm": 0.5424069615258724, "learning_rate": 0.0001998142318416083, "loss": 3.1894490718841553, "step": 2335, "token_acc": 0.279772065405126 }, { "epoch": 1.3693931398416885, "grad_norm": 0.5035732296910779, "learning_rate": 0.00019981364087797986, "loss": 3.1840128898620605, "step": 2336, "token_acc": 0.2813259961761568 }, { "epoch": 1.3699794781588976, "grad_norm": 0.6264023162641467, "learning_rate": 0.00019981304897673675, "loss": 3.2200043201446533, "step": 2337, "token_acc": 0.27721157032861726 }, { "epoch": 1.3705658164761068, "grad_norm": 0.6514884948140748, "learning_rate": 0.00019981245613788452, "loss": 3.2242660522460938, "step": 2338, "token_acc": 0.2758242906170619 }, { "epoch": 1.3711521547933159, "grad_norm": 0.5291656236228282, "learning_rate": 0.00019981186236142878, "loss": 3.199312686920166, "step": 2339, "token_acc": 0.2786369761920041 }, { "epoch": 1.3717384931105248, "grad_norm": 0.4565481505770869, "learning_rate": 0.0001998112676473751, "loss": 3.2137539386749268, "step": 2340, "token_acc": 0.27764540774914515 }, { "epoch": 1.3723248314277339, "grad_norm": 0.5558804257886234, "learning_rate": 0.00019981067199572908, "loss": 3.249399185180664, "step": 2341, "token_acc": 0.27489022621241227 }, { "epoch": 1.3729111697449428, "grad_norm": 0.7062373053707898, "learning_rate": 0.00019981007540649626, "loss": 3.239898681640625, "step": 2342, "token_acc": 0.2759300705867622 }, { "epoch": 1.3734975080621519, "grad_norm": 0.8363165494654855, "learning_rate": 0.00019980947787968226, "loss": 3.2085115909576416, "step": 2343, "token_acc": 0.2780588166091669 }, { "epoch": 1.374083846379361, "grad_norm": 0.6247519410570893, "learning_rate": 0.00019980887941529273, "loss": 3.2052500247955322, "step": 2344, "token_acc": 0.27813829500780873 }, { "epoch": 1.3746701846965699, "grad_norm": 0.5549906609830094, "learning_rate": 0.0001998082800133333, "loss": 3.275214195251465, "step": 2345, "token_acc": 0.2680938476347191 }, { "epoch": 1.375256523013779, "grad_norm": 0.683637149844554, "learning_rate": 0.00019980767967380954, "loss": 3.1837263107299805, "step": 2346, "token_acc": 0.28113576082953984 }, { "epoch": 1.3758428613309879, "grad_norm": 0.6688027626371712, "learning_rate": 0.00019980707839672713, "loss": 3.220227003097534, "step": 2347, "token_acc": 0.27730263331451516 }, { "epoch": 1.376429199648197, "grad_norm": 0.6017286270436637, "learning_rate": 0.00019980647618209171, "loss": 3.2132978439331055, "step": 2348, "token_acc": 0.27634790503912193 }, { "epoch": 1.377015537965406, "grad_norm": 0.4831958397930829, "learning_rate": 0.00019980587302990895, "loss": 3.2312119007110596, "step": 2349, "token_acc": 0.27301190201642417 }, { "epoch": 1.377601876282615, "grad_norm": 0.5930291861968073, "learning_rate": 0.0001998052689401845, "loss": 3.1645560264587402, "step": 2350, "token_acc": 0.2840330780360248 }, { "epoch": 1.378188214599824, "grad_norm": 0.5622695346890244, "learning_rate": 0.00019980466391292403, "loss": 3.2629756927490234, "step": 2351, "token_acc": 0.2710935200370043 }, { "epoch": 1.378774552917033, "grad_norm": 0.6856628267589485, "learning_rate": 0.0001998040579481332, "loss": 3.2333879470825195, "step": 2352, "token_acc": 0.27330692104856597 }, { "epoch": 1.379360891234242, "grad_norm": 0.6863675546101641, "learning_rate": 0.00019980345104581777, "loss": 3.2529211044311523, "step": 2353, "token_acc": 0.2715952691528902 }, { "epoch": 1.3799472295514512, "grad_norm": 0.6899547873172952, "learning_rate": 0.0001998028432059834, "loss": 3.2395944595336914, "step": 2354, "token_acc": 0.2726540005264743 }, { "epoch": 1.3805335678686603, "grad_norm": 0.6177271704604851, "learning_rate": 0.0001998022344286358, "loss": 3.229825973510742, "step": 2355, "token_acc": 0.2758487321610102 }, { "epoch": 1.3811199061858692, "grad_norm": 0.623916065446246, "learning_rate": 0.0001998016247137807, "loss": 3.2398109436035156, "step": 2356, "token_acc": 0.2735699090934939 }, { "epoch": 1.3817062445030783, "grad_norm": 0.567433224816844, "learning_rate": 0.0001998010140614238, "loss": 3.215573787689209, "step": 2357, "token_acc": 0.27671625281107 }, { "epoch": 1.3822925828202872, "grad_norm": 0.6598461412279835, "learning_rate": 0.00019980040247157091, "loss": 3.1701865196228027, "step": 2358, "token_acc": 0.28190160952444854 }, { "epoch": 1.3828789211374963, "grad_norm": 0.6705453813496319, "learning_rate": 0.00019979978994422767, "loss": 3.2331960201263428, "step": 2359, "token_acc": 0.2736695959484404 }, { "epoch": 1.3834652594547054, "grad_norm": 0.6921995770859319, "learning_rate": 0.00019979917647939988, "loss": 3.2129034996032715, "step": 2360, "token_acc": 0.27447688934007985 }, { "epoch": 1.3840515977719143, "grad_norm": 0.6961748960749062, "learning_rate": 0.00019979856207709334, "loss": 3.2598257064819336, "step": 2361, "token_acc": 0.27075521005119985 }, { "epoch": 1.3846379360891234, "grad_norm": 0.6664236138689095, "learning_rate": 0.00019979794673731375, "loss": 3.2386069297790527, "step": 2362, "token_acc": 0.27316250203976356 }, { "epoch": 1.3852242744063323, "grad_norm": 0.5350056839982936, "learning_rate": 0.00019979733046006696, "loss": 3.2003543376922607, "step": 2363, "token_acc": 0.27673492378530756 }, { "epoch": 1.3858106127235414, "grad_norm": 0.5791184182126338, "learning_rate": 0.00019979671324535874, "loss": 3.2376790046691895, "step": 2364, "token_acc": 0.27297032744464056 }, { "epoch": 1.3863969510407506, "grad_norm": 0.5941116777472292, "learning_rate": 0.00019979609509319487, "loss": 3.2144241333007812, "step": 2365, "token_acc": 0.2756934051551069 }, { "epoch": 1.3869832893579597, "grad_norm": 0.5665023755124522, "learning_rate": 0.00019979547600358115, "loss": 3.249159336090088, "step": 2366, "token_acc": 0.2718142607769924 }, { "epoch": 1.3875696276751686, "grad_norm": 0.5406878436917767, "learning_rate": 0.0001997948559765234, "loss": 3.207214832305908, "step": 2367, "token_acc": 0.2771076390537852 }, { "epoch": 1.3881559659923777, "grad_norm": 0.6048104840838411, "learning_rate": 0.00019979423501202746, "loss": 3.2486190795898438, "step": 2368, "token_acc": 0.2729608237791709 }, { "epoch": 1.3887423043095866, "grad_norm": 0.6415322916823561, "learning_rate": 0.00019979361311009918, "loss": 3.2393012046813965, "step": 2369, "token_acc": 0.2724976393447826 }, { "epoch": 1.3893286426267957, "grad_norm": 0.7798264213529195, "learning_rate": 0.00019979299027074435, "loss": 3.228355884552002, "step": 2370, "token_acc": 0.27472287316675004 }, { "epoch": 1.3899149809440048, "grad_norm": 0.714062649363242, "learning_rate": 0.0001997923664939689, "loss": 3.142852783203125, "step": 2371, "token_acc": 0.2855816119518031 }, { "epoch": 1.3905013192612137, "grad_norm": 0.4872556186733384, "learning_rate": 0.00019979174177977858, "loss": 3.2286200523376465, "step": 2372, "token_acc": 0.2735760971055089 }, { "epoch": 1.3910876575784228, "grad_norm": 0.518015842590596, "learning_rate": 0.00019979111612817934, "loss": 3.178013801574707, "step": 2373, "token_acc": 0.27998639617031584 }, { "epoch": 1.3916739958956317, "grad_norm": 0.6617056547679382, "learning_rate": 0.00019979048953917703, "loss": 3.2063825130462646, "step": 2374, "token_acc": 0.2755368359051696 }, { "epoch": 1.3922603342128408, "grad_norm": 0.5868555124254369, "learning_rate": 0.00019978986201277753, "loss": 3.2318530082702637, "step": 2375, "token_acc": 0.2750218121213267 }, { "epoch": 1.39284667253005, "grad_norm": 0.6275465511085035, "learning_rate": 0.00019978923354898678, "loss": 3.2033700942993164, "step": 2376, "token_acc": 0.2784625454430749 }, { "epoch": 1.3934330108472588, "grad_norm": 0.7537940168930265, "learning_rate": 0.00019978860414781061, "loss": 3.250974655151367, "step": 2377, "token_acc": 0.27068091131419747 }, { "epoch": 1.394019349164468, "grad_norm": 0.7391654192718113, "learning_rate": 0.000199787973809255, "loss": 3.231107234954834, "step": 2378, "token_acc": 0.2753643515957184 }, { "epoch": 1.3946056874816768, "grad_norm": 0.7801532073842946, "learning_rate": 0.00019978734253332583, "loss": 3.2028980255126953, "step": 2379, "token_acc": 0.2796999285299275 }, { "epoch": 1.395192025798886, "grad_norm": 0.8112482551412615, "learning_rate": 0.00019978671032002903, "loss": 3.2664527893066406, "step": 2380, "token_acc": 0.27029385400112643 }, { "epoch": 1.395778364116095, "grad_norm": 0.6584939446017312, "learning_rate": 0.00019978607716937056, "loss": 3.2783241271972656, "step": 2381, "token_acc": 0.2664420988812552 }, { "epoch": 1.3963647024333041, "grad_norm": 0.504938948741125, "learning_rate": 0.00019978544308135634, "loss": 3.2081007957458496, "step": 2382, "token_acc": 0.27739006404087646 }, { "epoch": 1.396951040750513, "grad_norm": 0.63134838024265, "learning_rate": 0.00019978480805599237, "loss": 3.2313427925109863, "step": 2383, "token_acc": 0.2729746265227571 }, { "epoch": 1.3975373790677221, "grad_norm": 0.697818896389499, "learning_rate": 0.0001997841720932846, "loss": 3.2521848678588867, "step": 2384, "token_acc": 0.27125546625616476 }, { "epoch": 1.398123717384931, "grad_norm": 0.5015365181197838, "learning_rate": 0.000199783535193239, "loss": 3.227332592010498, "step": 2385, "token_acc": 0.276129883843717 }, { "epoch": 1.3987100557021401, "grad_norm": 0.5464465531096463, "learning_rate": 0.00019978289735586149, "loss": 3.240340232849121, "step": 2386, "token_acc": 0.2731998514020727 }, { "epoch": 1.3992963940193492, "grad_norm": 0.7590414798245688, "learning_rate": 0.00019978225858115816, "loss": 3.249244213104248, "step": 2387, "token_acc": 0.2715142894349337 }, { "epoch": 1.3998827323365581, "grad_norm": 0.6888253009341045, "learning_rate": 0.00019978161886913495, "loss": 3.229674816131592, "step": 2388, "token_acc": 0.2764979406055231 }, { "epoch": 1.4004690706537672, "grad_norm": 0.6199260499984348, "learning_rate": 0.0001997809782197979, "loss": 3.2093381881713867, "step": 2389, "token_acc": 0.27615976536629977 }, { "epoch": 1.4010554089709761, "grad_norm": 0.5217379877598975, "learning_rate": 0.00019978033663315304, "loss": 3.2073476314544678, "step": 2390, "token_acc": 0.2771758081719724 }, { "epoch": 1.4016417472881852, "grad_norm": 0.567808325841439, "learning_rate": 0.00019977969410920634, "loss": 3.2124552726745605, "step": 2391, "token_acc": 0.27608767711418963 }, { "epoch": 1.4022280856053944, "grad_norm": 0.6140491549357279, "learning_rate": 0.00019977905064796388, "loss": 3.236581802368164, "step": 2392, "token_acc": 0.2758689023584681 }, { "epoch": 1.4028144239226035, "grad_norm": 0.5686894086779063, "learning_rate": 0.00019977840624943167, "loss": 3.2035863399505615, "step": 2393, "token_acc": 0.2768361581920904 }, { "epoch": 1.4034007622398124, "grad_norm": 0.5972326681685846, "learning_rate": 0.00019977776091361583, "loss": 3.2221245765686035, "step": 2394, "token_acc": 0.27841265236558416 }, { "epoch": 1.4039871005570215, "grad_norm": 0.4943320081203837, "learning_rate": 0.00019977711464052233, "loss": 3.184516668319702, "step": 2395, "token_acc": 0.2796301576106801 }, { "epoch": 1.4045734388742304, "grad_norm": 0.7119464937378809, "learning_rate": 0.00019977646743015733, "loss": 3.2168521881103516, "step": 2396, "token_acc": 0.27513242951460637 }, { "epoch": 1.4051597771914395, "grad_norm": 0.6645047305604501, "learning_rate": 0.00019977581928252685, "loss": 3.195078134536743, "step": 2397, "token_acc": 0.2799557319452792 }, { "epoch": 1.4057461155086486, "grad_norm": 0.44322205655539476, "learning_rate": 0.000199775170197637, "loss": 3.194641590118408, "step": 2398, "token_acc": 0.27951687403049474 }, { "epoch": 1.4063324538258575, "grad_norm": 0.5374841510988142, "learning_rate": 0.00019977452017549388, "loss": 3.2574822902679443, "step": 2399, "token_acc": 0.2714746574602407 }, { "epoch": 1.4069187921430666, "grad_norm": 0.6055705981763689, "learning_rate": 0.0001997738692161036, "loss": 3.228893280029297, "step": 2400, "token_acc": 0.2745166001672188 }, { "epoch": 1.4075051304602755, "grad_norm": 0.5640662013810072, "learning_rate": 0.00019977321731947225, "loss": 3.267703056335449, "step": 2401, "token_acc": 0.26852412883925514 }, { "epoch": 1.4080914687774846, "grad_norm": 0.6622212614390544, "learning_rate": 0.00019977256448560596, "loss": 3.259082317352295, "step": 2402, "token_acc": 0.27010537296814735 }, { "epoch": 1.4086778070946937, "grad_norm": 0.7015160617451744, "learning_rate": 0.0001997719107145109, "loss": 3.2116522789001465, "step": 2403, "token_acc": 0.2758896892948403 }, { "epoch": 1.4092641454119026, "grad_norm": 0.720953842758773, "learning_rate": 0.00019977125600619314, "loss": 3.187607526779175, "step": 2404, "token_acc": 0.28193377084275123 }, { "epoch": 1.4098504837291117, "grad_norm": 0.6453306553580302, "learning_rate": 0.00019977060036065894, "loss": 3.2051162719726562, "step": 2405, "token_acc": 0.27592966712226574 }, { "epoch": 1.4104368220463206, "grad_norm": 0.5671117551022952, "learning_rate": 0.00019976994377791433, "loss": 3.2236409187316895, "step": 2406, "token_acc": 0.2759743129761069 }, { "epoch": 1.4110231603635297, "grad_norm": 0.5408282221800969, "learning_rate": 0.00019976928625796555, "loss": 3.2435684204101562, "step": 2407, "token_acc": 0.2732884516333782 }, { "epoch": 1.4116094986807388, "grad_norm": 0.6399059777064569, "learning_rate": 0.0001997686278008188, "loss": 3.207069158554077, "step": 2408, "token_acc": 0.27848512135363224 }, { "epoch": 1.412195836997948, "grad_norm": 0.7672870758021896, "learning_rate": 0.0001997679684064802, "loss": 3.2588136196136475, "step": 2409, "token_acc": 0.2709196503106131 }, { "epoch": 1.4127821753151568, "grad_norm": 0.8975147344890393, "learning_rate": 0.00019976730807495598, "loss": 3.2286903858184814, "step": 2410, "token_acc": 0.27536499722786917 }, { "epoch": 1.413368513632366, "grad_norm": 0.8407919415761599, "learning_rate": 0.00019976664680625237, "loss": 3.176316261291504, "step": 2411, "token_acc": 0.280994843702989 }, { "epoch": 1.4139548519495748, "grad_norm": 0.6143956569068743, "learning_rate": 0.0001997659846003755, "loss": 3.227346420288086, "step": 2412, "token_acc": 0.27461865863007906 }, { "epoch": 1.414541190266784, "grad_norm": 0.48061711290683096, "learning_rate": 0.0001997653214573317, "loss": 3.223888397216797, "step": 2413, "token_acc": 0.27643683101921507 }, { "epoch": 1.415127528583993, "grad_norm": 0.7327587026591392, "learning_rate": 0.0001997646573771271, "loss": 3.1877684593200684, "step": 2414, "token_acc": 0.2809499477589744 }, { "epoch": 1.415713866901202, "grad_norm": 0.6464437940365194, "learning_rate": 0.00019976399235976797, "loss": 3.2386980056762695, "step": 2415, "token_acc": 0.27239908876227015 }, { "epoch": 1.416300205218411, "grad_norm": 0.5445316993199244, "learning_rate": 0.00019976332640526059, "loss": 3.204814910888672, "step": 2416, "token_acc": 0.2777111016947002 }, { "epoch": 1.41688654353562, "grad_norm": 0.5315443796877389, "learning_rate": 0.0001997626595136112, "loss": 3.207308292388916, "step": 2417, "token_acc": 0.27723640192492977 }, { "epoch": 1.417472881852829, "grad_norm": 0.49204973705629174, "learning_rate": 0.00019976199168482604, "loss": 3.2157540321350098, "step": 2418, "token_acc": 0.27719271109736227 }, { "epoch": 1.4180592201700382, "grad_norm": 0.4954114593314, "learning_rate": 0.00019976132291891138, "loss": 3.249311923980713, "step": 2419, "token_acc": 0.2708770871888711 }, { "epoch": 1.4186455584872473, "grad_norm": 0.5442182335881889, "learning_rate": 0.00019976065321587353, "loss": 3.236985206604004, "step": 2420, "token_acc": 0.2732306121254993 }, { "epoch": 1.4192318968044562, "grad_norm": 0.5813570205458434, "learning_rate": 0.00019975998257571877, "loss": 3.2398853302001953, "step": 2421, "token_acc": 0.27186714667517653 }, { "epoch": 1.4198182351216653, "grad_norm": 0.5748631709304792, "learning_rate": 0.00019975931099845343, "loss": 3.1971547603607178, "step": 2422, "token_acc": 0.2776223706609061 }, { "epoch": 1.4204045734388742, "grad_norm": 0.5024429192673745, "learning_rate": 0.00019975863848408377, "loss": 3.1993765830993652, "step": 2423, "token_acc": 0.277317748066927 }, { "epoch": 1.4209909117560833, "grad_norm": 0.5594769247118319, "learning_rate": 0.00019975796503261613, "loss": 3.170154094696045, "step": 2424, "token_acc": 0.2817924620004809 }, { "epoch": 1.4215772500732924, "grad_norm": 0.6378007881785445, "learning_rate": 0.00019975729064405684, "loss": 3.1824638843536377, "step": 2425, "token_acc": 0.2817877124458989 }, { "epoch": 1.4221635883905013, "grad_norm": 0.6649885201154627, "learning_rate": 0.00019975661531841223, "loss": 3.2195241451263428, "step": 2426, "token_acc": 0.27712886836974426 }, { "epoch": 1.4227499267077104, "grad_norm": 0.5871732574327972, "learning_rate": 0.00019975593905568862, "loss": 3.236100673675537, "step": 2427, "token_acc": 0.27433189030173033 }, { "epoch": 1.4233362650249193, "grad_norm": 0.5080365770420008, "learning_rate": 0.0001997552618558924, "loss": 3.2706072330474854, "step": 2428, "token_acc": 0.2694888404568753 }, { "epoch": 1.4239226033421284, "grad_norm": 0.6149748132532346, "learning_rate": 0.00019975458371902994, "loss": 3.1961092948913574, "step": 2429, "token_acc": 0.27900624991879003 }, { "epoch": 1.4245089416593375, "grad_norm": 0.6128820510083652, "learning_rate": 0.00019975390464510757, "loss": 3.214637517929077, "step": 2430, "token_acc": 0.27467537661645114 }, { "epoch": 1.4250952799765464, "grad_norm": 0.5678014153886253, "learning_rate": 0.00019975322463413169, "loss": 3.1884846687316895, "step": 2431, "token_acc": 0.2799892381143349 }, { "epoch": 1.4256816182937555, "grad_norm": 0.5115537102444275, "learning_rate": 0.00019975254368610865, "loss": 3.1780881881713867, "step": 2432, "token_acc": 0.28135368786338316 }, { "epoch": 1.4262679566109644, "grad_norm": 0.7145595930189065, "learning_rate": 0.0001997518618010449, "loss": 3.251774787902832, "step": 2433, "token_acc": 0.26977892492425093 }, { "epoch": 1.4268542949281735, "grad_norm": 0.6457555340355297, "learning_rate": 0.00019975117897894684, "loss": 3.21073055267334, "step": 2434, "token_acc": 0.27644302320374464 }, { "epoch": 1.4274406332453826, "grad_norm": 0.5887215595468299, "learning_rate": 0.00019975049521982086, "loss": 3.2040278911590576, "step": 2435, "token_acc": 0.2785312525379015 }, { "epoch": 1.4280269715625917, "grad_norm": 0.678209814023707, "learning_rate": 0.00019974981052367342, "loss": 3.212954521179199, "step": 2436, "token_acc": 0.2769654705519899 }, { "epoch": 1.4286133098798006, "grad_norm": 0.4820990664345289, "learning_rate": 0.00019974912489051087, "loss": 3.2021005153656006, "step": 2437, "token_acc": 0.2775631795839333 }, { "epoch": 1.4291996481970097, "grad_norm": 0.5112590163606578, "learning_rate": 0.00019974843832033977, "loss": 3.2184808254241943, "step": 2438, "token_acc": 0.27523801017191374 }, { "epoch": 1.4297859865142186, "grad_norm": 0.535891472216452, "learning_rate": 0.00019974775081316642, "loss": 3.209672451019287, "step": 2439, "token_acc": 0.2755418848167539 }, { "epoch": 1.4303723248314277, "grad_norm": 0.5010239049644959, "learning_rate": 0.00019974706236899743, "loss": 3.221233606338501, "step": 2440, "token_acc": 0.2752595612569502 }, { "epoch": 1.4309586631486368, "grad_norm": 0.5494723549490004, "learning_rate": 0.00019974637298783918, "loss": 3.211958646774292, "step": 2441, "token_acc": 0.27455440690385946 }, { "epoch": 1.4315450014658457, "grad_norm": 0.5352035058258378, "learning_rate": 0.00019974568266969818, "loss": 3.177605152130127, "step": 2442, "token_acc": 0.28160656644363297 }, { "epoch": 1.4321313397830548, "grad_norm": 0.6491733593368556, "learning_rate": 0.0001997449914145809, "loss": 3.204132080078125, "step": 2443, "token_acc": 0.27808162441723394 }, { "epoch": 1.4327176781002637, "grad_norm": 0.5205690583051633, "learning_rate": 0.00019974429922249383, "loss": 3.21420955657959, "step": 2444, "token_acc": 0.27544076062242245 }, { "epoch": 1.4333040164174728, "grad_norm": 0.6189631960741147, "learning_rate": 0.00019974360609344345, "loss": 3.203617572784424, "step": 2445, "token_acc": 0.2771131527410854 }, { "epoch": 1.433890354734682, "grad_norm": 0.7812580128702256, "learning_rate": 0.0001997429120274363, "loss": 3.217839241027832, "step": 2446, "token_acc": 0.2766352362819467 }, { "epoch": 1.434476693051891, "grad_norm": 0.7860513794869695, "learning_rate": 0.00019974221702447894, "loss": 3.2327585220336914, "step": 2447, "token_acc": 0.2743000894337432 }, { "epoch": 1.4350630313691, "grad_norm": 0.972253037056436, "learning_rate": 0.0001997415210845778, "loss": 3.2005929946899414, "step": 2448, "token_acc": 0.27736807947158515 }, { "epoch": 1.435649369686309, "grad_norm": 0.9164018339659405, "learning_rate": 0.00019974082420773953, "loss": 3.2413651943206787, "step": 2449, "token_acc": 0.2726021183641963 }, { "epoch": 1.436235708003518, "grad_norm": 0.6397973299426116, "learning_rate": 0.00019974012639397058, "loss": 3.176875114440918, "step": 2450, "token_acc": 0.28275956496876337 }, { "epoch": 1.436822046320727, "grad_norm": 0.5675265328723258, "learning_rate": 0.00019973942764327753, "loss": 3.213367462158203, "step": 2451, "token_acc": 0.27693923433360423 }, { "epoch": 1.4374083846379362, "grad_norm": 0.7443237591104463, "learning_rate": 0.000199738727955667, "loss": 3.156369686126709, "step": 2452, "token_acc": 0.28361667580674643 }, { "epoch": 1.437994722955145, "grad_norm": 0.6221376707507675, "learning_rate": 0.00019973802733114552, "loss": 3.183365821838379, "step": 2453, "token_acc": 0.28065817145761135 }, { "epoch": 1.4385810612723542, "grad_norm": 0.7968473205957822, "learning_rate": 0.00019973732576971962, "loss": 3.2436492443084717, "step": 2454, "token_acc": 0.27124884623651835 }, { "epoch": 1.439167399589563, "grad_norm": 0.8105287534621836, "learning_rate": 0.00019973662327139597, "loss": 3.1755919456481934, "step": 2455, "token_acc": 0.2830058134448836 }, { "epoch": 1.4397537379067722, "grad_norm": 0.6506472426254918, "learning_rate": 0.00019973591983618117, "loss": 3.222956657409668, "step": 2456, "token_acc": 0.27398947306421645 }, { "epoch": 1.4403400762239813, "grad_norm": 0.5257835253078588, "learning_rate": 0.00019973521546408175, "loss": 3.1910247802734375, "step": 2457, "token_acc": 0.2784298055929884 }, { "epoch": 1.4409264145411902, "grad_norm": 0.5783526811024802, "learning_rate": 0.00019973451015510444, "loss": 3.2501659393310547, "step": 2458, "token_acc": 0.27256398984900254 }, { "epoch": 1.4415127528583993, "grad_norm": 0.5021171327916099, "learning_rate": 0.00019973380390925574, "loss": 3.204328775405884, "step": 2459, "token_acc": 0.276011258463376 }, { "epoch": 1.4420990911756082, "grad_norm": 0.6881020903119974, "learning_rate": 0.00019973309672654236, "loss": 3.207953453063965, "step": 2460, "token_acc": 0.2765532796417194 }, { "epoch": 1.4426854294928173, "grad_norm": 0.6899068798047286, "learning_rate": 0.00019973238860697095, "loss": 3.2005624771118164, "step": 2461, "token_acc": 0.27745833125569164 }, { "epoch": 1.4432717678100264, "grad_norm": 0.42998025252895544, "learning_rate": 0.00019973167955054813, "loss": 3.184542655944824, "step": 2462, "token_acc": 0.2789898037916017 }, { "epoch": 1.4438581061272355, "grad_norm": 0.5517684630508338, "learning_rate": 0.00019973096955728056, "loss": 3.1826236248016357, "step": 2463, "token_acc": 0.2817761181059024 }, { "epoch": 1.4444444444444444, "grad_norm": 0.5959620886106191, "learning_rate": 0.00019973025862717492, "loss": 3.210742712020874, "step": 2464, "token_acc": 0.2760825731126846 }, { "epoch": 1.4450307827616535, "grad_norm": 0.6175176816843638, "learning_rate": 0.00019972954676023789, "loss": 3.2387290000915527, "step": 2465, "token_acc": 0.27281263573345654 }, { "epoch": 1.4456171210788624, "grad_norm": 0.5435348542510129, "learning_rate": 0.00019972883395647615, "loss": 3.2437338829040527, "step": 2466, "token_acc": 0.2719556463504217 }, { "epoch": 1.4462034593960715, "grad_norm": 0.500203085694502, "learning_rate": 0.0001997281202158964, "loss": 3.1948535442352295, "step": 2467, "token_acc": 0.27947645129877596 }, { "epoch": 1.4467897977132806, "grad_norm": 0.5265721057911557, "learning_rate": 0.00019972740553850539, "loss": 3.207122325897217, "step": 2468, "token_acc": 0.2768359936565258 }, { "epoch": 1.4473761360304895, "grad_norm": 0.4829913796755665, "learning_rate": 0.00019972668992430977, "loss": 3.2574994564056396, "step": 2469, "token_acc": 0.2711800433262895 }, { "epoch": 1.4479624743476986, "grad_norm": 0.6122344296399751, "learning_rate": 0.00019972597337331624, "loss": 3.2081079483032227, "step": 2470, "token_acc": 0.27587271688616466 }, { "epoch": 1.4485488126649075, "grad_norm": 0.6038361584270306, "learning_rate": 0.00019972525588553158, "loss": 3.1788229942321777, "step": 2471, "token_acc": 0.28153393442867 }, { "epoch": 1.4491351509821166, "grad_norm": 0.564385493509247, "learning_rate": 0.00019972453746096256, "loss": 3.247359275817871, "step": 2472, "token_acc": 0.27234472354539724 }, { "epoch": 1.4497214892993258, "grad_norm": 0.5059872544628228, "learning_rate": 0.0001997238180996159, "loss": 3.163174629211426, "step": 2473, "token_acc": 0.28155848717225523 }, { "epoch": 1.4503078276165349, "grad_norm": 0.4907589108075454, "learning_rate": 0.0001997230978014983, "loss": 3.213125467300415, "step": 2474, "token_acc": 0.2760072251982666 }, { "epoch": 1.4508941659337438, "grad_norm": 0.49759636932039647, "learning_rate": 0.00019972237656661662, "loss": 3.2524139881134033, "step": 2475, "token_acc": 0.2692037756976918 }, { "epoch": 1.4514805042509529, "grad_norm": 0.6335015042988192, "learning_rate": 0.00019972165439497753, "loss": 3.271270751953125, "step": 2476, "token_acc": 0.26658019033458025 }, { "epoch": 1.4520668425681618, "grad_norm": 0.5829846017636969, "learning_rate": 0.00019972093128658793, "loss": 3.1781558990478516, "step": 2477, "token_acc": 0.2814562592726567 }, { "epoch": 1.4526531808853709, "grad_norm": 0.4196788623607607, "learning_rate": 0.00019972020724145454, "loss": 3.2532718181610107, "step": 2478, "token_acc": 0.2708930404653948 }, { "epoch": 1.45323951920258, "grad_norm": 0.5933753110909107, "learning_rate": 0.00019971948225958416, "loss": 3.1762609481811523, "step": 2479, "token_acc": 0.2810328849749037 }, { "epoch": 1.4538258575197889, "grad_norm": 0.642452763253212, "learning_rate": 0.00019971875634098365, "loss": 3.2116127014160156, "step": 2480, "token_acc": 0.275731276626071 }, { "epoch": 1.454412195836998, "grad_norm": 0.6106044558450442, "learning_rate": 0.00019971802948565975, "loss": 3.197416305541992, "step": 2481, "token_acc": 0.2787896599885042 }, { "epoch": 1.4549985341542069, "grad_norm": 0.4585504986172105, "learning_rate": 0.00019971730169361939, "loss": 3.167546510696411, "step": 2482, "token_acc": 0.283260456400101 }, { "epoch": 1.455584872471416, "grad_norm": 0.49179417002102926, "learning_rate": 0.00019971657296486933, "loss": 3.1961326599121094, "step": 2483, "token_acc": 0.27924459351874814 }, { "epoch": 1.456171210788625, "grad_norm": 0.5750534623613722, "learning_rate": 0.00019971584329941643, "loss": 3.195237398147583, "step": 2484, "token_acc": 0.27979462959032053 }, { "epoch": 1.456757549105834, "grad_norm": 0.6616069458002177, "learning_rate": 0.00019971511269726756, "loss": 3.1538100242614746, "step": 2485, "token_acc": 0.284324502866934 }, { "epoch": 1.457343887423043, "grad_norm": 0.6106337556194826, "learning_rate": 0.00019971438115842956, "loss": 3.1540393829345703, "step": 2486, "token_acc": 0.2842917941967422 }, { "epoch": 1.457930225740252, "grad_norm": 0.7204347558629824, "learning_rate": 0.00019971364868290933, "loss": 3.1896331310272217, "step": 2487, "token_acc": 0.28045013633152155 }, { "epoch": 1.458516564057461, "grad_norm": 0.6753786806621657, "learning_rate": 0.0001997129152707137, "loss": 3.192776679992676, "step": 2488, "token_acc": 0.28070888458700094 }, { "epoch": 1.4591029023746702, "grad_norm": 0.46156542412579743, "learning_rate": 0.00019971218092184963, "loss": 3.189014434814453, "step": 2489, "token_acc": 0.27789217890758755 }, { "epoch": 1.4596892406918793, "grad_norm": 0.6302476502557568, "learning_rate": 0.000199711445636324, "loss": 3.160496234893799, "step": 2490, "token_acc": 0.2818571182035446 }, { "epoch": 1.4602755790090882, "grad_norm": 0.6700090303154498, "learning_rate": 0.00019971070941414366, "loss": 3.169593095779419, "step": 2491, "token_acc": 0.28137520772375546 }, { "epoch": 1.4608619173262973, "grad_norm": 0.7256959653113945, "learning_rate": 0.0001997099722553156, "loss": 3.2104599475860596, "step": 2492, "token_acc": 0.27613938937579086 }, { "epoch": 1.4614482556435062, "grad_norm": 0.7035745442690808, "learning_rate": 0.0001997092341598467, "loss": 3.226490020751953, "step": 2493, "token_acc": 0.27574394804959385 }, { "epoch": 1.4620345939607153, "grad_norm": 0.5944670778921513, "learning_rate": 0.00019970849512774392, "loss": 3.181095600128174, "step": 2494, "token_acc": 0.2797733911191252 }, { "epoch": 1.4626209322779244, "grad_norm": 0.5894188467720394, "learning_rate": 0.00019970775515901416, "loss": 3.2125582695007324, "step": 2495, "token_acc": 0.2759883176224896 }, { "epoch": 1.4632072705951333, "grad_norm": 0.5368885645687302, "learning_rate": 0.00019970701425366441, "loss": 3.2335870265960693, "step": 2496, "token_acc": 0.2732706999526237 }, { "epoch": 1.4637936089123424, "grad_norm": 0.6562322633980282, "learning_rate": 0.0001997062724117016, "loss": 3.234812021255493, "step": 2497, "token_acc": 0.27149178826231113 }, { "epoch": 1.4643799472295513, "grad_norm": 0.5122770302918647, "learning_rate": 0.00019970552963313276, "loss": 3.2412772178649902, "step": 2498, "token_acc": 0.2729527556984066 }, { "epoch": 1.4649662855467604, "grad_norm": 0.5521648029820367, "learning_rate": 0.00019970478591796478, "loss": 3.223781108856201, "step": 2499, "token_acc": 0.2747450525567885 }, { "epoch": 1.4655526238639696, "grad_norm": 0.5130770167363079, "learning_rate": 0.0001997040412662047, "loss": 3.2385120391845703, "step": 2500, "token_acc": 0.27294334934186665 }, { "epoch": 1.4661389621811787, "grad_norm": 0.4811323211021129, "learning_rate": 0.00019970329567785952, "loss": 3.192143440246582, "step": 2501, "token_acc": 0.2798596619365609 }, { "epoch": 1.4667253004983876, "grad_norm": 0.4194047828520244, "learning_rate": 0.0001997025491529362, "loss": 3.157729148864746, "step": 2502, "token_acc": 0.28574631215531543 }, { "epoch": 1.4673116388155967, "grad_norm": 0.47850992005191706, "learning_rate": 0.00019970180169144185, "loss": 3.239119052886963, "step": 2503, "token_acc": 0.27235876215887334 }, { "epoch": 1.4678979771328056, "grad_norm": 0.5177957871782771, "learning_rate": 0.00019970105329338334, "loss": 3.1877291202545166, "step": 2504, "token_acc": 0.2771754225211358 }, { "epoch": 1.4684843154500147, "grad_norm": 0.6531418016107837, "learning_rate": 0.00019970030395876785, "loss": 3.1982970237731934, "step": 2505, "token_acc": 0.27739468161906067 }, { "epoch": 1.4690706537672238, "grad_norm": 0.6542004822307297, "learning_rate": 0.00019969955368760232, "loss": 3.208113193511963, "step": 2506, "token_acc": 0.276475195475846 }, { "epoch": 1.4696569920844327, "grad_norm": 0.6121823158096926, "learning_rate": 0.0001996988024798938, "loss": 3.205082893371582, "step": 2507, "token_acc": 0.27654330188880133 }, { "epoch": 1.4702433304016418, "grad_norm": 0.5519609062019573, "learning_rate": 0.00019969805033564944, "loss": 3.219461441040039, "step": 2508, "token_acc": 0.2752365888881248 }, { "epoch": 1.4708296687188507, "grad_norm": 0.5722058555205045, "learning_rate": 0.0001996972972548762, "loss": 3.1967005729675293, "step": 2509, "token_acc": 0.2791950701041383 }, { "epoch": 1.4714160070360598, "grad_norm": 0.726680201817417, "learning_rate": 0.00019969654323758121, "loss": 3.180673360824585, "step": 2510, "token_acc": 0.28260201723973244 }, { "epoch": 1.472002345353269, "grad_norm": 0.7854221005748253, "learning_rate": 0.00019969578828377154, "loss": 3.200195789337158, "step": 2511, "token_acc": 0.2782077423951851 }, { "epoch": 1.4725886836704778, "grad_norm": 0.5887666723660547, "learning_rate": 0.0001996950323934543, "loss": 3.250013828277588, "step": 2512, "token_acc": 0.2720941860525054 }, { "epoch": 1.473175021987687, "grad_norm": 0.5420748971636962, "learning_rate": 0.0001996942755666365, "loss": 3.200059413909912, "step": 2513, "token_acc": 0.27972385645520587 }, { "epoch": 1.4737613603048958, "grad_norm": 0.47571610123741964, "learning_rate": 0.00019969351780332536, "loss": 3.190324068069458, "step": 2514, "token_acc": 0.2793133982347034 }, { "epoch": 1.474347698622105, "grad_norm": 0.6183824062005063, "learning_rate": 0.000199692759103528, "loss": 3.2048568725585938, "step": 2515, "token_acc": 0.2773247020869146 }, { "epoch": 1.474934036939314, "grad_norm": 0.5427911055629165, "learning_rate": 0.00019969199946725147, "loss": 3.208702325820923, "step": 2516, "token_acc": 0.2763342050752977 }, { "epoch": 1.4755203752565231, "grad_norm": 0.4922526607332147, "learning_rate": 0.00019969123889450294, "loss": 3.2250969409942627, "step": 2517, "token_acc": 0.2744276248172996 }, { "epoch": 1.476106713573732, "grad_norm": 0.7313959804633519, "learning_rate": 0.00019969047738528956, "loss": 3.213158130645752, "step": 2518, "token_acc": 0.27674735567136743 }, { "epoch": 1.4766930518909411, "grad_norm": 0.5834036986696088, "learning_rate": 0.0001996897149396185, "loss": 3.1552886962890625, "step": 2519, "token_acc": 0.28381487715613757 }, { "epoch": 1.47727939020815, "grad_norm": 0.6075928501263398, "learning_rate": 0.00019968895155749686, "loss": 3.2194197177886963, "step": 2520, "token_acc": 0.27472501930481985 }, { "epoch": 1.4778657285253591, "grad_norm": 0.5101410971984783, "learning_rate": 0.00019968818723893188, "loss": 3.2222201824188232, "step": 2521, "token_acc": 0.27400174883691963 }, { "epoch": 1.4784520668425682, "grad_norm": 0.6437041639971338, "learning_rate": 0.00019968742198393072, "loss": 3.141871929168701, "step": 2522, "token_acc": 0.284780965783233 }, { "epoch": 1.4790384051597771, "grad_norm": 0.5121587196636244, "learning_rate": 0.00019968665579250052, "loss": 3.2558109760284424, "step": 2523, "token_acc": 0.26877669995826364 }, { "epoch": 1.4796247434769862, "grad_norm": 0.6496642947867797, "learning_rate": 0.00019968588866464858, "loss": 3.2151339054107666, "step": 2524, "token_acc": 0.2754572951173844 }, { "epoch": 1.4802110817941951, "grad_norm": 0.7097584022477003, "learning_rate": 0.00019968512060038199, "loss": 3.1849985122680664, "step": 2525, "token_acc": 0.2789456645467172 }, { "epoch": 1.4807974201114043, "grad_norm": 0.5734534267527766, "learning_rate": 0.00019968435159970803, "loss": 3.227900505065918, "step": 2526, "token_acc": 0.2764788783290572 }, { "epoch": 1.4813837584286134, "grad_norm": 0.6627169235649571, "learning_rate": 0.00019968358166263394, "loss": 3.2236788272857666, "step": 2527, "token_acc": 0.2744796961104275 }, { "epoch": 1.4819700967458225, "grad_norm": 0.5891236881226227, "learning_rate": 0.00019968281078916691, "loss": 3.217014789581299, "step": 2528, "token_acc": 0.27542196531791907 }, { "epoch": 1.4825564350630314, "grad_norm": 0.6223975932642402, "learning_rate": 0.00019968203897931418, "loss": 3.2257957458496094, "step": 2529, "token_acc": 0.2728936105043405 }, { "epoch": 1.4831427733802405, "grad_norm": 0.7123687047983446, "learning_rate": 0.00019968126623308305, "loss": 3.1915369033813477, "step": 2530, "token_acc": 0.27894352576500875 }, { "epoch": 1.4837291116974494, "grad_norm": 0.5008508330566309, "learning_rate": 0.00019968049255048072, "loss": 3.2058990001678467, "step": 2531, "token_acc": 0.2779154603358425 }, { "epoch": 1.4843154500146585, "grad_norm": 0.5905457680258205, "learning_rate": 0.0001996797179315145, "loss": 3.2080085277557373, "step": 2532, "token_acc": 0.27641812966857454 }, { "epoch": 1.4849017883318676, "grad_norm": 0.6573248059047676, "learning_rate": 0.00019967894237619166, "loss": 3.1979641914367676, "step": 2533, "token_acc": 0.2769533304369698 }, { "epoch": 1.4854881266490765, "grad_norm": 0.5719084354713385, "learning_rate": 0.00019967816588451945, "loss": 3.212754726409912, "step": 2534, "token_acc": 0.27594639359973333 }, { "epoch": 1.4860744649662856, "grad_norm": 0.6130197424173706, "learning_rate": 0.00019967738845650518, "loss": 3.1927692890167236, "step": 2535, "token_acc": 0.27878230146247207 }, { "epoch": 1.4866608032834945, "grad_norm": 0.5401694099444807, "learning_rate": 0.0001996766100921562, "loss": 3.224625825881958, "step": 2536, "token_acc": 0.273315781736539 }, { "epoch": 1.4872471416007036, "grad_norm": 0.45654371177895586, "learning_rate": 0.00019967583079147976, "loss": 3.1967859268188477, "step": 2537, "token_acc": 0.2805367278045989 }, { "epoch": 1.4878334799179127, "grad_norm": 0.568102826814915, "learning_rate": 0.0001996750505544832, "loss": 3.1914587020874023, "step": 2538, "token_acc": 0.2796358439137668 }, { "epoch": 1.4884198182351216, "grad_norm": 0.5407867112711078, "learning_rate": 0.00019967426938117386, "loss": 3.188796043395996, "step": 2539, "token_acc": 0.2804518599933688 }, { "epoch": 1.4890061565523307, "grad_norm": 0.5245662356683642, "learning_rate": 0.00019967348727155908, "loss": 3.243316650390625, "step": 2540, "token_acc": 0.2711999186548855 }, { "epoch": 1.4895924948695396, "grad_norm": 0.5958080055453131, "learning_rate": 0.0001996727042256462, "loss": 3.229617118835449, "step": 2541, "token_acc": 0.27406487204055857 }, { "epoch": 1.4901788331867487, "grad_norm": 0.659684891150021, "learning_rate": 0.00019967192024344254, "loss": 3.1845126152038574, "step": 2542, "token_acc": 0.28146634590131214 }, { "epoch": 1.4907651715039578, "grad_norm": 0.5195139402251614, "learning_rate": 0.00019967113532495554, "loss": 3.225705146789551, "step": 2543, "token_acc": 0.27475822274107653 }, { "epoch": 1.491351509821167, "grad_norm": 0.5026947473730926, "learning_rate": 0.00019967034947019255, "loss": 3.198336124420166, "step": 2544, "token_acc": 0.2772863046982627 }, { "epoch": 1.4919378481383758, "grad_norm": 0.4792965054812544, "learning_rate": 0.0001996695626791609, "loss": 3.184157609939575, "step": 2545, "token_acc": 0.27936705773972415 }, { "epoch": 1.492524186455585, "grad_norm": 0.5166776279078952, "learning_rate": 0.000199668774951868, "loss": 3.234355926513672, "step": 2546, "token_acc": 0.2726265891745137 }, { "epoch": 1.4931105247727938, "grad_norm": 0.46635813548353355, "learning_rate": 0.00019966798628832128, "loss": 3.2106258869171143, "step": 2547, "token_acc": 0.2743577288573169 }, { "epoch": 1.493696863090003, "grad_norm": 0.47208036868593695, "learning_rate": 0.00019966719668852815, "loss": 3.2191061973571777, "step": 2548, "token_acc": 0.2737214241428892 }, { "epoch": 1.494283201407212, "grad_norm": 0.4072077133491671, "learning_rate": 0.00019966640615249598, "loss": 3.195671319961548, "step": 2549, "token_acc": 0.27851041351447486 }, { "epoch": 1.494869539724421, "grad_norm": 0.5007068754891894, "learning_rate": 0.00019966561468023227, "loss": 3.1850838661193848, "step": 2550, "token_acc": 0.27543604098848623 }, { "epoch": 1.49545587804163, "grad_norm": 0.5484460382686189, "learning_rate": 0.00019966482227174438, "loss": 3.1656007766723633, "step": 2551, "token_acc": 0.2817036151228038 }, { "epoch": 1.496042216358839, "grad_norm": 0.4472232777621029, "learning_rate": 0.00019966402892703978, "loss": 3.228626251220703, "step": 2552, "token_acc": 0.27372426994913096 }, { "epoch": 1.496628554676048, "grad_norm": 0.5370529019064525, "learning_rate": 0.00019966323464612592, "loss": 3.1678030490875244, "step": 2553, "token_acc": 0.28093113452666824 }, { "epoch": 1.4972148929932572, "grad_norm": 0.5188188924773037, "learning_rate": 0.0001996624394290103, "loss": 3.16164231300354, "step": 2554, "token_acc": 0.2828802311161986 }, { "epoch": 1.4978012313104663, "grad_norm": 0.5491705479819814, "learning_rate": 0.00019966164327570032, "loss": 3.204102039337158, "step": 2555, "token_acc": 0.27742823436885566 }, { "epoch": 1.4983875696276752, "grad_norm": 0.4946163233521165, "learning_rate": 0.00019966084618620354, "loss": 3.1950695514678955, "step": 2556, "token_acc": 0.27978114896791845 }, { "epoch": 1.4989739079448843, "grad_norm": 0.44101646202070716, "learning_rate": 0.0001996600481605274, "loss": 3.241257429122925, "step": 2557, "token_acc": 0.2737059898066241 }, { "epoch": 1.4995602462620932, "grad_norm": 0.5287581474783024, "learning_rate": 0.00019965924919867939, "loss": 3.205601453781128, "step": 2558, "token_acc": 0.27714623154134815 }, { "epoch": 1.5001465845793023, "grad_norm": 0.5615238682513923, "learning_rate": 0.000199658449300667, "loss": 3.1826610565185547, "step": 2559, "token_acc": 0.2795690325224993 }, { "epoch": 1.5007329228965114, "grad_norm": 0.5499962633852297, "learning_rate": 0.00019965764846649776, "loss": 3.1878738403320312, "step": 2560, "token_acc": 0.2794553280211387 }, { "epoch": 1.5013192612137203, "grad_norm": 0.5262601707034167, "learning_rate": 0.00019965684669617927, "loss": 3.18379545211792, "step": 2561, "token_acc": 0.27958872974826027 }, { "epoch": 1.5019055995309294, "grad_norm": 0.4890548671392714, "learning_rate": 0.00019965604398971895, "loss": 3.2516584396362305, "step": 2562, "token_acc": 0.27112523358590834 }, { "epoch": 1.5024919378481383, "grad_norm": 0.5510605958581747, "learning_rate": 0.0001996552403471244, "loss": 3.174196243286133, "step": 2563, "token_acc": 0.2815971559334802 }, { "epoch": 1.5030782761653474, "grad_norm": 0.679424420448652, "learning_rate": 0.00019965443576840314, "loss": 3.2020723819732666, "step": 2564, "token_acc": 0.27895799482989964 }, { "epoch": 1.5036646144825565, "grad_norm": 0.6111051627171314, "learning_rate": 0.00019965363025356277, "loss": 3.1935744285583496, "step": 2565, "token_acc": 0.2779376021038371 }, { "epoch": 1.5042509527997656, "grad_norm": 0.5954299683518938, "learning_rate": 0.0001996528238026108, "loss": 3.188821315765381, "step": 2566, "token_acc": 0.27774087768753236 }, { "epoch": 1.5048372911169745, "grad_norm": 0.6188524450667723, "learning_rate": 0.00019965201641555485, "loss": 3.195739507675171, "step": 2567, "token_acc": 0.2796716656173524 }, { "epoch": 1.5054236294341834, "grad_norm": 0.6079346382368913, "learning_rate": 0.00019965120809240248, "loss": 3.204178810119629, "step": 2568, "token_acc": 0.2761797866168941 }, { "epoch": 1.5060099677513925, "grad_norm": 0.5856612871716462, "learning_rate": 0.00019965039883316127, "loss": 3.2181806564331055, "step": 2569, "token_acc": 0.2737018028687985 }, { "epoch": 1.5065963060686016, "grad_norm": 0.6220883117403615, "learning_rate": 0.0001996495886378389, "loss": 3.1793951988220215, "step": 2570, "token_acc": 0.2790577931264043 }, { "epoch": 1.5071826443858107, "grad_norm": 0.8896693590849469, "learning_rate": 0.00019964877750644288, "loss": 3.2209482192993164, "step": 2571, "token_acc": 0.27534816586801114 }, { "epoch": 1.5077689827030196, "grad_norm": 0.7766437880647261, "learning_rate": 0.00019964796543898088, "loss": 3.2114968299865723, "step": 2572, "token_acc": 0.2768520780038822 }, { "epoch": 1.5083553210202285, "grad_norm": 0.5061783353958996, "learning_rate": 0.00019964715243546053, "loss": 3.1883745193481445, "step": 2573, "token_acc": 0.2800476750744604 }, { "epoch": 1.5089416593374376, "grad_norm": 0.5876043860514126, "learning_rate": 0.00019964633849588946, "loss": 3.230588674545288, "step": 2574, "token_acc": 0.2737344191118766 }, { "epoch": 1.5095279976546467, "grad_norm": 0.5490222411240606, "learning_rate": 0.00019964552362027532, "loss": 3.225931406021118, "step": 2575, "token_acc": 0.27572280226782847 }, { "epoch": 1.5101143359718558, "grad_norm": 0.49634159791958343, "learning_rate": 0.00019964470780862574, "loss": 3.183867931365967, "step": 2576, "token_acc": 0.2834025110211658 }, { "epoch": 1.5107006742890647, "grad_norm": 0.6039783785082427, "learning_rate": 0.00019964389106094844, "loss": 3.197291374206543, "step": 2577, "token_acc": 0.27901096736060943 }, { "epoch": 1.5112870126062738, "grad_norm": 0.6449220446386443, "learning_rate": 0.000199643073377251, "loss": 3.2544384002685547, "step": 2578, "token_acc": 0.26886562540617825 }, { "epoch": 1.5118733509234827, "grad_norm": 0.518953885142126, "learning_rate": 0.0001996422547575412, "loss": 3.210023880004883, "step": 2579, "token_acc": 0.2760992423707493 }, { "epoch": 1.5124596892406919, "grad_norm": 0.42408514917536944, "learning_rate": 0.00019964143520182667, "loss": 3.1973390579223633, "step": 2580, "token_acc": 0.2768521919998344 }, { "epoch": 1.513046027557901, "grad_norm": 0.5466683052297299, "learning_rate": 0.00019964061471011512, "loss": 3.2478909492492676, "step": 2581, "token_acc": 0.27186737810999884 }, { "epoch": 1.51363236587511, "grad_norm": 0.6076113311440955, "learning_rate": 0.00019963979328241428, "loss": 3.250032424926758, "step": 2582, "token_acc": 0.27027553278722244 }, { "epoch": 1.514218704192319, "grad_norm": 0.6575414575084427, "learning_rate": 0.00019963897091873184, "loss": 3.2322471141815186, "step": 2583, "token_acc": 0.27306669045532544 }, { "epoch": 1.5148050425095279, "grad_norm": 0.5153929024427344, "learning_rate": 0.00019963814761907552, "loss": 3.195371150970459, "step": 2584, "token_acc": 0.27909082964575405 }, { "epoch": 1.515391380826737, "grad_norm": 0.5355900598890126, "learning_rate": 0.00019963732338345306, "loss": 3.211622953414917, "step": 2585, "token_acc": 0.27775738837240616 }, { "epoch": 1.515977719143946, "grad_norm": 0.5007075998587486, "learning_rate": 0.00019963649821187223, "loss": 3.228663444519043, "step": 2586, "token_acc": 0.27121009478397395 }, { "epoch": 1.5165640574611552, "grad_norm": 0.5738858838888441, "learning_rate": 0.00019963567210434078, "loss": 3.187683582305908, "step": 2587, "token_acc": 0.2799089048245198 }, { "epoch": 1.517150395778364, "grad_norm": 0.47501189834544905, "learning_rate": 0.0001996348450608664, "loss": 3.197097063064575, "step": 2588, "token_acc": 0.2761465863892387 }, { "epoch": 1.5177367340955732, "grad_norm": 0.45559079551047527, "learning_rate": 0.00019963401708145698, "loss": 3.1959357261657715, "step": 2589, "token_acc": 0.2776377602428715 }, { "epoch": 1.518323072412782, "grad_norm": 0.475828542760936, "learning_rate": 0.00019963318816612017, "loss": 3.1912906169891357, "step": 2590, "token_acc": 0.27945222164264666 }, { "epoch": 1.5189094107299912, "grad_norm": 0.4692414063210235, "learning_rate": 0.00019963235831486383, "loss": 3.187471866607666, "step": 2591, "token_acc": 0.27807520185933976 }, { "epoch": 1.5194957490472003, "grad_norm": 0.5019159356266234, "learning_rate": 0.00019963152752769573, "loss": 3.213104009628296, "step": 2592, "token_acc": 0.2756329589037356 }, { "epoch": 1.5200820873644094, "grad_norm": 0.5053822379157825, "learning_rate": 0.00019963069580462373, "loss": 3.2158172130584717, "step": 2593, "token_acc": 0.2762074884447156 }, { "epoch": 1.5206684256816183, "grad_norm": 0.48863112503599165, "learning_rate": 0.00019962986314565556, "loss": 3.2027974128723145, "step": 2594, "token_acc": 0.2782632491106449 }, { "epoch": 1.5212547639988272, "grad_norm": 0.4879702271670185, "learning_rate": 0.00019962902955079909, "loss": 3.2264890670776367, "step": 2595, "token_acc": 0.27760177811295905 }, { "epoch": 1.5218411023160363, "grad_norm": 0.5216363861434566, "learning_rate": 0.00019962819502006212, "loss": 3.14943790435791, "step": 2596, "token_acc": 0.28374012017098155 }, { "epoch": 1.5224274406332454, "grad_norm": 0.6062754835776472, "learning_rate": 0.00019962735955345254, "loss": 3.217008352279663, "step": 2597, "token_acc": 0.2761816870789298 }, { "epoch": 1.5230137789504545, "grad_norm": 0.6130860589728432, "learning_rate": 0.0001996265231509781, "loss": 3.1947407722473145, "step": 2598, "token_acc": 0.2762130818891434 }, { "epoch": 1.5236001172676634, "grad_norm": 0.5512465483085283, "learning_rate": 0.0001996256858126468, "loss": 3.187373638153076, "step": 2599, "token_acc": 0.2788363181117269 }, { "epoch": 1.5241864555848723, "grad_norm": 0.4863912519444314, "learning_rate": 0.00019962484753846638, "loss": 3.23612904548645, "step": 2600, "token_acc": 0.2708356227749822 }, { "epoch": 1.5247727939020814, "grad_norm": 0.5688343992849446, "learning_rate": 0.0001996240083284448, "loss": 3.1475396156311035, "step": 2601, "token_acc": 0.2843472738912127 }, { "epoch": 1.5253591322192905, "grad_norm": 0.5127350507540789, "learning_rate": 0.00019962316818258988, "loss": 3.1505918502807617, "step": 2602, "token_acc": 0.28518486598293546 }, { "epoch": 1.5259454705364996, "grad_norm": 0.45775306404651056, "learning_rate": 0.00019962232710090956, "loss": 3.178192138671875, "step": 2603, "token_acc": 0.2800021140811014 }, { "epoch": 1.5265318088537085, "grad_norm": 0.4377406264509686, "learning_rate": 0.0001996214850834117, "loss": 3.189859390258789, "step": 2604, "token_acc": 0.27828651125722254 }, { "epoch": 1.5271181471709177, "grad_norm": 0.4769385208152471, "learning_rate": 0.00019962064213010426, "loss": 3.19708514213562, "step": 2605, "token_acc": 0.27913751349145405 }, { "epoch": 1.5277044854881265, "grad_norm": 0.47255046494484093, "learning_rate": 0.0001996197982409951, "loss": 3.1860170364379883, "step": 2606, "token_acc": 0.2778017286702698 }, { "epoch": 1.5282908238053357, "grad_norm": 0.6331900327363463, "learning_rate": 0.00019961895341609215, "loss": 3.226107120513916, "step": 2607, "token_acc": 0.27322191058551754 }, { "epoch": 1.5288771621225448, "grad_norm": 0.6149696737319772, "learning_rate": 0.00019961810765540343, "loss": 3.1917643547058105, "step": 2608, "token_acc": 0.2792448388313568 }, { "epoch": 1.5294635004397539, "grad_norm": 0.5239107493552146, "learning_rate": 0.00019961726095893677, "loss": 3.1599059104919434, "step": 2609, "token_acc": 0.28273398939268524 }, { "epoch": 1.5300498387569628, "grad_norm": 0.4945186851477478, "learning_rate": 0.0001996164133267002, "loss": 3.2321929931640625, "step": 2610, "token_acc": 0.27270286543930394 }, { "epoch": 1.5306361770741717, "grad_norm": 0.5753562879050244, "learning_rate": 0.00019961556475870168, "loss": 3.2100563049316406, "step": 2611, "token_acc": 0.2773342589378688 }, { "epoch": 1.5312225153913808, "grad_norm": 0.5160719678668378, "learning_rate": 0.00019961471525494916, "loss": 3.20725154876709, "step": 2612, "token_acc": 0.27617841178653846 }, { "epoch": 1.5318088537085899, "grad_norm": 0.5256446973537284, "learning_rate": 0.0001996138648154506, "loss": 3.1539833545684814, "step": 2613, "token_acc": 0.2822624860346754 }, { "epoch": 1.532395192025799, "grad_norm": 0.6137646415632242, "learning_rate": 0.00019961301344021404, "loss": 3.2356555461883545, "step": 2614, "token_acc": 0.27469596079437625 }, { "epoch": 1.5329815303430079, "grad_norm": 0.6521672014337166, "learning_rate": 0.00019961216112924742, "loss": 3.150270938873291, "step": 2615, "token_acc": 0.28337966119473434 }, { "epoch": 1.533567868660217, "grad_norm": 0.5332738384184421, "learning_rate": 0.00019961130788255879, "loss": 3.181734561920166, "step": 2616, "token_acc": 0.27969242713490344 }, { "epoch": 1.5341542069774259, "grad_norm": 0.4156409182836854, "learning_rate": 0.00019961045370015613, "loss": 3.2311954498291016, "step": 2617, "token_acc": 0.2751625786077235 }, { "epoch": 1.534740545294635, "grad_norm": 0.5004138723909748, "learning_rate": 0.00019960959858204754, "loss": 3.2367100715637207, "step": 2618, "token_acc": 0.2735227854582693 }, { "epoch": 1.535326883611844, "grad_norm": 0.6042173400527904, "learning_rate": 0.00019960874252824095, "loss": 3.194169044494629, "step": 2619, "token_acc": 0.2786639051183539 }, { "epoch": 1.5359132219290532, "grad_norm": 0.6074869541352533, "learning_rate": 0.00019960788553874447, "loss": 3.1802079677581787, "step": 2620, "token_acc": 0.27933774463502375 }, { "epoch": 1.536499560246262, "grad_norm": 0.5655148941661284, "learning_rate": 0.0001996070276135661, "loss": 3.2038466930389404, "step": 2621, "token_acc": 0.27691549642769153 }, { "epoch": 1.537085898563471, "grad_norm": 0.5229282285106968, "learning_rate": 0.00019960616875271394, "loss": 3.1922194957733154, "step": 2622, "token_acc": 0.27793751538514244 }, { "epoch": 1.53767223688068, "grad_norm": 0.662935111578389, "learning_rate": 0.00019960530895619605, "loss": 3.2193267345428467, "step": 2623, "token_acc": 0.2761253467358765 }, { "epoch": 1.5382585751978892, "grad_norm": 0.6163901793771169, "learning_rate": 0.00019960444822402052, "loss": 3.255826473236084, "step": 2624, "token_acc": 0.2709860892208593 }, { "epoch": 1.5388449135150983, "grad_norm": 0.5201199129258697, "learning_rate": 0.0001996035865561954, "loss": 3.194967269897461, "step": 2625, "token_acc": 0.276978892168926 }, { "epoch": 1.5394312518323072, "grad_norm": 0.5252987951046296, "learning_rate": 0.0001996027239527288, "loss": 3.2182321548461914, "step": 2626, "token_acc": 0.27467083397769293 }, { "epoch": 1.5400175901495161, "grad_norm": 0.5261210305135171, "learning_rate": 0.00019960186041362882, "loss": 3.1783833503723145, "step": 2627, "token_acc": 0.27898189905479703 }, { "epoch": 1.5406039284667252, "grad_norm": 0.5690603608090683, "learning_rate": 0.00019960099593890359, "loss": 3.1578307151794434, "step": 2628, "token_acc": 0.2823582628857462 }, { "epoch": 1.5411902667839343, "grad_norm": 0.6391591437890906, "learning_rate": 0.0001996001305285612, "loss": 3.191183567047119, "step": 2629, "token_acc": 0.27895011696747823 }, { "epoch": 1.5417766051011434, "grad_norm": 0.5516666086835875, "learning_rate": 0.0001995992641826098, "loss": 3.150019645690918, "step": 2630, "token_acc": 0.2834971021296807 }, { "epoch": 1.5423629434183523, "grad_norm": 0.4992849421599657, "learning_rate": 0.00019959839690105756, "loss": 3.1774020195007324, "step": 2631, "token_acc": 0.2818502258432184 }, { "epoch": 1.5429492817355615, "grad_norm": 0.6586184011729296, "learning_rate": 0.00019959752868391255, "loss": 3.149879217147827, "step": 2632, "token_acc": 0.2849889433512346 }, { "epoch": 1.5435356200527703, "grad_norm": 0.544734172641942, "learning_rate": 0.000199596659531183, "loss": 3.1702375411987305, "step": 2633, "token_acc": 0.28135284737918703 }, { "epoch": 1.5441219583699795, "grad_norm": 0.5693199786630552, "learning_rate": 0.000199595789442877, "loss": 3.2126379013061523, "step": 2634, "token_acc": 0.2774177174120658 }, { "epoch": 1.5447082966871886, "grad_norm": 0.6306540179255922, "learning_rate": 0.0001995949184190028, "loss": 3.2129197120666504, "step": 2635, "token_acc": 0.277051965255377 }, { "epoch": 1.5452946350043977, "grad_norm": 0.7031656807168499, "learning_rate": 0.00019959404645956852, "loss": 3.151315212249756, "step": 2636, "token_acc": 0.2851654513357908 }, { "epoch": 1.5458809733216066, "grad_norm": 0.6385330011965263, "learning_rate": 0.0001995931735645824, "loss": 3.206514358520508, "step": 2637, "token_acc": 0.2767687727285706 }, { "epoch": 1.5464673116388155, "grad_norm": 0.5427616952591011, "learning_rate": 0.0001995922997340526, "loss": 3.222639560699463, "step": 2638, "token_acc": 0.2738811113390889 }, { "epoch": 1.5470536499560246, "grad_norm": 0.6360618557810592, "learning_rate": 0.00019959142496798736, "loss": 3.174762725830078, "step": 2639, "token_acc": 0.28050552040844295 }, { "epoch": 1.5476399882732337, "grad_norm": 0.6747420405956391, "learning_rate": 0.00019959054926639488, "loss": 3.182965040206909, "step": 2640, "token_acc": 0.2802981890009137 }, { "epoch": 1.5482263265904428, "grad_norm": 0.6363593952484315, "learning_rate": 0.0001995896726292834, "loss": 3.257631301879883, "step": 2641, "token_acc": 0.2707891266835962 }, { "epoch": 1.5488126649076517, "grad_norm": 0.6625944911321094, "learning_rate": 0.00019958879505666116, "loss": 3.1755876541137695, "step": 2642, "token_acc": 0.28102680515634526 }, { "epoch": 1.5493990032248608, "grad_norm": 0.5672519022346407, "learning_rate": 0.00019958791654853635, "loss": 3.1996805667877197, "step": 2643, "token_acc": 0.27751013453968626 }, { "epoch": 1.5499853415420697, "grad_norm": 0.5406927723118318, "learning_rate": 0.00019958703710491727, "loss": 3.1929984092712402, "step": 2644, "token_acc": 0.27944954318506704 }, { "epoch": 1.5505716798592788, "grad_norm": 0.507959746515845, "learning_rate": 0.00019958615672581217, "loss": 3.1744046211242676, "step": 2645, "token_acc": 0.2798236925249174 }, { "epoch": 1.551158018176488, "grad_norm": 0.4750646893039651, "learning_rate": 0.00019958527541122934, "loss": 3.2198405265808105, "step": 2646, "token_acc": 0.2751212389841998 }, { "epoch": 1.551744356493697, "grad_norm": 0.4857295736421394, "learning_rate": 0.00019958439316117703, "loss": 3.1757779121398926, "step": 2647, "token_acc": 0.28054909628328795 }, { "epoch": 1.552330694810906, "grad_norm": 0.5201699525293312, "learning_rate": 0.0001995835099756635, "loss": 3.171346664428711, "step": 2648, "token_acc": 0.2825549642999701 }, { "epoch": 1.5529170331281148, "grad_norm": 0.5029864219315964, "learning_rate": 0.00019958262585469716, "loss": 3.2130441665649414, "step": 2649, "token_acc": 0.2756864398078347 }, { "epoch": 1.553503371445324, "grad_norm": 0.6019664214965286, "learning_rate": 0.00019958174079828618, "loss": 3.2080445289611816, "step": 2650, "token_acc": 0.2769343593556634 }, { "epoch": 1.554089709762533, "grad_norm": 0.7028617053857716, "learning_rate": 0.00019958085480643897, "loss": 3.239995241165161, "step": 2651, "token_acc": 0.27251708474853703 }, { "epoch": 1.5546760480797421, "grad_norm": 0.5436524782703289, "learning_rate": 0.00019957996787916377, "loss": 3.1789169311523438, "step": 2652, "token_acc": 0.2803233368021612 }, { "epoch": 1.555262386396951, "grad_norm": 0.4757696680370547, "learning_rate": 0.000199579080016469, "loss": 3.210191011428833, "step": 2653, "token_acc": 0.27740491585797705 }, { "epoch": 1.55584872471416, "grad_norm": 0.6083067017802386, "learning_rate": 0.00019957819121836295, "loss": 3.2088663578033447, "step": 2654, "token_acc": 0.2763939105107056 }, { "epoch": 1.556435063031369, "grad_norm": 0.6294612819493758, "learning_rate": 0.00019957730148485397, "loss": 3.23964262008667, "step": 2655, "token_acc": 0.27324605844445343 }, { "epoch": 1.5570214013485781, "grad_norm": 0.4144174263100731, "learning_rate": 0.00019957641081595043, "loss": 3.2180051803588867, "step": 2656, "token_acc": 0.274720674888109 }, { "epoch": 1.5576077396657872, "grad_norm": 0.5328353351184156, "learning_rate": 0.00019957551921166066, "loss": 3.224337100982666, "step": 2657, "token_acc": 0.2755813180530997 }, { "epoch": 1.5581940779829961, "grad_norm": 0.659026532932319, "learning_rate": 0.0001995746266719931, "loss": 3.1951637268066406, "step": 2658, "token_acc": 0.27689645966814525 }, { "epoch": 1.5587804163002053, "grad_norm": 0.5667812985259825, "learning_rate": 0.0001995737331969561, "loss": 3.1846213340759277, "step": 2659, "token_acc": 0.2788577125691601 }, { "epoch": 1.5593667546174141, "grad_norm": 0.479856358444414, "learning_rate": 0.00019957283878655803, "loss": 3.1924424171447754, "step": 2660, "token_acc": 0.27787733055606606 }, { "epoch": 1.5599530929346233, "grad_norm": 0.5176713862951597, "learning_rate": 0.00019957194344080733, "loss": 3.195065975189209, "step": 2661, "token_acc": 0.27675317847365927 }, { "epoch": 1.5605394312518324, "grad_norm": 0.5146265093831452, "learning_rate": 0.00019957104715971242, "loss": 3.1754794120788574, "step": 2662, "token_acc": 0.28130853907516157 }, { "epoch": 1.5611257695690415, "grad_norm": 0.5683193146204041, "learning_rate": 0.00019957014994328168, "loss": 3.1656100749969482, "step": 2663, "token_acc": 0.2802190425537408 }, { "epoch": 1.5617121078862504, "grad_norm": 0.43697032957482, "learning_rate": 0.00019956925179152353, "loss": 3.1724894046783447, "step": 2664, "token_acc": 0.2798126161954595 }, { "epoch": 1.5622984462034593, "grad_norm": 0.5494073812297098, "learning_rate": 0.00019956835270444647, "loss": 3.2296042442321777, "step": 2665, "token_acc": 0.27485585123638306 }, { "epoch": 1.5628847845206684, "grad_norm": 0.6155876772911851, "learning_rate": 0.00019956745268205888, "loss": 3.1836578845977783, "step": 2666, "token_acc": 0.27898089511219526 }, { "epoch": 1.5634711228378775, "grad_norm": 0.6041915727364132, "learning_rate": 0.00019956655172436924, "loss": 3.219562530517578, "step": 2667, "token_acc": 0.27594651000170095 }, { "epoch": 1.5640574611550866, "grad_norm": 0.5596558073345214, "learning_rate": 0.00019956564983138604, "loss": 3.269540786743164, "step": 2668, "token_acc": 0.26898652084133196 }, { "epoch": 1.5646437994722955, "grad_norm": 0.4728116260076817, "learning_rate": 0.0001995647470031177, "loss": 3.2143819332122803, "step": 2669, "token_acc": 0.27603340449444247 }, { "epoch": 1.5652301377895046, "grad_norm": 0.4644898856649255, "learning_rate": 0.00019956384323957274, "loss": 3.1786398887634277, "step": 2670, "token_acc": 0.27788483651733953 }, { "epoch": 1.5658164761067135, "grad_norm": 0.5413404826495073, "learning_rate": 0.00019956293854075962, "loss": 3.24879789352417, "step": 2671, "token_acc": 0.27239148791536777 }, { "epoch": 1.5664028144239226, "grad_norm": 0.5852980635252474, "learning_rate": 0.00019956203290668687, "loss": 3.1647677421569824, "step": 2672, "token_acc": 0.28221272648271706 }, { "epoch": 1.5669891527411317, "grad_norm": 0.7232027440384753, "learning_rate": 0.00019956112633736297, "loss": 3.205904722213745, "step": 2673, "token_acc": 0.2773987235837631 }, { "epoch": 1.5675754910583408, "grad_norm": 0.6398004954008825, "learning_rate": 0.00019956021883279647, "loss": 3.1643640995025635, "step": 2674, "token_acc": 0.28241405935695385 }, { "epoch": 1.5681618293755497, "grad_norm": 0.5213728902471416, "learning_rate": 0.00019955931039299584, "loss": 3.1736936569213867, "step": 2675, "token_acc": 0.28060349495198783 }, { "epoch": 1.5687481676927586, "grad_norm": 0.459778616600603, "learning_rate": 0.0001995584010179697, "loss": 3.189206123352051, "step": 2676, "token_acc": 0.2775991119733177 }, { "epoch": 1.5693345060099677, "grad_norm": 0.5447058164749606, "learning_rate": 0.0001995574907077265, "loss": 3.211618661880493, "step": 2677, "token_acc": 0.27584803650844403 }, { "epoch": 1.5699208443271768, "grad_norm": 0.5974072672707506, "learning_rate": 0.0001995565794622748, "loss": 3.1757566928863525, "step": 2678, "token_acc": 0.27955970128395047 }, { "epoch": 1.570507182644386, "grad_norm": 0.5087768719774082, "learning_rate": 0.00019955566728162324, "loss": 3.1685850620269775, "step": 2679, "token_acc": 0.2823508306514743 }, { "epoch": 1.5710935209615948, "grad_norm": 0.5180332022246887, "learning_rate": 0.00019955475416578034, "loss": 3.1553893089294434, "step": 2680, "token_acc": 0.28352413904004065 }, { "epoch": 1.5716798592788037, "grad_norm": 0.5078383827595645, "learning_rate": 0.00019955384011475466, "loss": 3.171081781387329, "step": 2681, "token_acc": 0.2821244942123537 }, { "epoch": 1.5722661975960128, "grad_norm": 0.45968922884256685, "learning_rate": 0.0001995529251285548, "loss": 3.180799961090088, "step": 2682, "token_acc": 0.279760293325273 }, { "epoch": 1.572852535913222, "grad_norm": 0.38608565678066015, "learning_rate": 0.00019955200920718935, "loss": 3.1922671794891357, "step": 2683, "token_acc": 0.2782131502229626 }, { "epoch": 1.573438874230431, "grad_norm": 0.4120686703038631, "learning_rate": 0.00019955109235066692, "loss": 3.1866798400878906, "step": 2684, "token_acc": 0.2787603731018378 }, { "epoch": 1.57402521254764, "grad_norm": 0.390296803124471, "learning_rate": 0.00019955017455899614, "loss": 3.2009198665618896, "step": 2685, "token_acc": 0.2778909601112092 }, { "epoch": 1.574611550864849, "grad_norm": 0.36461532552874704, "learning_rate": 0.00019954925583218563, "loss": 3.1958224773406982, "step": 2686, "token_acc": 0.27872545694960343 }, { "epoch": 1.575197889182058, "grad_norm": 0.4338738884702662, "learning_rate": 0.00019954833617024398, "loss": 3.2154078483581543, "step": 2687, "token_acc": 0.27445210850178753 }, { "epoch": 1.575784227499267, "grad_norm": 0.4951501833715675, "learning_rate": 0.00019954741557317985, "loss": 3.1404380798339844, "step": 2688, "token_acc": 0.2859533352225956 }, { "epoch": 1.5763705658164762, "grad_norm": 0.4442526149505525, "learning_rate": 0.00019954649404100192, "loss": 3.23830246925354, "step": 2689, "token_acc": 0.27081915756158476 }, { "epoch": 1.5769569041336853, "grad_norm": 0.47908842238193333, "learning_rate": 0.0001995455715737188, "loss": 3.1975960731506348, "step": 2690, "token_acc": 0.2759045852785061 }, { "epoch": 1.5775432424508942, "grad_norm": 0.4323151601088509, "learning_rate": 0.00019954464817133918, "loss": 3.2066562175750732, "step": 2691, "token_acc": 0.2761840301701575 }, { "epoch": 1.578129580768103, "grad_norm": 0.4377696635345492, "learning_rate": 0.00019954372383387172, "loss": 3.155653715133667, "step": 2692, "token_acc": 0.28266484842753836 }, { "epoch": 1.5787159190853122, "grad_norm": 0.42547903489818345, "learning_rate": 0.00019954279856132515, "loss": 3.1684136390686035, "step": 2693, "token_acc": 0.2815018958563869 }, { "epoch": 1.5793022574025213, "grad_norm": 0.4406429540119083, "learning_rate": 0.0001995418723537081, "loss": 3.16751766204834, "step": 2694, "token_acc": 0.281215789641502 }, { "epoch": 1.5798885957197304, "grad_norm": 0.45730537507453584, "learning_rate": 0.00019954094521102927, "loss": 3.2066006660461426, "step": 2695, "token_acc": 0.27585700036747335 }, { "epoch": 1.5804749340369393, "grad_norm": 0.5479819208363274, "learning_rate": 0.0001995400171332974, "loss": 3.167570114135742, "step": 2696, "token_acc": 0.28002477289345834 }, { "epoch": 1.5810612723541484, "grad_norm": 0.6279364661837348, "learning_rate": 0.0001995390881205212, "loss": 3.21063232421875, "step": 2697, "token_acc": 0.27513481949644486 }, { "epoch": 1.5816476106713573, "grad_norm": 0.5812659942346973, "learning_rate": 0.0001995381581727094, "loss": 3.187704563140869, "step": 2698, "token_acc": 0.27986604887640754 }, { "epoch": 1.5822339489885664, "grad_norm": 0.5916367253251017, "learning_rate": 0.00019953722728987075, "loss": 3.1654257774353027, "step": 2699, "token_acc": 0.28133582679321756 }, { "epoch": 1.5828202873057755, "grad_norm": 0.5577317903217045, "learning_rate": 0.00019953629547201398, "loss": 3.217228412628174, "step": 2700, "token_acc": 0.27419637325609664 }, { "epoch": 1.5834066256229846, "grad_norm": 0.619298920858638, "learning_rate": 0.0001995353627191478, "loss": 3.206254482269287, "step": 2701, "token_acc": 0.27676129094422647 }, { "epoch": 1.5839929639401935, "grad_norm": 0.5749382188890245, "learning_rate": 0.00019953442903128106, "loss": 3.1901440620422363, "step": 2702, "token_acc": 0.27944942212554313 }, { "epoch": 1.5845793022574024, "grad_norm": 0.518724504608595, "learning_rate": 0.0001995334944084225, "loss": 3.2007551193237305, "step": 2703, "token_acc": 0.2781976286192018 }, { "epoch": 1.5851656405746115, "grad_norm": 0.5017412619657687, "learning_rate": 0.00019953255885058082, "loss": 3.158094882965088, "step": 2704, "token_acc": 0.2820509585639959 }, { "epoch": 1.5857519788918206, "grad_norm": 0.49752078884240086, "learning_rate": 0.0001995316223577649, "loss": 3.185879945755005, "step": 2705, "token_acc": 0.2782083482208537 }, { "epoch": 1.5863383172090297, "grad_norm": 0.4458699434766253, "learning_rate": 0.00019953068492998353, "loss": 3.163083553314209, "step": 2706, "token_acc": 0.28040390383596864 }, { "epoch": 1.5869246555262386, "grad_norm": 0.5639789481036582, "learning_rate": 0.00019952974656724546, "loss": 3.2054343223571777, "step": 2707, "token_acc": 0.2781341618619222 }, { "epoch": 1.5875109938434475, "grad_norm": 0.5635996894132868, "learning_rate": 0.00019952880726955953, "loss": 3.1374831199645996, "step": 2708, "token_acc": 0.28522633396174457 }, { "epoch": 1.5880973321606566, "grad_norm": 0.4575514254484415, "learning_rate": 0.00019952786703693461, "loss": 3.1733005046844482, "step": 2709, "token_acc": 0.27982315344827136 }, { "epoch": 1.5886836704778657, "grad_norm": 0.43108544564551143, "learning_rate": 0.00019952692586937948, "loss": 3.1765170097351074, "step": 2710, "token_acc": 0.2815371987566775 }, { "epoch": 1.5892700087950749, "grad_norm": 0.46493941079626905, "learning_rate": 0.000199525983766903, "loss": 3.1962127685546875, "step": 2711, "token_acc": 0.27802675738578203 }, { "epoch": 1.5898563471122837, "grad_norm": 0.5392401843766587, "learning_rate": 0.00019952504072951398, "loss": 3.1908326148986816, "step": 2712, "token_acc": 0.2791409961261202 }, { "epoch": 1.5904426854294929, "grad_norm": 0.4911716648950553, "learning_rate": 0.00019952409675722137, "loss": 3.133856773376465, "step": 2713, "token_acc": 0.2855261546000245 }, { "epoch": 1.5910290237467017, "grad_norm": 0.4706750182588433, "learning_rate": 0.00019952315185003396, "loss": 3.18925404548645, "step": 2714, "token_acc": 0.27776780197204076 }, { "epoch": 1.5916153620639109, "grad_norm": 0.5703214209218508, "learning_rate": 0.00019952220600796063, "loss": 3.239129066467285, "step": 2715, "token_acc": 0.2723365881121179 }, { "epoch": 1.59220170038112, "grad_norm": 0.5799968257842225, "learning_rate": 0.0001995212592310103, "loss": 3.225174903869629, "step": 2716, "token_acc": 0.274231573444851 }, { "epoch": 1.592788038698329, "grad_norm": 0.5976963321482966, "learning_rate": 0.00019952031151919183, "loss": 3.2250146865844727, "step": 2717, "token_acc": 0.27567759665128494 }, { "epoch": 1.593374377015538, "grad_norm": 0.5470508560318481, "learning_rate": 0.00019951936287251415, "loss": 3.174149513244629, "step": 2718, "token_acc": 0.2816732072972812 }, { "epoch": 1.5939607153327469, "grad_norm": 0.5089985677498646, "learning_rate": 0.00019951841329098616, "loss": 3.167757511138916, "step": 2719, "token_acc": 0.280138073321695 }, { "epoch": 1.594547053649956, "grad_norm": 0.5175469222570545, "learning_rate": 0.0001995174627746168, "loss": 3.190876007080078, "step": 2720, "token_acc": 0.27822243367891153 }, { "epoch": 1.595133391967165, "grad_norm": 0.4874609566704708, "learning_rate": 0.00019951651132341496, "loss": 3.1772279739379883, "step": 2721, "token_acc": 0.2787389515155891 }, { "epoch": 1.5957197302843742, "grad_norm": 0.5352029315649367, "learning_rate": 0.0001995155589373896, "loss": 3.1827778816223145, "step": 2722, "token_acc": 0.281116451297919 }, { "epoch": 1.596306068601583, "grad_norm": 0.7246851472059336, "learning_rate": 0.00019951460561654964, "loss": 3.19466495513916, "step": 2723, "token_acc": 0.27677593216642693 }, { "epoch": 1.5968924069187922, "grad_norm": 0.7422870273747878, "learning_rate": 0.00019951365136090408, "loss": 3.2076501846313477, "step": 2724, "token_acc": 0.2752647582564694 }, { "epoch": 1.597478745236001, "grad_norm": 0.46800913628303287, "learning_rate": 0.00019951269617046188, "loss": 3.1576504707336426, "step": 2725, "token_acc": 0.28191748949427425 }, { "epoch": 1.5980650835532102, "grad_norm": 0.6107549778916752, "learning_rate": 0.00019951174004523194, "loss": 3.1973519325256348, "step": 2726, "token_acc": 0.27642424589557874 }, { "epoch": 1.5986514218704193, "grad_norm": 0.6172861175810778, "learning_rate": 0.0001995107829852233, "loss": 3.2109103202819824, "step": 2727, "token_acc": 0.2767768183327798 }, { "epoch": 1.5992377601876284, "grad_norm": 0.525818498176962, "learning_rate": 0.00019950982499044502, "loss": 3.1743531227111816, "step": 2728, "token_acc": 0.28082511182724545 }, { "epoch": 1.5998240985048373, "grad_norm": 0.5983146747536802, "learning_rate": 0.00019950886606090598, "loss": 3.2880496978759766, "step": 2729, "token_acc": 0.26584680729288257 }, { "epoch": 1.6004104368220462, "grad_norm": 0.4831373520742412, "learning_rate": 0.00019950790619661522, "loss": 3.1485490798950195, "step": 2730, "token_acc": 0.2836629001883239 }, { "epoch": 1.6009967751392553, "grad_norm": 0.6913192914898139, "learning_rate": 0.0001995069453975818, "loss": 3.185734748840332, "step": 2731, "token_acc": 0.27820943271148185 }, { "epoch": 1.6015831134564644, "grad_norm": 0.6058190723599078, "learning_rate": 0.00019950598366381468, "loss": 3.21842622756958, "step": 2732, "token_acc": 0.27552854817867856 }, { "epoch": 1.6021694517736735, "grad_norm": 0.5033882498220025, "learning_rate": 0.00019950502099532296, "loss": 3.1752514839172363, "step": 2733, "token_acc": 0.28058724241852495 }, { "epoch": 1.6027557900908824, "grad_norm": 0.588131090385271, "learning_rate": 0.00019950405739211564, "loss": 3.2080841064453125, "step": 2734, "token_acc": 0.27592276668628507 }, { "epoch": 1.6033421284080913, "grad_norm": 0.5080300539935406, "learning_rate": 0.0001995030928542018, "loss": 3.2264797687530518, "step": 2735, "token_acc": 0.27153140444863944 }, { "epoch": 1.6039284667253004, "grad_norm": 0.5451005728266386, "learning_rate": 0.00019950212738159044, "loss": 3.2129735946655273, "step": 2736, "token_acc": 0.2755253702090592 }, { "epoch": 1.6045148050425095, "grad_norm": 0.5973111399793514, "learning_rate": 0.00019950116097429071, "loss": 3.2457523345947266, "step": 2737, "token_acc": 0.27139484380235684 }, { "epoch": 1.6051011433597187, "grad_norm": 0.5126073270540585, "learning_rate": 0.00019950019363231163, "loss": 3.1716482639312744, "step": 2738, "token_acc": 0.2805184499404841 }, { "epoch": 1.6056874816769275, "grad_norm": 0.41094424643029165, "learning_rate": 0.00019949922535566234, "loss": 3.160980224609375, "step": 2739, "token_acc": 0.2823145319602347 }, { "epoch": 1.6062738199941367, "grad_norm": 0.548692898926108, "learning_rate": 0.00019949825614435187, "loss": 3.1645078659057617, "step": 2740, "token_acc": 0.28048002368470754 }, { "epoch": 1.6068601583113455, "grad_norm": 0.48559433319884715, "learning_rate": 0.0001994972859983894, "loss": 3.173524856567383, "step": 2741, "token_acc": 0.28287436230226554 }, { "epoch": 1.6074464966285547, "grad_norm": 0.4460907615137774, "learning_rate": 0.00019949631491778398, "loss": 3.15598464012146, "step": 2742, "token_acc": 0.28203202286300516 }, { "epoch": 1.6080328349457638, "grad_norm": 0.45976814831807006, "learning_rate": 0.00019949534290254474, "loss": 3.16910457611084, "step": 2743, "token_acc": 0.28094700900226377 }, { "epoch": 1.6086191732629729, "grad_norm": 0.3915946096315081, "learning_rate": 0.00019949436995268086, "loss": 3.1151230335235596, "step": 2744, "token_acc": 0.2892059609069751 }, { "epoch": 1.6092055115801818, "grad_norm": 0.6197851649176394, "learning_rate": 0.0001994933960682014, "loss": 3.155679225921631, "step": 2745, "token_acc": 0.28426036862149345 }, { "epoch": 1.6097918498973907, "grad_norm": 0.637770260883947, "learning_rate": 0.0001994924212491156, "loss": 3.19277286529541, "step": 2746, "token_acc": 0.277201729030205 }, { "epoch": 1.6103781882145998, "grad_norm": 0.4495472654610107, "learning_rate": 0.00019949144549543253, "loss": 3.215533971786499, "step": 2747, "token_acc": 0.27371296076922874 }, { "epoch": 1.6109645265318089, "grad_norm": 0.49777329723678526, "learning_rate": 0.0001994904688071614, "loss": 3.1957359313964844, "step": 2748, "token_acc": 0.27660357646472933 }, { "epoch": 1.611550864849018, "grad_norm": 0.544561957830631, "learning_rate": 0.0001994894911843114, "loss": 3.2205286026000977, "step": 2749, "token_acc": 0.2740589779704717 }, { "epoch": 1.6121372031662269, "grad_norm": 0.4828421329681335, "learning_rate": 0.0001994885126268917, "loss": 3.1864824295043945, "step": 2750, "token_acc": 0.27895519251574136 }, { "epoch": 1.612723541483436, "grad_norm": 0.5080422877395361, "learning_rate": 0.0001994875331349115, "loss": 3.1510653495788574, "step": 2751, "token_acc": 0.2836355525579987 }, { "epoch": 1.6133098798006449, "grad_norm": 0.5548527930230237, "learning_rate": 0.00019948655270837993, "loss": 3.157036781311035, "step": 2752, "token_acc": 0.28403876786039767 }, { "epoch": 1.613896218117854, "grad_norm": 0.46216027103306473, "learning_rate": 0.00019948557134730628, "loss": 3.168409824371338, "step": 2753, "token_acc": 0.2813674256727674 }, { "epoch": 1.614482556435063, "grad_norm": 0.4858823496593671, "learning_rate": 0.00019948458905169977, "loss": 3.16196608543396, "step": 2754, "token_acc": 0.28132535713811996 }, { "epoch": 1.6150688947522722, "grad_norm": 0.517089754032563, "learning_rate": 0.0001994836058215696, "loss": 3.2013792991638184, "step": 2755, "token_acc": 0.27720342154794125 }, { "epoch": 1.6156552330694811, "grad_norm": 0.5278272720208319, "learning_rate": 0.000199482621656925, "loss": 3.1952462196350098, "step": 2756, "token_acc": 0.27911832004911186 }, { "epoch": 1.61624157138669, "grad_norm": 0.4625206974032342, "learning_rate": 0.00019948163655777518, "loss": 3.2086873054504395, "step": 2757, "token_acc": 0.2774865796939432 }, { "epoch": 1.6168279097038991, "grad_norm": 0.42925296836957116, "learning_rate": 0.0001994806505241295, "loss": 3.1837313175201416, "step": 2758, "token_acc": 0.2777092888410627 }, { "epoch": 1.6174142480211082, "grad_norm": 0.5208040772991201, "learning_rate": 0.00019947966355599714, "loss": 3.2236547470092773, "step": 2759, "token_acc": 0.2710851188856017 }, { "epoch": 1.6180005863383173, "grad_norm": 0.5824409233992457, "learning_rate": 0.00019947867565338738, "loss": 3.2049150466918945, "step": 2760, "token_acc": 0.2781085939574694 }, { "epoch": 1.6185869246555262, "grad_norm": 0.7489718278310702, "learning_rate": 0.00019947768681630951, "loss": 3.2080612182617188, "step": 2761, "token_acc": 0.27595411057843283 }, { "epoch": 1.6191732629727351, "grad_norm": 0.7295635521381536, "learning_rate": 0.00019947669704477284, "loss": 3.1222336292266846, "step": 2762, "token_acc": 0.2876163039669872 }, { "epoch": 1.6197596012899442, "grad_norm": 0.693750412192316, "learning_rate": 0.00019947570633878665, "loss": 3.151315689086914, "step": 2763, "token_acc": 0.2832147979918719 }, { "epoch": 1.6203459396071533, "grad_norm": 0.7141214163460657, "learning_rate": 0.00019947471469836022, "loss": 3.1913294792175293, "step": 2764, "token_acc": 0.2762640125182772 }, { "epoch": 1.6209322779243625, "grad_norm": 0.5977329495917485, "learning_rate": 0.00019947372212350293, "loss": 3.1339991092681885, "step": 2765, "token_acc": 0.2844116214982933 }, { "epoch": 1.6215186162415713, "grad_norm": 0.5402359990732961, "learning_rate": 0.000199472728614224, "loss": 3.1637227535247803, "step": 2766, "token_acc": 0.28120816137576576 }, { "epoch": 1.6221049545587805, "grad_norm": 0.6132452949488804, "learning_rate": 0.00019947173417053285, "loss": 3.179791212081909, "step": 2767, "token_acc": 0.2818497784617017 }, { "epoch": 1.6226912928759893, "grad_norm": 0.5474417208949522, "learning_rate": 0.00019947073879243883, "loss": 3.1509242057800293, "step": 2768, "token_acc": 0.28458485950716145 }, { "epoch": 1.6232776311931985, "grad_norm": 0.46283082939957776, "learning_rate": 0.00019946974247995124, "loss": 3.203836441040039, "step": 2769, "token_acc": 0.27665301255990193 }, { "epoch": 1.6238639695104076, "grad_norm": 0.5812805316464508, "learning_rate": 0.00019946874523307947, "loss": 3.176304340362549, "step": 2770, "token_acc": 0.2798545052075638 }, { "epoch": 1.6244503078276167, "grad_norm": 0.5505599696850706, "learning_rate": 0.00019946774705183285, "loss": 3.176671266555786, "step": 2771, "token_acc": 0.27985580311161706 }, { "epoch": 1.6250366461448256, "grad_norm": 0.5357277806754507, "learning_rate": 0.0001994667479362208, "loss": 3.178579092025757, "step": 2772, "token_acc": 0.28141497705282614 }, { "epoch": 1.6256229844620345, "grad_norm": 0.6477300828637749, "learning_rate": 0.00019946574788625267, "loss": 3.2067723274230957, "step": 2773, "token_acc": 0.2753825811491719 }, { "epoch": 1.6262093227792436, "grad_norm": 0.5183817930345591, "learning_rate": 0.00019946474690193787, "loss": 3.248183250427246, "step": 2774, "token_acc": 0.27132052554557057 }, { "epoch": 1.6267956610964527, "grad_norm": 0.4787561276120512, "learning_rate": 0.00019946374498328582, "loss": 3.1965436935424805, "step": 2775, "token_acc": 0.2777290393791798 }, { "epoch": 1.6273819994136618, "grad_norm": 0.5559703945608404, "learning_rate": 0.00019946274213030588, "loss": 3.216770648956299, "step": 2776, "token_acc": 0.2768947462877564 }, { "epoch": 1.6279683377308707, "grad_norm": 0.4279772556025272, "learning_rate": 0.0001994617383430075, "loss": 3.153958320617676, "step": 2777, "token_acc": 0.282367425317854 }, { "epoch": 1.6285546760480798, "grad_norm": 0.3907500799893467, "learning_rate": 0.0001994607336214002, "loss": 3.156097173690796, "step": 2778, "token_acc": 0.2828516197181266 }, { "epoch": 1.6291410143652887, "grad_norm": 0.4678167192536674, "learning_rate": 0.00019945972796549323, "loss": 3.1726467609405518, "step": 2779, "token_acc": 0.2798339177741428 }, { "epoch": 1.6297273526824978, "grad_norm": 0.41685268286936933, "learning_rate": 0.0001994587213752962, "loss": 3.216644287109375, "step": 2780, "token_acc": 0.27539233973682 }, { "epoch": 1.630313690999707, "grad_norm": 0.42632467830232973, "learning_rate": 0.00019945771385081852, "loss": 3.1680068969726562, "step": 2781, "token_acc": 0.28034242248304897 }, { "epoch": 1.630900029316916, "grad_norm": 0.48642994751214513, "learning_rate": 0.0001994567053920696, "loss": 3.178630828857422, "step": 2782, "token_acc": 0.2807507335631356 }, { "epoch": 1.631486367634125, "grad_norm": 0.6684322972811882, "learning_rate": 0.00019945569599905894, "loss": 3.1403369903564453, "step": 2783, "token_acc": 0.28564544796260943 }, { "epoch": 1.6320727059513338, "grad_norm": 0.6217527901632093, "learning_rate": 0.0001994546856717961, "loss": 3.173407554626465, "step": 2784, "token_acc": 0.2812266485003551 }, { "epoch": 1.632659044268543, "grad_norm": 0.447455355709841, "learning_rate": 0.00019945367441029043, "loss": 3.1835484504699707, "step": 2785, "token_acc": 0.28042461018670617 }, { "epoch": 1.633245382585752, "grad_norm": 0.5709725194838717, "learning_rate": 0.00019945266221455153, "loss": 3.148618698120117, "step": 2786, "token_acc": 0.28191742174891027 }, { "epoch": 1.6338317209029611, "grad_norm": 0.6456972940792354, "learning_rate": 0.00019945164908458888, "loss": 3.1668946743011475, "step": 2787, "token_acc": 0.2824771258615904 }, { "epoch": 1.63441805922017, "grad_norm": 0.5409629829091859, "learning_rate": 0.00019945063502041204, "loss": 3.217435121536255, "step": 2788, "token_acc": 0.2754680326833172 }, { "epoch": 1.635004397537379, "grad_norm": 0.5476724181885979, "learning_rate": 0.00019944962002203044, "loss": 3.222550392150879, "step": 2789, "token_acc": 0.2746753077743942 }, { "epoch": 1.635590735854588, "grad_norm": 0.5563318894192146, "learning_rate": 0.0001994486040894537, "loss": 3.2024638652801514, "step": 2790, "token_acc": 0.275915849585174 }, { "epoch": 1.6361770741717971, "grad_norm": 0.5108529937699117, "learning_rate": 0.00019944758722269132, "loss": 3.213064670562744, "step": 2791, "token_acc": 0.2746729773332048 }, { "epoch": 1.6367634124890063, "grad_norm": 0.4669508848959545, "learning_rate": 0.00019944656942175287, "loss": 3.1516754627227783, "step": 2792, "token_acc": 0.2810695622879796 }, { "epoch": 1.6373497508062151, "grad_norm": 0.5512676952212291, "learning_rate": 0.0001994455506866479, "loss": 3.2636749744415283, "step": 2793, "token_acc": 0.26851698925407724 }, { "epoch": 1.6379360891234243, "grad_norm": 0.7116761369698605, "learning_rate": 0.000199444531017386, "loss": 3.210907459259033, "step": 2794, "token_acc": 0.2757381205213318 }, { "epoch": 1.6385224274406331, "grad_norm": 0.5933682303517525, "learning_rate": 0.00019944351041397673, "loss": 3.1618857383728027, "step": 2795, "token_acc": 0.28142113891818066 }, { "epoch": 1.6391087657578423, "grad_norm": 0.5454498753958668, "learning_rate": 0.0001994424888764297, "loss": 3.1935489177703857, "step": 2796, "token_acc": 0.27928899562346 }, { "epoch": 1.6396951040750514, "grad_norm": 0.44898988179923366, "learning_rate": 0.00019944146640475446, "loss": 3.233721971511841, "step": 2797, "token_acc": 0.2727671048196832 }, { "epoch": 1.6402814423922605, "grad_norm": 0.5118375794797234, "learning_rate": 0.00019944044299896065, "loss": 3.232759475708008, "step": 2798, "token_acc": 0.27262206470242945 }, { "epoch": 1.6408677807094694, "grad_norm": 0.46028969475881437, "learning_rate": 0.00019943941865905787, "loss": 3.2208476066589355, "step": 2799, "token_acc": 0.2740351849494095 }, { "epoch": 1.6414541190266783, "grad_norm": 0.39303700313203016, "learning_rate": 0.00019943839338505576, "loss": 3.2424120903015137, "step": 2800, "token_acc": 0.2709996169653302 }, { "epoch": 1.6420404573438874, "grad_norm": 0.4456127668592045, "learning_rate": 0.00019943736717696392, "loss": 3.1765012741088867, "step": 2801, "token_acc": 0.27878677068096863 }, { "epoch": 1.6426267956610965, "grad_norm": 0.4070435637317442, "learning_rate": 0.000199436340034792, "loss": 3.12042236328125, "step": 2802, "token_acc": 0.2877063544734332 }, { "epoch": 1.6432131339783056, "grad_norm": 0.3966690487864105, "learning_rate": 0.0001994353119585497, "loss": 3.1749918460845947, "step": 2803, "token_acc": 0.28035081916432375 }, { "epoch": 1.6437994722955145, "grad_norm": 0.4093232052551597, "learning_rate": 0.0001994342829482466, "loss": 3.1941781044006348, "step": 2804, "token_acc": 0.27880860950659914 }, { "epoch": 1.6443858106127234, "grad_norm": 0.3810772658009836, "learning_rate": 0.00019943325300389244, "loss": 3.184230327606201, "step": 2805, "token_acc": 0.279265041260886 }, { "epoch": 1.6449721489299325, "grad_norm": 0.45041957857186704, "learning_rate": 0.00019943222212549683, "loss": 3.19789457321167, "step": 2806, "token_acc": 0.2778620645244381 }, { "epoch": 1.6455584872471416, "grad_norm": 0.5384638761927716, "learning_rate": 0.00019943119031306947, "loss": 3.1767196655273438, "step": 2807, "token_acc": 0.27961630695443646 }, { "epoch": 1.6461448255643507, "grad_norm": 0.5611399780308302, "learning_rate": 0.00019943015756662008, "loss": 3.186880588531494, "step": 2808, "token_acc": 0.277730273909917 }, { "epoch": 1.6467311638815598, "grad_norm": 0.5147937348325553, "learning_rate": 0.00019942912388615832, "loss": 3.1641759872436523, "step": 2809, "token_acc": 0.28220344265190156 }, { "epoch": 1.6473175021987687, "grad_norm": 0.5654180450263895, "learning_rate": 0.00019942808927169393, "loss": 3.181835651397705, "step": 2810, "token_acc": 0.2800724241731822 }, { "epoch": 1.6479038405159776, "grad_norm": 0.4710954340628746, "learning_rate": 0.00019942705372323665, "loss": 3.110374927520752, "step": 2811, "token_acc": 0.2905339968612091 }, { "epoch": 1.6484901788331867, "grad_norm": 0.641923435500879, "learning_rate": 0.00019942601724079614, "loss": 3.148357391357422, "step": 2812, "token_acc": 0.2847719991926427 }, { "epoch": 1.6490765171503958, "grad_norm": 0.7035982045092186, "learning_rate": 0.00019942497982438221, "loss": 3.2139718532562256, "step": 2813, "token_acc": 0.27800097019465536 }, { "epoch": 1.649662855467605, "grad_norm": 0.6533232878713798, "learning_rate": 0.00019942394147400458, "loss": 3.1651039123535156, "step": 2814, "token_acc": 0.27936322402774777 }, { "epoch": 1.6502491937848138, "grad_norm": 0.5097307738582846, "learning_rate": 0.00019942290218967297, "loss": 3.2052364349365234, "step": 2815, "token_acc": 0.2763967126035146 }, { "epoch": 1.6508355321020227, "grad_norm": 0.5340012826822406, "learning_rate": 0.00019942186197139717, "loss": 3.178485631942749, "step": 2816, "token_acc": 0.28015954427557277 }, { "epoch": 1.6514218704192318, "grad_norm": 0.5458380092394408, "learning_rate": 0.00019942082081918696, "loss": 3.128283977508545, "step": 2817, "token_acc": 0.28661020253216507 }, { "epoch": 1.652008208736441, "grad_norm": 0.5302909609158541, "learning_rate": 0.00019941977873305208, "loss": 3.1871461868286133, "step": 2818, "token_acc": 0.2791355145052794 }, { "epoch": 1.65259454705365, "grad_norm": 0.5545279452543547, "learning_rate": 0.00019941873571300238, "loss": 3.1247646808624268, "step": 2819, "token_acc": 0.28715901906460584 }, { "epoch": 1.653180885370859, "grad_norm": 0.5637508909733369, "learning_rate": 0.0001994176917590476, "loss": 3.2287073135375977, "step": 2820, "token_acc": 0.2727975106896406 }, { "epoch": 1.653767223688068, "grad_norm": 0.4334130476328985, "learning_rate": 0.00019941664687119761, "loss": 3.1863481998443604, "step": 2821, "token_acc": 0.2769217046741501 }, { "epoch": 1.654353562005277, "grad_norm": 0.5661342567934482, "learning_rate": 0.00019941560104946214, "loss": 3.1425609588623047, "step": 2822, "token_acc": 0.28483202234412275 }, { "epoch": 1.654939900322486, "grad_norm": 0.4724971327625286, "learning_rate": 0.00019941455429385113, "loss": 3.1545259952545166, "step": 2823, "token_acc": 0.2845815757493748 }, { "epoch": 1.6555262386396952, "grad_norm": 0.42441310994818526, "learning_rate": 0.0001994135066043743, "loss": 3.1781954765319824, "step": 2824, "token_acc": 0.2791140845395099 }, { "epoch": 1.6561125769569043, "grad_norm": 0.4805471471675668, "learning_rate": 0.00019941245798104154, "loss": 3.1931538581848145, "step": 2825, "token_acc": 0.2764317471032799 }, { "epoch": 1.6566989152741132, "grad_norm": 0.4490735814543902, "learning_rate": 0.0001994114084238627, "loss": 3.1688804626464844, "step": 2826, "token_acc": 0.2796020803880624 }, { "epoch": 1.657285253591322, "grad_norm": 0.4837428920863334, "learning_rate": 0.00019941035793284763, "loss": 3.180664300918579, "step": 2827, "token_acc": 0.27999723307118457 }, { "epoch": 1.6578715919085312, "grad_norm": 0.6014004035986636, "learning_rate": 0.00019940930650800623, "loss": 3.1675233840942383, "step": 2828, "token_acc": 0.28181392715021164 }, { "epoch": 1.6584579302257403, "grad_norm": 0.6296702881561701, "learning_rate": 0.0001994082541493483, "loss": 3.1702237129211426, "step": 2829, "token_acc": 0.28171520863661 }, { "epoch": 1.6590442685429494, "grad_norm": 0.4554945635315403, "learning_rate": 0.00019940720085688383, "loss": 3.200129747390747, "step": 2830, "token_acc": 0.2752150997189167 }, { "epoch": 1.6596306068601583, "grad_norm": 0.468217256444725, "learning_rate": 0.00019940614663062264, "loss": 3.1139862537384033, "step": 2831, "token_acc": 0.287813491589331 }, { "epoch": 1.6602169451773672, "grad_norm": 0.4843141143557423, "learning_rate": 0.00019940509147057465, "loss": 3.161217451095581, "step": 2832, "token_acc": 0.2827087828607178 }, { "epoch": 1.6608032834945763, "grad_norm": 0.5172996191376336, "learning_rate": 0.00019940403537674976, "loss": 3.174161434173584, "step": 2833, "token_acc": 0.28115372826499174 }, { "epoch": 1.6613896218117854, "grad_norm": 0.5258942444428928, "learning_rate": 0.00019940297834915793, "loss": 3.162675619125366, "step": 2834, "token_acc": 0.28078772839358296 }, { "epoch": 1.6619759601289945, "grad_norm": 0.5964774941323191, "learning_rate": 0.00019940192038780908, "loss": 3.19197416305542, "step": 2835, "token_acc": 0.27723134520978415 }, { "epoch": 1.6625622984462036, "grad_norm": 0.5935246696336092, "learning_rate": 0.0001994008614927131, "loss": 3.1661930084228516, "step": 2836, "token_acc": 0.28156718862608926 }, { "epoch": 1.6631486367634125, "grad_norm": 0.47680265846594466, "learning_rate": 0.00019939980166387998, "loss": 3.1605894565582275, "step": 2837, "token_acc": 0.2811075905914905 }, { "epoch": 1.6637349750806214, "grad_norm": 0.4575440808755721, "learning_rate": 0.00019939874090131967, "loss": 3.230023145675659, "step": 2838, "token_acc": 0.2740931006392277 }, { "epoch": 1.6643213133978305, "grad_norm": 0.5302389176435637, "learning_rate": 0.00019939767920504212, "loss": 3.191206693649292, "step": 2839, "token_acc": 0.2803357288368818 }, { "epoch": 1.6649076517150396, "grad_norm": 0.5891384990559664, "learning_rate": 0.00019939661657505733, "loss": 3.1735386848449707, "step": 2840, "token_acc": 0.2803433542674766 }, { "epoch": 1.6654939900322487, "grad_norm": 0.5056553838673572, "learning_rate": 0.00019939555301137527, "loss": 3.1374077796936035, "step": 2841, "token_acc": 0.28513276380859126 }, { "epoch": 1.6660803283494576, "grad_norm": 0.5211869762050975, "learning_rate": 0.0001993944885140059, "loss": 3.178830146789551, "step": 2842, "token_acc": 0.2801529820412386 }, { "epoch": 1.6666666666666665, "grad_norm": 0.5290404941384889, "learning_rate": 0.00019939342308295928, "loss": 3.1898064613342285, "step": 2843, "token_acc": 0.27836815514908336 }, { "epoch": 1.6672530049838756, "grad_norm": 0.5248195790713743, "learning_rate": 0.00019939235671824536, "loss": 3.194986343383789, "step": 2844, "token_acc": 0.27819109028808875 }, { "epoch": 1.6678393433010847, "grad_norm": 0.5342485486341926, "learning_rate": 0.0001993912894198742, "loss": 3.234722375869751, "step": 2845, "token_acc": 0.2717894357785718 }, { "epoch": 1.6684256816182939, "grad_norm": 0.5723597126667632, "learning_rate": 0.0001993902211878558, "loss": 3.1826910972595215, "step": 2846, "token_acc": 0.2799006350647 }, { "epoch": 1.6690120199355027, "grad_norm": 0.5875655218507776, "learning_rate": 0.0001993891520222002, "loss": 3.2054429054260254, "step": 2847, "token_acc": 0.2763533220517509 }, { "epoch": 1.6695983582527119, "grad_norm": 0.45210739813903605, "learning_rate": 0.00019938808192291742, "loss": 3.1546449661254883, "step": 2848, "token_acc": 0.2835193023624874 }, { "epoch": 1.6701846965699207, "grad_norm": 0.410651589274117, "learning_rate": 0.0001993870108900176, "loss": 3.158632755279541, "step": 2849, "token_acc": 0.2825274691294399 }, { "epoch": 1.6707710348871299, "grad_norm": 0.5076025403410077, "learning_rate": 0.0001993859389235107, "loss": 3.151688814163208, "step": 2850, "token_acc": 0.2817990995219111 }, { "epoch": 1.671357373204339, "grad_norm": 0.5655705922447349, "learning_rate": 0.00019938486602340684, "loss": 3.1843314170837402, "step": 2851, "token_acc": 0.2783322364600674 }, { "epoch": 1.671943711521548, "grad_norm": 0.5054898852378661, "learning_rate": 0.00019938379218971606, "loss": 3.244879722595215, "step": 2852, "token_acc": 0.2693527268138005 }, { "epoch": 1.672530049838757, "grad_norm": 0.5145462850095652, "learning_rate": 0.00019938271742244847, "loss": 3.1979732513427734, "step": 2853, "token_acc": 0.2773263196081988 }, { "epoch": 1.6731163881559659, "grad_norm": 0.5860352353182224, "learning_rate": 0.0001993816417216142, "loss": 3.1838808059692383, "step": 2854, "token_acc": 0.279310915503884 }, { "epoch": 1.673702726473175, "grad_norm": 0.531101742611855, "learning_rate": 0.0001993805650872233, "loss": 3.1591529846191406, "step": 2855, "token_acc": 0.28283665538030855 }, { "epoch": 1.674289064790384, "grad_norm": 0.505796845679621, "learning_rate": 0.00019937948751928592, "loss": 3.19934344291687, "step": 2856, "token_acc": 0.27863207889495456 }, { "epoch": 1.6748754031075932, "grad_norm": 0.5913169894322906, "learning_rate": 0.00019937840901781216, "loss": 3.218362808227539, "step": 2857, "token_acc": 0.27454606545970217 }, { "epoch": 1.675461741424802, "grad_norm": 0.47681022526918515, "learning_rate": 0.0001993773295828122, "loss": 3.187335729598999, "step": 2858, "token_acc": 0.2782354546217782 }, { "epoch": 1.676048079742011, "grad_norm": 0.48875808842423657, "learning_rate": 0.0001993762492142961, "loss": 3.1691136360168457, "step": 2859, "token_acc": 0.28176882419385774 }, { "epoch": 1.67663441805922, "grad_norm": 0.5893408164355202, "learning_rate": 0.00019937516791227407, "loss": 3.1616156101226807, "step": 2860, "token_acc": 0.2805149123662818 }, { "epoch": 1.6772207563764292, "grad_norm": 0.49386637891633156, "learning_rate": 0.00019937408567675624, "loss": 3.156148910522461, "step": 2861, "token_acc": 0.28219289263705577 }, { "epoch": 1.6778070946936383, "grad_norm": 0.5274373794424084, "learning_rate": 0.0001993730025077528, "loss": 3.195784568786621, "step": 2862, "token_acc": 0.27668566810426987 }, { "epoch": 1.6783934330108474, "grad_norm": 0.5805842082848377, "learning_rate": 0.00019937191840527387, "loss": 3.163623094558716, "step": 2863, "token_acc": 0.2815180458293623 }, { "epoch": 1.6789797713280563, "grad_norm": 0.5690415842433055, "learning_rate": 0.0001993708333693297, "loss": 3.207353115081787, "step": 2864, "token_acc": 0.27436192075097954 }, { "epoch": 1.6795661096452652, "grad_norm": 0.4920691316075731, "learning_rate": 0.00019936974739993043, "loss": 3.185525894165039, "step": 2865, "token_acc": 0.280002003780818 }, { "epoch": 1.6801524479624743, "grad_norm": 0.4295407615000572, "learning_rate": 0.0001993686604970863, "loss": 3.162886142730713, "step": 2866, "token_acc": 0.280847477088161 }, { "epoch": 1.6807387862796834, "grad_norm": 0.47064503676598063, "learning_rate": 0.00019936757266080752, "loss": 3.225635290145874, "step": 2867, "token_acc": 0.27406725135931587 }, { "epoch": 1.6813251245968925, "grad_norm": 0.45731242505805786, "learning_rate": 0.00019936648389110427, "loss": 3.191657781600952, "step": 2868, "token_acc": 0.2790455386540409 }, { "epoch": 1.6819114629141014, "grad_norm": 0.4245866315215397, "learning_rate": 0.00019936539418798684, "loss": 3.1368765830993652, "step": 2869, "token_acc": 0.2837512944739622 }, { "epoch": 1.6824978012313103, "grad_norm": 0.45209880995773777, "learning_rate": 0.0001993643035514654, "loss": 3.225928783416748, "step": 2870, "token_acc": 0.272837426717328 }, { "epoch": 1.6830841395485194, "grad_norm": 0.5005974334931792, "learning_rate": 0.00019936321198155024, "loss": 3.171158790588379, "step": 2871, "token_acc": 0.27911255157080567 }, { "epoch": 1.6836704778657285, "grad_norm": 0.5394073750087237, "learning_rate": 0.00019936211947825156, "loss": 3.1790037155151367, "step": 2872, "token_acc": 0.27890558369220725 }, { "epoch": 1.6842568161829377, "grad_norm": 0.3825921915654264, "learning_rate": 0.00019936102604157968, "loss": 3.15989351272583, "step": 2873, "token_acc": 0.28138625616671104 }, { "epoch": 1.6848431545001465, "grad_norm": 0.46364738182691906, "learning_rate": 0.00019935993167154487, "loss": 3.1814894676208496, "step": 2874, "token_acc": 0.2791355754550869 }, { "epoch": 1.6854294928173557, "grad_norm": 0.535513402830021, "learning_rate": 0.0001993588363681574, "loss": 3.1777539253234863, "step": 2875, "token_acc": 0.2805452131121931 }, { "epoch": 1.6860158311345645, "grad_norm": 0.5640000929035263, "learning_rate": 0.0001993577401314275, "loss": 3.195171594619751, "step": 2876, "token_acc": 0.2782568544991382 }, { "epoch": 1.6866021694517737, "grad_norm": 0.48746766377487216, "learning_rate": 0.00019935664296136555, "loss": 3.2331278324127197, "step": 2877, "token_acc": 0.27207686175189627 }, { "epoch": 1.6871885077689828, "grad_norm": 0.5287358491402241, "learning_rate": 0.00019935554485798183, "loss": 3.1588358879089355, "step": 2878, "token_acc": 0.2815789021012874 }, { "epoch": 1.6877748460861919, "grad_norm": 0.5550813944975893, "learning_rate": 0.00019935444582128663, "loss": 3.1451661586761475, "step": 2879, "token_acc": 0.2844005191578331 }, { "epoch": 1.6883611844034008, "grad_norm": 0.4958963197967804, "learning_rate": 0.0001993533458512903, "loss": 3.176665782928467, "step": 2880, "token_acc": 0.27947364202849184 }, { "epoch": 1.6889475227206097, "grad_norm": 0.4833627588146955, "learning_rate": 0.00019935224494800315, "loss": 3.151298999786377, "step": 2881, "token_acc": 0.28536362463160836 }, { "epoch": 1.6895338610378188, "grad_norm": 0.48442665402806734, "learning_rate": 0.00019935114311143558, "loss": 3.200427532196045, "step": 2882, "token_acc": 0.2765808244412231 }, { "epoch": 1.6901201993550279, "grad_norm": 0.4945639896516093, "learning_rate": 0.00019935004034159787, "loss": 3.172539234161377, "step": 2883, "token_acc": 0.2806612085212444 }, { "epoch": 1.690706537672237, "grad_norm": 0.5308268897978228, "learning_rate": 0.00019934893663850042, "loss": 3.1641626358032227, "step": 2884, "token_acc": 0.2804209247143987 }, { "epoch": 1.6912928759894459, "grad_norm": 0.42741321845992697, "learning_rate": 0.00019934783200215356, "loss": 3.199472188949585, "step": 2885, "token_acc": 0.2758055863710463 }, { "epoch": 1.6918792143066548, "grad_norm": 0.4461637606371874, "learning_rate": 0.00019934672643256768, "loss": 3.1667284965515137, "step": 2886, "token_acc": 0.2798052557547137 }, { "epoch": 1.692465552623864, "grad_norm": 0.37347174832680125, "learning_rate": 0.00019934561992975323, "loss": 3.148986577987671, "step": 2887, "token_acc": 0.2835601598121861 }, { "epoch": 1.693051890941073, "grad_norm": 0.4827196511133989, "learning_rate": 0.0001993445124937205, "loss": 3.1320087909698486, "step": 2888, "token_acc": 0.2863869140782155 }, { "epoch": 1.6936382292582821, "grad_norm": 0.5928113365653325, "learning_rate": 0.00019934340412448, "loss": 3.104896068572998, "step": 2889, "token_acc": 0.2885311382212618 }, { "epoch": 1.6942245675754912, "grad_norm": 0.5769506545441206, "learning_rate": 0.00019934229482204203, "loss": 3.1635777950286865, "step": 2890, "token_acc": 0.2805257529761754 }, { "epoch": 1.6948109058927001, "grad_norm": 0.5741931261972979, "learning_rate": 0.00019934118458641708, "loss": 3.188312292098999, "step": 2891, "token_acc": 0.27832153907422724 }, { "epoch": 1.695397244209909, "grad_norm": 0.5481898046958634, "learning_rate": 0.00019934007341761557, "loss": 3.2050981521606445, "step": 2892, "token_acc": 0.2754472905460247 }, { "epoch": 1.6959835825271181, "grad_norm": 0.5889394511835178, "learning_rate": 0.00019933896131564796, "loss": 3.191642999649048, "step": 2893, "token_acc": 0.27666597242910207 }, { "epoch": 1.6965699208443272, "grad_norm": 0.5215155630058546, "learning_rate": 0.00019933784828052463, "loss": 3.182464838027954, "step": 2894, "token_acc": 0.27765188553808584 }, { "epoch": 1.6971562591615363, "grad_norm": 0.4124214757736865, "learning_rate": 0.0001993367343122561, "loss": 3.1560251712799072, "step": 2895, "token_acc": 0.28177326663083474 }, { "epoch": 1.6977425974787452, "grad_norm": 0.4565107019668311, "learning_rate": 0.0001993356194108528, "loss": 3.163017749786377, "step": 2896, "token_acc": 0.27998352129357845 }, { "epoch": 1.6983289357959541, "grad_norm": 0.46896999094016806, "learning_rate": 0.00019933450357632518, "loss": 3.179335594177246, "step": 2897, "token_acc": 0.27935651549550733 }, { "epoch": 1.6989152741131632, "grad_norm": 0.54030386956148, "learning_rate": 0.0001993333868086838, "loss": 3.145297050476074, "step": 2898, "token_acc": 0.28323526192623016 }, { "epoch": 1.6995016124303723, "grad_norm": 0.5992877342188514, "learning_rate": 0.00019933226910793907, "loss": 3.1982274055480957, "step": 2899, "token_acc": 0.27621170065122236 }, { "epoch": 1.7000879507475815, "grad_norm": 0.4978431391472942, "learning_rate": 0.00019933115047410157, "loss": 3.1967391967773438, "step": 2900, "token_acc": 0.2771879983383139 }, { "epoch": 1.7006742890647903, "grad_norm": 0.3541573185415294, "learning_rate": 0.0001993300309071817, "loss": 3.1787989139556885, "step": 2901, "token_acc": 0.278938959475094 }, { "epoch": 1.7012606273819995, "grad_norm": 0.5585327563935711, "learning_rate": 0.0001993289104071901, "loss": 3.211782217025757, "step": 2902, "token_acc": 0.2758927825142093 }, { "epoch": 1.7018469656992083, "grad_norm": 0.472150796169749, "learning_rate": 0.00019932778897413717, "loss": 3.1785478591918945, "step": 2903, "token_acc": 0.2790779318136225 }, { "epoch": 1.7024333040164175, "grad_norm": 0.4999137610265311, "learning_rate": 0.00019932666660803355, "loss": 3.155221939086914, "step": 2904, "token_acc": 0.28294798827534534 }, { "epoch": 1.7030196423336266, "grad_norm": 0.4005926558521058, "learning_rate": 0.00019932554330888972, "loss": 3.183867931365967, "step": 2905, "token_acc": 0.2772364740503499 }, { "epoch": 1.7036059806508357, "grad_norm": 0.4284079358266169, "learning_rate": 0.00019932441907671627, "loss": 3.149411201477051, "step": 2906, "token_acc": 0.2830999812065401 }, { "epoch": 1.7041923189680446, "grad_norm": 0.41098272253810497, "learning_rate": 0.00019932329391152376, "loss": 3.1500651836395264, "step": 2907, "token_acc": 0.2852632483842481 }, { "epoch": 1.7047786572852535, "grad_norm": 0.4161714813761226, "learning_rate": 0.00019932216781332274, "loss": 3.133920669555664, "step": 2908, "token_acc": 0.28551931357716437 }, { "epoch": 1.7053649956024626, "grad_norm": 0.5852421122953465, "learning_rate": 0.00019932104078212377, "loss": 3.20621395111084, "step": 2909, "token_acc": 0.27520247352269317 }, { "epoch": 1.7059513339196717, "grad_norm": 0.5208252732729349, "learning_rate": 0.00019931991281793747, "loss": 3.199059009552002, "step": 2910, "token_acc": 0.276131851543956 }, { "epoch": 1.7065376722368808, "grad_norm": 0.43240647867487997, "learning_rate": 0.0001993187839207744, "loss": 3.231233596801758, "step": 2911, "token_acc": 0.27231693415137115 }, { "epoch": 1.7071240105540897, "grad_norm": 0.497565831245306, "learning_rate": 0.00019931765409064522, "loss": 3.1517786979675293, "step": 2912, "token_acc": 0.2827456479538712 }, { "epoch": 1.7077103488712986, "grad_norm": 0.5521926148334956, "learning_rate": 0.0001993165233275605, "loss": 3.1693263053894043, "step": 2913, "token_acc": 0.2823290453622207 }, { "epoch": 1.7082966871885077, "grad_norm": 0.47795892042186205, "learning_rate": 0.00019931539163153087, "loss": 3.1930737495422363, "step": 2914, "token_acc": 0.2781994977883777 }, { "epoch": 1.7088830255057168, "grad_norm": 0.525501649135695, "learning_rate": 0.000199314259002567, "loss": 3.158327102661133, "step": 2915, "token_acc": 0.2821428660696209 }, { "epoch": 1.709469363822926, "grad_norm": 0.5044557197640296, "learning_rate": 0.00019931312544067944, "loss": 3.150179386138916, "step": 2916, "token_acc": 0.2823472966088675 }, { "epoch": 1.7100557021401348, "grad_norm": 0.45471575655936647, "learning_rate": 0.00019931199094587893, "loss": 3.1563398838043213, "step": 2917, "token_acc": 0.2827311188241892 }, { "epoch": 1.710642040457344, "grad_norm": 0.4285700704788987, "learning_rate": 0.00019931085551817606, "loss": 3.167116641998291, "step": 2918, "token_acc": 0.28129183864371565 }, { "epoch": 1.7112283787745528, "grad_norm": 0.3865481749065967, "learning_rate": 0.00019930971915758155, "loss": 3.2068588733673096, "step": 2919, "token_acc": 0.27631501094609784 }, { "epoch": 1.711814717091762, "grad_norm": 0.3600539480078632, "learning_rate": 0.00019930858186410606, "loss": 3.141140937805176, "step": 2920, "token_acc": 0.2842637126483831 }, { "epoch": 1.712401055408971, "grad_norm": 0.4203679226575949, "learning_rate": 0.00019930744363776023, "loss": 3.1812448501586914, "step": 2921, "token_acc": 0.27773104633727097 }, { "epoch": 1.7129873937261801, "grad_norm": 0.45423744609617117, "learning_rate": 0.00019930630447855482, "loss": 3.1945416927337646, "step": 2922, "token_acc": 0.27645626667357354 }, { "epoch": 1.713573732043389, "grad_norm": 0.590883518978047, "learning_rate": 0.00019930516438650047, "loss": 3.165205240249634, "step": 2923, "token_acc": 0.2807734240565356 }, { "epoch": 1.714160070360598, "grad_norm": 0.6288219517397439, "learning_rate": 0.00019930402336160792, "loss": 3.1942429542541504, "step": 2924, "token_acc": 0.2786602284797641 }, { "epoch": 1.714746408677807, "grad_norm": 0.5029922238265038, "learning_rate": 0.0001993028814038879, "loss": 3.18017578125, "step": 2925, "token_acc": 0.2822263903759493 }, { "epoch": 1.7153327469950161, "grad_norm": 0.431579024019595, "learning_rate": 0.0001993017385133511, "loss": 3.1377241611480713, "step": 2926, "token_acc": 0.28584441373423175 }, { "epoch": 1.7159190853122253, "grad_norm": 0.4072035207708839, "learning_rate": 0.00019930059469000828, "loss": 3.139044761657715, "step": 2927, "token_acc": 0.28454719632136927 }, { "epoch": 1.7165054236294341, "grad_norm": 0.4686421589854534, "learning_rate": 0.0001992994499338702, "loss": 3.217597007751465, "step": 2928, "token_acc": 0.2740496111149091 }, { "epoch": 1.7170917619466433, "grad_norm": 0.49424836300744446, "learning_rate": 0.00019929830424494758, "loss": 3.1672630310058594, "step": 2929, "token_acc": 0.27884598131320254 }, { "epoch": 1.7176781002638521, "grad_norm": 0.3828704871206606, "learning_rate": 0.00019929715762325118, "loss": 3.1571407318115234, "step": 2930, "token_acc": 0.28220529348569195 }, { "epoch": 1.7182644385810613, "grad_norm": 0.4249963223417634, "learning_rate": 0.0001992960100687918, "loss": 3.155071258544922, "step": 2931, "token_acc": 0.28383223243134875 }, { "epoch": 1.7188507768982704, "grad_norm": 0.43678675139626283, "learning_rate": 0.00019929486158158026, "loss": 3.1963229179382324, "step": 2932, "token_acc": 0.2767800938601923 }, { "epoch": 1.7194371152154795, "grad_norm": 0.4222783201707346, "learning_rate": 0.00019929371216162724, "loss": 3.1996073722839355, "step": 2933, "token_acc": 0.2763362212060528 }, { "epoch": 1.7200234535326884, "grad_norm": 0.40576027123520764, "learning_rate": 0.00019929256180894363, "loss": 3.219005584716797, "step": 2934, "token_acc": 0.2743520359089452 }, { "epoch": 1.7206097918498973, "grad_norm": 0.4079332691990933, "learning_rate": 0.00019929141052354017, "loss": 3.1944832801818848, "step": 2935, "token_acc": 0.27836408198721796 }, { "epoch": 1.7211961301671064, "grad_norm": 0.40567734116732407, "learning_rate": 0.00019929025830542772, "loss": 3.1318769454956055, "step": 2936, "token_acc": 0.28480665580345954 }, { "epoch": 1.7217824684843155, "grad_norm": 0.4677194849468562, "learning_rate": 0.00019928910515461707, "loss": 3.127445697784424, "step": 2937, "token_acc": 0.28622995232179343 }, { "epoch": 1.7223688068015246, "grad_norm": 0.5505240163025872, "learning_rate": 0.0001992879510711191, "loss": 3.1670050621032715, "step": 2938, "token_acc": 0.282949513554612 }, { "epoch": 1.7229551451187335, "grad_norm": 0.46898597172313217, "learning_rate": 0.0001992867960549446, "loss": 3.1783666610717773, "step": 2939, "token_acc": 0.27973292817793766 }, { "epoch": 1.7235414834359424, "grad_norm": 0.4200944151114649, "learning_rate": 0.00019928564010610446, "loss": 3.1799914836883545, "step": 2940, "token_acc": 0.2791474867453043 }, { "epoch": 1.7241278217531515, "grad_norm": 0.47003934108964335, "learning_rate": 0.0001992844832246095, "loss": 3.1871261596679688, "step": 2941, "token_acc": 0.27674479797624535 }, { "epoch": 1.7247141600703606, "grad_norm": 0.4548504319968893, "learning_rate": 0.00019928332541047062, "loss": 3.145134210586548, "step": 2942, "token_acc": 0.2828656992456029 }, { "epoch": 1.7253004983875697, "grad_norm": 0.5929313399835197, "learning_rate": 0.00019928216666369866, "loss": 3.223623275756836, "step": 2943, "token_acc": 0.27338482728679925 }, { "epoch": 1.7258868367047786, "grad_norm": 0.4946492615337616, "learning_rate": 0.00019928100698430457, "loss": 3.1497507095336914, "step": 2944, "token_acc": 0.28352766821664094 }, { "epoch": 1.7264731750219877, "grad_norm": 0.44667284874524504, "learning_rate": 0.00019927984637229916, "loss": 3.1424336433410645, "step": 2945, "token_acc": 0.28461872035112157 }, { "epoch": 1.7270595133391966, "grad_norm": 0.4875574370392208, "learning_rate": 0.0001992786848276934, "loss": 3.139509677886963, "step": 2946, "token_acc": 0.28392719261490457 }, { "epoch": 1.7276458516564057, "grad_norm": 0.5383482075781147, "learning_rate": 0.00019927752235049818, "loss": 3.210911512374878, "step": 2947, "token_acc": 0.2761984728818451 }, { "epoch": 1.7282321899736148, "grad_norm": 0.5675300881849085, "learning_rate": 0.00019927635894072441, "loss": 3.204254388809204, "step": 2948, "token_acc": 0.2760426158997297 }, { "epoch": 1.728818528290824, "grad_norm": 0.5185310735844899, "learning_rate": 0.000199275194598383, "loss": 3.1933059692382812, "step": 2949, "token_acc": 0.2753251075158714 }, { "epoch": 1.7294048666080328, "grad_norm": 0.44282516835485364, "learning_rate": 0.00019927402932348495, "loss": 3.1646814346313477, "step": 2950, "token_acc": 0.2821495417370398 }, { "epoch": 1.7299912049252417, "grad_norm": 0.6059149019326739, "learning_rate": 0.00019927286311604116, "loss": 3.1785902976989746, "step": 2951, "token_acc": 0.2787837699549734 }, { "epoch": 1.7305775432424508, "grad_norm": 0.6009788122075265, "learning_rate": 0.00019927169597606259, "loss": 3.1773481369018555, "step": 2952, "token_acc": 0.2785320789447859 }, { "epoch": 1.73116388155966, "grad_norm": 0.4108489499660139, "learning_rate": 0.00019927052790356018, "loss": 3.190739154815674, "step": 2953, "token_acc": 0.2777231600696069 }, { "epoch": 1.731750219876869, "grad_norm": 0.5943709731097288, "learning_rate": 0.00019926935889854496, "loss": 3.1499979496002197, "step": 2954, "token_acc": 0.28312958305897784 }, { "epoch": 1.732336558194078, "grad_norm": 0.5570602788999452, "learning_rate": 0.00019926818896102785, "loss": 3.1789417266845703, "step": 2955, "token_acc": 0.2800467377901401 }, { "epoch": 1.732922896511287, "grad_norm": 0.45445196617147793, "learning_rate": 0.0001992670180910199, "loss": 3.168210983276367, "step": 2956, "token_acc": 0.2819754329651425 }, { "epoch": 1.733509234828496, "grad_norm": 0.5279809570234126, "learning_rate": 0.00019926584628853207, "loss": 3.1595590114593506, "step": 2957, "token_acc": 0.2831381109572995 }, { "epoch": 1.734095573145705, "grad_norm": 0.44546732674888434, "learning_rate": 0.00019926467355357538, "loss": 3.141796112060547, "step": 2958, "token_acc": 0.28208896778572545 }, { "epoch": 1.7346819114629142, "grad_norm": 0.5877392437830491, "learning_rate": 0.00019926349988616085, "loss": 3.2119336128234863, "step": 2959, "token_acc": 0.2753157357290625 }, { "epoch": 1.7352682497801233, "grad_norm": 0.607689834386909, "learning_rate": 0.0001992623252862995, "loss": 3.2077219486236572, "step": 2960, "token_acc": 0.2747951398700198 }, { "epoch": 1.7358545880973322, "grad_norm": 0.464827305686298, "learning_rate": 0.00019926114975400233, "loss": 3.1841320991516113, "step": 2961, "token_acc": 0.27840309944306885 }, { "epoch": 1.736440926414541, "grad_norm": 0.5245681655155779, "learning_rate": 0.00019925997328928044, "loss": 3.1783597469329834, "step": 2962, "token_acc": 0.27932300814306243 }, { "epoch": 1.7370272647317502, "grad_norm": 0.4342385783593552, "learning_rate": 0.00019925879589214484, "loss": 3.1499011516571045, "step": 2963, "token_acc": 0.2833291091944262 }, { "epoch": 1.7376136030489593, "grad_norm": 0.5632735135512961, "learning_rate": 0.00019925761756260662, "loss": 3.15573787689209, "step": 2964, "token_acc": 0.28356300499437803 }, { "epoch": 1.7381999413661684, "grad_norm": 0.6530433317404106, "learning_rate": 0.00019925643830067684, "loss": 3.247023105621338, "step": 2965, "token_acc": 0.26976083718972377 }, { "epoch": 1.7387862796833773, "grad_norm": 0.5584649856214898, "learning_rate": 0.00019925525810636654, "loss": 3.2268121242523193, "step": 2966, "token_acc": 0.2721100807782146 }, { "epoch": 1.7393726180005862, "grad_norm": 0.5187481398812246, "learning_rate": 0.00019925407697968687, "loss": 3.2307956218719482, "step": 2967, "token_acc": 0.2724502197370537 }, { "epoch": 1.7399589563177953, "grad_norm": 0.4993459761634379, "learning_rate": 0.00019925289492064887, "loss": 3.14756441116333, "step": 2968, "token_acc": 0.284447056937505 }, { "epoch": 1.7405452946350044, "grad_norm": 0.5812028092626107, "learning_rate": 0.00019925171192926368, "loss": 3.1684460639953613, "step": 2969, "token_acc": 0.2806512964356571 }, { "epoch": 1.7411316329522135, "grad_norm": 0.4636865440896602, "learning_rate": 0.0001992505280055424, "loss": 3.179443120956421, "step": 2970, "token_acc": 0.2778191494901732 }, { "epoch": 1.7417179712694224, "grad_norm": 0.5199778602800091, "learning_rate": 0.00019924934314949615, "loss": 3.167562961578369, "step": 2971, "token_acc": 0.2773112278059198 }, { "epoch": 1.7423043095866315, "grad_norm": 0.5894125515202733, "learning_rate": 0.00019924815736113604, "loss": 3.16648530960083, "step": 2972, "token_acc": 0.279594569755534 }, { "epoch": 1.7428906479038404, "grad_norm": 0.6045610857425243, "learning_rate": 0.00019924697064047325, "loss": 3.2132866382598877, "step": 2973, "token_acc": 0.2751497635596008 }, { "epoch": 1.7434769862210495, "grad_norm": 0.5335964002396725, "learning_rate": 0.00019924578298751892, "loss": 3.1057212352752686, "step": 2974, "token_acc": 0.2878994852723486 }, { "epoch": 1.7440633245382586, "grad_norm": 0.4881282210096283, "learning_rate": 0.00019924459440228418, "loss": 3.102341890335083, "step": 2975, "token_acc": 0.2914280976833117 }, { "epoch": 1.7446496628554677, "grad_norm": 0.4900026835507201, "learning_rate": 0.0001992434048847802, "loss": 3.1748886108398438, "step": 2976, "token_acc": 0.2813820432445144 }, { "epoch": 1.7452360011726766, "grad_norm": 0.4473743388228127, "learning_rate": 0.0001992422144350182, "loss": 3.161172389984131, "step": 2977, "token_acc": 0.28295660782049165 }, { "epoch": 1.7458223394898855, "grad_norm": 0.3869530721282537, "learning_rate": 0.0001992410230530093, "loss": 3.1615891456604004, "step": 2978, "token_acc": 0.28125982721989706 }, { "epoch": 1.7464086778070946, "grad_norm": 0.5210363465722914, "learning_rate": 0.0001992398307387647, "loss": 3.178642511367798, "step": 2979, "token_acc": 0.2799093771803463 }, { "epoch": 1.7469950161243037, "grad_norm": 0.49233621170987435, "learning_rate": 0.00019923863749229565, "loss": 3.1947317123413086, "step": 2980, "token_acc": 0.2767130119697093 }, { "epoch": 1.7475813544415129, "grad_norm": 0.5036711434451157, "learning_rate": 0.0001992374433136133, "loss": 3.186971664428711, "step": 2981, "token_acc": 0.2781111932674747 }, { "epoch": 1.7481676927587217, "grad_norm": 0.5637417984398475, "learning_rate": 0.00019923624820272892, "loss": 3.1288182735443115, "step": 2982, "token_acc": 0.2842603793982137 }, { "epoch": 1.7487540310759309, "grad_norm": 0.45974986236021675, "learning_rate": 0.0001992350521596537, "loss": 3.1544909477233887, "step": 2983, "token_acc": 0.2832700126997001 }, { "epoch": 1.7493403693931397, "grad_norm": 0.4974945816916409, "learning_rate": 0.00019923385518439888, "loss": 3.1807093620300293, "step": 2984, "token_acc": 0.276837415890835 }, { "epoch": 1.7499267077103489, "grad_norm": 0.5223041141785619, "learning_rate": 0.00019923265727697572, "loss": 3.149597644805908, "step": 2985, "token_acc": 0.2835854969595085 }, { "epoch": 1.750513046027558, "grad_norm": 0.4795707459014642, "learning_rate": 0.00019923145843739546, "loss": 3.155219793319702, "step": 2986, "token_acc": 0.282635992248865 }, { "epoch": 1.751099384344767, "grad_norm": 0.4800813183042214, "learning_rate": 0.00019923025866566934, "loss": 3.1162140369415283, "step": 2987, "token_acc": 0.28655299941605683 }, { "epoch": 1.751685722661976, "grad_norm": 0.47063526100676, "learning_rate": 0.00019922905796180868, "loss": 3.1883671283721924, "step": 2988, "token_acc": 0.2774695374265878 }, { "epoch": 1.7522720609791849, "grad_norm": 0.4740801150819172, "learning_rate": 0.00019922785632582474, "loss": 3.1551594734191895, "step": 2989, "token_acc": 0.28174274394414545 }, { "epoch": 1.752858399296394, "grad_norm": 0.42031980860106155, "learning_rate": 0.00019922665375772877, "loss": 3.0999274253845215, "step": 2990, "token_acc": 0.2921123869663792 }, { "epoch": 1.753444737613603, "grad_norm": 0.4279337941873416, "learning_rate": 0.0001992254502575321, "loss": 3.1406030654907227, "step": 2991, "token_acc": 0.28388867436032333 }, { "epoch": 1.7540310759308122, "grad_norm": 0.4822528199621917, "learning_rate": 0.00019922424582524605, "loss": 3.140556573867798, "step": 2992, "token_acc": 0.2823709581803621 }, { "epoch": 1.754617414248021, "grad_norm": 0.46709560319205784, "learning_rate": 0.00019922304046088193, "loss": 3.17539644241333, "step": 2993, "token_acc": 0.2804816219702862 }, { "epoch": 1.75520375256523, "grad_norm": 0.4735641819553356, "learning_rate": 0.00019922183416445105, "loss": 3.187039375305176, "step": 2994, "token_acc": 0.2771921039453918 }, { "epoch": 1.755790090882439, "grad_norm": 0.4243697906927226, "learning_rate": 0.0001992206269359647, "loss": 3.1152756214141846, "step": 2995, "token_acc": 0.286199438614131 }, { "epoch": 1.7563764291996482, "grad_norm": 0.3803997306002624, "learning_rate": 0.0001992194187754343, "loss": 3.159212827682495, "step": 2996, "token_acc": 0.28233056531132567 }, { "epoch": 1.7569627675168573, "grad_norm": 0.47718705361028896, "learning_rate": 0.00019921820968287114, "loss": 3.1570539474487305, "step": 2997, "token_acc": 0.2809807326764675 }, { "epoch": 1.7575491058340662, "grad_norm": 0.4334016567262529, "learning_rate": 0.00019921699965828662, "loss": 3.221966505050659, "step": 2998, "token_acc": 0.2720114493884986 }, { "epoch": 1.7581354441512753, "grad_norm": 0.4224043239909534, "learning_rate": 0.00019921578870169207, "loss": 3.139960289001465, "step": 2999, "token_acc": 0.2831893670949925 }, { "epoch": 1.7587217824684842, "grad_norm": 0.5760770644554536, "learning_rate": 0.0001992145768130989, "loss": 3.186737537384033, "step": 3000, "token_acc": 0.2788789185291265 }, { "epoch": 1.7593081207856933, "grad_norm": 0.5278759212283373, "learning_rate": 0.00019921336399251845, "loss": 3.180309295654297, "step": 3001, "token_acc": 0.27854078742595695 }, { "epoch": 1.7598944591029024, "grad_norm": 0.5175272863777317, "learning_rate": 0.00019921215023996214, "loss": 3.1663174629211426, "step": 3002, "token_acc": 0.2823732731222378 }, { "epoch": 1.7604807974201115, "grad_norm": 0.5391112315476505, "learning_rate": 0.00019921093555544137, "loss": 3.1592674255371094, "step": 3003, "token_acc": 0.2818145646797544 }, { "epoch": 1.7610671357373204, "grad_norm": 0.36222608133305795, "learning_rate": 0.00019920971993896754, "loss": 3.1571292877197266, "step": 3004, "token_acc": 0.28130298056669434 }, { "epoch": 1.7616534740545293, "grad_norm": 0.4627275609861067, "learning_rate": 0.0001992085033905521, "loss": 3.1610560417175293, "step": 3005, "token_acc": 0.2795801400064243 }, { "epoch": 1.7622398123717384, "grad_norm": 0.5197302972298847, "learning_rate": 0.00019920728591020644, "loss": 3.1341347694396973, "step": 3006, "token_acc": 0.2844290169037867 }, { "epoch": 1.7628261506889475, "grad_norm": 0.3826671986522429, "learning_rate": 0.000199206067497942, "loss": 3.1703736782073975, "step": 3007, "token_acc": 0.2788192245630119 }, { "epoch": 1.7634124890061567, "grad_norm": 0.3791269480787098, "learning_rate": 0.00019920484815377028, "loss": 3.132236957550049, "step": 3008, "token_acc": 0.2844655062234226 }, { "epoch": 1.7639988273233655, "grad_norm": 0.46593906643907695, "learning_rate": 0.00019920362787770267, "loss": 3.167722702026367, "step": 3009, "token_acc": 0.28091726384364823 }, { "epoch": 1.7645851656405747, "grad_norm": 0.5206506000069724, "learning_rate": 0.00019920240666975063, "loss": 3.1813013553619385, "step": 3010, "token_acc": 0.27919728675133726 }, { "epoch": 1.7651715039577835, "grad_norm": 0.6684871069170841, "learning_rate": 0.00019920118452992566, "loss": 3.1811587810516357, "step": 3011, "token_acc": 0.2798628190413729 }, { "epoch": 1.7657578422749927, "grad_norm": 0.4810106260010527, "learning_rate": 0.00019919996145823928, "loss": 3.149163246154785, "step": 3012, "token_acc": 0.28248165787102586 }, { "epoch": 1.7663441805922018, "grad_norm": 0.4199080589424211, "learning_rate": 0.0001991987374547029, "loss": 3.176048755645752, "step": 3013, "token_acc": 0.28029436286693443 }, { "epoch": 1.7669305189094109, "grad_norm": 0.656167772073662, "learning_rate": 0.00019919751251932805, "loss": 3.181730270385742, "step": 3014, "token_acc": 0.2788619373725757 }, { "epoch": 1.7675168572266198, "grad_norm": 0.6365634301450452, "learning_rate": 0.00019919628665212625, "loss": 3.1575212478637695, "step": 3015, "token_acc": 0.2834912413841664 }, { "epoch": 1.7681031955438287, "grad_norm": 0.6061216058130342, "learning_rate": 0.00019919505985310903, "loss": 3.1693007946014404, "step": 3016, "token_acc": 0.28168481568282167 }, { "epoch": 1.7686895338610378, "grad_norm": 0.5224879460581037, "learning_rate": 0.00019919383212228787, "loss": 3.177699089050293, "step": 3017, "token_acc": 0.27810073050913203 }, { "epoch": 1.7692758721782469, "grad_norm": 0.4708567621285417, "learning_rate": 0.00019919260345967432, "loss": 3.159008264541626, "step": 3018, "token_acc": 0.2821663586934847 }, { "epoch": 1.769862210495456, "grad_norm": 0.569703262239816, "learning_rate": 0.00019919137386527992, "loss": 3.1542842388153076, "step": 3019, "token_acc": 0.28311320853762245 }, { "epoch": 1.770448548812665, "grad_norm": 0.5081818788344902, "learning_rate": 0.0001991901433391162, "loss": 3.203446626663208, "step": 3020, "token_acc": 0.2757139325009272 }, { "epoch": 1.7710348871298738, "grad_norm": 0.37851780497659443, "learning_rate": 0.00019918891188119479, "loss": 3.192237615585327, "step": 3021, "token_acc": 0.27776048751838484 }, { "epoch": 1.771621225447083, "grad_norm": 0.5072145605374052, "learning_rate": 0.0001991876794915272, "loss": 3.1420278549194336, "step": 3022, "token_acc": 0.28276179634438037 }, { "epoch": 1.772207563764292, "grad_norm": 0.42189941961971417, "learning_rate": 0.000199186446170125, "loss": 3.1737537384033203, "step": 3023, "token_acc": 0.279103261384525 }, { "epoch": 1.7727939020815011, "grad_norm": 0.43456976457360336, "learning_rate": 0.00019918521191699982, "loss": 3.1521992683410645, "step": 3024, "token_acc": 0.2824392609228256 }, { "epoch": 1.77338024039871, "grad_norm": 0.5967042142888881, "learning_rate": 0.00019918397673216321, "loss": 3.148320198059082, "step": 3025, "token_acc": 0.2832938618237096 }, { "epoch": 1.7739665787159191, "grad_norm": 0.5760626797180998, "learning_rate": 0.0001991827406156268, "loss": 3.2021431922912598, "step": 3026, "token_acc": 0.2751859699966751 }, { "epoch": 1.774552917033128, "grad_norm": 0.5881445536292035, "learning_rate": 0.0001991815035674022, "loss": 3.160079002380371, "step": 3027, "token_acc": 0.28160910509831033 }, { "epoch": 1.7751392553503371, "grad_norm": 0.6072959336180391, "learning_rate": 0.000199180265587501, "loss": 3.178093671798706, "step": 3028, "token_acc": 0.2801709860088017 }, { "epoch": 1.7757255936675462, "grad_norm": 0.4275048324707711, "learning_rate": 0.00019917902667593486, "loss": 3.1129212379455566, "step": 3029, "token_acc": 0.289480570456627 }, { "epoch": 1.7763119319847553, "grad_norm": 0.62312483881686, "learning_rate": 0.00019917778683271542, "loss": 3.1683366298675537, "step": 3030, "token_acc": 0.27920484358164543 }, { "epoch": 1.7768982703019642, "grad_norm": 0.5855244741365908, "learning_rate": 0.0001991765460578543, "loss": 3.1667299270629883, "step": 3031, "token_acc": 0.2795283266566668 }, { "epoch": 1.7774846086191731, "grad_norm": 0.4476736547873714, "learning_rate": 0.00019917530435136315, "loss": 3.1795425415039062, "step": 3032, "token_acc": 0.2780890166001695 }, { "epoch": 1.7780709469363822, "grad_norm": 0.4544646699813525, "learning_rate": 0.00019917406171325372, "loss": 3.1279683113098145, "step": 3033, "token_acc": 0.28464937491018827 }, { "epoch": 1.7786572852535913, "grad_norm": 0.5585388899459923, "learning_rate": 0.00019917281814353757, "loss": 3.1551103591918945, "step": 3034, "token_acc": 0.2820819062360247 }, { "epoch": 1.7792436235708005, "grad_norm": 0.5288719587818126, "learning_rate": 0.00019917157364222646, "loss": 3.1912384033203125, "step": 3035, "token_acc": 0.27838537919125056 }, { "epoch": 1.7798299618880093, "grad_norm": 0.3976081046917742, "learning_rate": 0.000199170328209332, "loss": 3.155442953109741, "step": 3036, "token_acc": 0.281032449059833 }, { "epoch": 1.7804163002052185, "grad_norm": 0.4651851421845836, "learning_rate": 0.000199169081844866, "loss": 3.1420576572418213, "step": 3037, "token_acc": 0.2857071839539088 }, { "epoch": 1.7810026385224274, "grad_norm": 0.4282274246645864, "learning_rate": 0.0001991678345488401, "loss": 3.177009344100952, "step": 3038, "token_acc": 0.27881092756124404 }, { "epoch": 1.7815889768396365, "grad_norm": 0.4288496461862397, "learning_rate": 0.000199166586321266, "loss": 3.1417412757873535, "step": 3039, "token_acc": 0.2827716383923786 }, { "epoch": 1.7821753151568456, "grad_norm": 0.40504853086945547, "learning_rate": 0.00019916533716215544, "loss": 3.1637840270996094, "step": 3040, "token_acc": 0.27994391960460285 }, { "epoch": 1.7827616534740547, "grad_norm": 0.4298805277151954, "learning_rate": 0.0001991640870715202, "loss": 3.1684818267822266, "step": 3041, "token_acc": 0.28085395330924645 }, { "epoch": 1.7833479917912636, "grad_norm": 0.48612480313029516, "learning_rate": 0.00019916283604937197, "loss": 3.147237777709961, "step": 3042, "token_acc": 0.2839322341699118 }, { "epoch": 1.7839343301084725, "grad_norm": 0.4735647922796634, "learning_rate": 0.00019916158409572253, "loss": 3.201700448989868, "step": 3043, "token_acc": 0.2793902308678923 }, { "epoch": 1.7845206684256816, "grad_norm": 0.47738699892800107, "learning_rate": 0.0001991603312105836, "loss": 3.154893398284912, "step": 3044, "token_acc": 0.28147513754801207 }, { "epoch": 1.7851070067428907, "grad_norm": 0.5457774547215198, "learning_rate": 0.00019915907739396702, "loss": 3.14322566986084, "step": 3045, "token_acc": 0.2843496606461381 }, { "epoch": 1.7856933450600998, "grad_norm": 0.6286875772424948, "learning_rate": 0.00019915782264588448, "loss": 3.175854206085205, "step": 3046, "token_acc": 0.28110637141403133 }, { "epoch": 1.7862796833773087, "grad_norm": 0.4347377571502573, "learning_rate": 0.00019915656696634787, "loss": 3.20487904548645, "step": 3047, "token_acc": 0.27684753305095283 }, { "epoch": 1.7868660216945176, "grad_norm": 0.41599768451301034, "learning_rate": 0.00019915531035536889, "loss": 3.1952695846557617, "step": 3048, "token_acc": 0.27704563179423514 }, { "epoch": 1.7874523600117267, "grad_norm": 0.5451411727324161, "learning_rate": 0.00019915405281295935, "loss": 3.126847743988037, "step": 3049, "token_acc": 0.28492401262253875 }, { "epoch": 1.7880386983289358, "grad_norm": 0.5327343397423094, "learning_rate": 0.00019915279433913114, "loss": 3.1617894172668457, "step": 3050, "token_acc": 0.2805701092265341 }, { "epoch": 1.788625036646145, "grad_norm": 0.6607358887310494, "learning_rate": 0.00019915153493389604, "loss": 3.1562929153442383, "step": 3051, "token_acc": 0.2825911016558186 }, { "epoch": 1.7892113749633538, "grad_norm": 0.48660973076480024, "learning_rate": 0.00019915027459726587, "loss": 3.141599655151367, "step": 3052, "token_acc": 0.2835019790428629 }, { "epoch": 1.789797713280563, "grad_norm": 0.5029640909091398, "learning_rate": 0.0001991490133292525, "loss": 3.1314804553985596, "step": 3053, "token_acc": 0.28575136743628443 }, { "epoch": 1.7903840515977718, "grad_norm": 0.4483339364013523, "learning_rate": 0.0001991477511298677, "loss": 3.1960182189941406, "step": 3054, "token_acc": 0.2757918220285726 }, { "epoch": 1.790970389914981, "grad_norm": 0.4076418437939134, "learning_rate": 0.00019914648799912343, "loss": 3.1197569370269775, "step": 3055, "token_acc": 0.28692908517720045 }, { "epoch": 1.79155672823219, "grad_norm": 0.4656427706249111, "learning_rate": 0.00019914522393703148, "loss": 3.1805639266967773, "step": 3056, "token_acc": 0.2792051125278751 }, { "epoch": 1.7921430665493991, "grad_norm": 0.3995577284508524, "learning_rate": 0.00019914395894360376, "loss": 3.1196675300598145, "step": 3057, "token_acc": 0.287803298357192 }, { "epoch": 1.792729404866608, "grad_norm": 0.3999947543571872, "learning_rate": 0.00019914269301885216, "loss": 3.1162376403808594, "step": 3058, "token_acc": 0.28868699573169915 }, { "epoch": 1.793315743183817, "grad_norm": 0.4848404806778235, "learning_rate": 0.00019914142616278853, "loss": 3.151334524154663, "step": 3059, "token_acc": 0.2840709265143338 }, { "epoch": 1.793902081501026, "grad_norm": 0.5072766140544103, "learning_rate": 0.0001991401583754248, "loss": 3.2069458961486816, "step": 3060, "token_acc": 0.275489189146041 }, { "epoch": 1.7944884198182351, "grad_norm": 0.42656632984038606, "learning_rate": 0.00019913888965677288, "loss": 3.173333168029785, "step": 3061, "token_acc": 0.27925479802407005 }, { "epoch": 1.7950747581354443, "grad_norm": 0.5381640481886466, "learning_rate": 0.0001991376200068447, "loss": 3.1604881286621094, "step": 3062, "token_acc": 0.28115659502324025 }, { "epoch": 1.7956610964526531, "grad_norm": 0.6167835803124507, "learning_rate": 0.00019913634942565214, "loss": 3.2319087982177734, "step": 3063, "token_acc": 0.27010222636096465 }, { "epoch": 1.7962474347698623, "grad_norm": 0.5446378898704225, "learning_rate": 0.00019913507791320716, "loss": 3.204888343811035, "step": 3064, "token_acc": 0.27742048163064026 }, { "epoch": 1.7968337730870712, "grad_norm": 0.46935161188374286, "learning_rate": 0.00019913380546952175, "loss": 3.116142749786377, "step": 3065, "token_acc": 0.2882703049171321 }, { "epoch": 1.7974201114042803, "grad_norm": 0.4913748013302024, "learning_rate": 0.0001991325320946078, "loss": 3.1805639266967773, "step": 3066, "token_acc": 0.2785570981031671 }, { "epoch": 1.7980064497214894, "grad_norm": 0.4811261140490574, "learning_rate": 0.00019913125778847727, "loss": 3.1658692359924316, "step": 3067, "token_acc": 0.28080753602546915 }, { "epoch": 1.7985927880386985, "grad_norm": 0.5044322634295477, "learning_rate": 0.00019912998255114218, "loss": 3.2018842697143555, "step": 3068, "token_acc": 0.2766618925260341 }, { "epoch": 1.7991791263559074, "grad_norm": 0.4577689454026066, "learning_rate": 0.0001991287063826145, "loss": 3.218240261077881, "step": 3069, "token_acc": 0.27432852023635884 }, { "epoch": 1.7997654646731163, "grad_norm": 0.4946044488876774, "learning_rate": 0.00019912742928290619, "loss": 3.1451101303100586, "step": 3070, "token_acc": 0.2819843874202556 }, { "epoch": 1.8003518029903254, "grad_norm": 0.5411147607557574, "learning_rate": 0.00019912615125202923, "loss": 3.2268686294555664, "step": 3071, "token_acc": 0.2741400034342625 }, { "epoch": 1.8009381413075345, "grad_norm": 0.4538215012039641, "learning_rate": 0.00019912487228999565, "loss": 3.1676385402679443, "step": 3072, "token_acc": 0.2793928795503998 }, { "epoch": 1.8015244796247436, "grad_norm": 0.40272706270350467, "learning_rate": 0.0001991235923968175, "loss": 3.149338483810425, "step": 3073, "token_acc": 0.2834419796883512 }, { "epoch": 1.8021108179419525, "grad_norm": 0.4820116694752974, "learning_rate": 0.00019912231157250676, "loss": 3.1644253730773926, "step": 3074, "token_acc": 0.2815013196507523 }, { "epoch": 1.8026971562591614, "grad_norm": 0.37437633237231566, "learning_rate": 0.0001991210298170755, "loss": 3.14346981048584, "step": 3075, "token_acc": 0.28332368268674 }, { "epoch": 1.8032834945763705, "grad_norm": 0.4542849510565713, "learning_rate": 0.00019911974713053568, "loss": 3.1468682289123535, "step": 3076, "token_acc": 0.2828481960932949 }, { "epoch": 1.8038698328935796, "grad_norm": 0.4786750086557114, "learning_rate": 0.00019911846351289945, "loss": 3.2040350437164307, "step": 3077, "token_acc": 0.2752003594853844 }, { "epoch": 1.8044561712107887, "grad_norm": 0.4683742858060124, "learning_rate": 0.00019911717896417877, "loss": 3.1499221324920654, "step": 3078, "token_acc": 0.2820556092521849 }, { "epoch": 1.8050425095279976, "grad_norm": 0.4596835123780443, "learning_rate": 0.0001991158934843858, "loss": 3.1615498065948486, "step": 3079, "token_acc": 0.28226946178306445 }, { "epoch": 1.8056288478452067, "grad_norm": 0.3979062359320113, "learning_rate": 0.00019911460707353252, "loss": 3.1861116886138916, "step": 3080, "token_acc": 0.2778765450334724 }, { "epoch": 1.8062151861624156, "grad_norm": 0.539597419602727, "learning_rate": 0.0001991133197316311, "loss": 3.175027370452881, "step": 3081, "token_acc": 0.2790517783594051 }, { "epoch": 1.8068015244796247, "grad_norm": 0.5410597373827273, "learning_rate": 0.00019911203145869363, "loss": 3.1609606742858887, "step": 3082, "token_acc": 0.28009506219082186 }, { "epoch": 1.8073878627968338, "grad_norm": 0.45497342033714855, "learning_rate": 0.0001991107422547321, "loss": 3.163029670715332, "step": 3083, "token_acc": 0.28220511931280096 }, { "epoch": 1.807974201114043, "grad_norm": 0.46070598860710826, "learning_rate": 0.00019910945211975878, "loss": 3.1028380393981934, "step": 3084, "token_acc": 0.2891598972581285 }, { "epoch": 1.8085605394312518, "grad_norm": 0.3973208565188474, "learning_rate": 0.00019910816105378566, "loss": 3.174038887023926, "step": 3085, "token_acc": 0.2800026711185309 }, { "epoch": 1.8091468777484607, "grad_norm": 0.5937607544865409, "learning_rate": 0.00019910686905682493, "loss": 3.144839286804199, "step": 3086, "token_acc": 0.28407432680699174 }, { "epoch": 1.8097332160656698, "grad_norm": 0.44947229912356496, "learning_rate": 0.0001991055761288887, "loss": 3.121633529663086, "step": 3087, "token_acc": 0.2861460381906997 }, { "epoch": 1.810319554382879, "grad_norm": 0.4881463974021204, "learning_rate": 0.00019910428226998917, "loss": 3.191561698913574, "step": 3088, "token_acc": 0.2780304822771585 }, { "epoch": 1.810905892700088, "grad_norm": 0.4986072395865644, "learning_rate": 0.0001991029874801384, "loss": 3.105213165283203, "step": 3089, "token_acc": 0.2889542538437207 }, { "epoch": 1.811492231017297, "grad_norm": 0.39515824330155225, "learning_rate": 0.00019910169175934862, "loss": 3.141718864440918, "step": 3090, "token_acc": 0.28354780383614947 }, { "epoch": 1.812078569334506, "grad_norm": 0.44609364501065435, "learning_rate": 0.000199100395107632, "loss": 3.207822799682617, "step": 3091, "token_acc": 0.27437709957327594 }, { "epoch": 1.812664907651715, "grad_norm": 0.39730960624598455, "learning_rate": 0.00019909909752500072, "loss": 3.1471433639526367, "step": 3092, "token_acc": 0.28447041859192285 }, { "epoch": 1.813251245968924, "grad_norm": 0.4299903857887841, "learning_rate": 0.00019909779901146696, "loss": 3.0998849868774414, "step": 3093, "token_acc": 0.2882964324801995 }, { "epoch": 1.8138375842861332, "grad_norm": 0.42318956721727574, "learning_rate": 0.00019909649956704288, "loss": 3.146383285522461, "step": 3094, "token_acc": 0.2846735484499744 }, { "epoch": 1.8144239226033423, "grad_norm": 0.3964355897550008, "learning_rate": 0.00019909519919174073, "loss": 3.156064987182617, "step": 3095, "token_acc": 0.28207419695773744 }, { "epoch": 1.8150102609205512, "grad_norm": 0.3886094065854018, "learning_rate": 0.0001990938978855727, "loss": 3.1582894325256348, "step": 3096, "token_acc": 0.2823546332005199 }, { "epoch": 1.81559659923776, "grad_norm": 0.4322618227611294, "learning_rate": 0.00019909259564855107, "loss": 3.126788854598999, "step": 3097, "token_acc": 0.2877920745701149 }, { "epoch": 1.8161829375549692, "grad_norm": 0.49541260098442413, "learning_rate": 0.000199091292480688, "loss": 3.12581205368042, "step": 3098, "token_acc": 0.2866413067800403 }, { "epoch": 1.8167692758721783, "grad_norm": 0.5257304249492958, "learning_rate": 0.0001990899883819958, "loss": 3.19355845451355, "step": 3099, "token_acc": 0.2769926104760851 }, { "epoch": 1.8173556141893874, "grad_norm": 0.5266293716946155, "learning_rate": 0.00019908868335248665, "loss": 3.18611741065979, "step": 3100, "token_acc": 0.2790668954746952 }, { "epoch": 1.8179419525065963, "grad_norm": 0.530398560636547, "learning_rate": 0.00019908737739217288, "loss": 3.201910972595215, "step": 3101, "token_acc": 0.27632447555378586 }, { "epoch": 1.8185282908238052, "grad_norm": 0.5592629899128742, "learning_rate": 0.0001990860705010667, "loss": 3.1702399253845215, "step": 3102, "token_acc": 0.27971160674770223 }, { "epoch": 1.8191146291410143, "grad_norm": 0.4928914900045002, "learning_rate": 0.0001990847626791804, "loss": 3.1639957427978516, "step": 3103, "token_acc": 0.2814083925195036 }, { "epoch": 1.8197009674582234, "grad_norm": 0.4619609014090586, "learning_rate": 0.00019908345392652627, "loss": 3.1560728549957275, "step": 3104, "token_acc": 0.28103688870317955 }, { "epoch": 1.8202873057754325, "grad_norm": 0.46498529137086914, "learning_rate": 0.00019908214424311659, "loss": 3.1222422122955322, "step": 3105, "token_acc": 0.28770440183283025 }, { "epoch": 1.8208736440926414, "grad_norm": 0.3822225674850646, "learning_rate": 0.00019908083362896374, "loss": 3.158790111541748, "step": 3106, "token_acc": 0.2818113135268579 }, { "epoch": 1.8214599824098505, "grad_norm": 0.45697850826883385, "learning_rate": 0.00019907952208407994, "loss": 3.168910503387451, "step": 3107, "token_acc": 0.2804518996633234 }, { "epoch": 1.8220463207270594, "grad_norm": 0.5235350239621589, "learning_rate": 0.00019907820960847751, "loss": 3.1152772903442383, "step": 3108, "token_acc": 0.287955321479604 }, { "epoch": 1.8226326590442685, "grad_norm": 0.4414073006027613, "learning_rate": 0.00019907689620216886, "loss": 3.1422181129455566, "step": 3109, "token_acc": 0.2832322958648017 }, { "epoch": 1.8232189973614776, "grad_norm": 0.47080282638609633, "learning_rate": 0.00019907558186516627, "loss": 3.181764602661133, "step": 3110, "token_acc": 0.2803719336435384 }, { "epoch": 1.8238053356786867, "grad_norm": 0.5955304612722607, "learning_rate": 0.00019907426659748209, "loss": 3.1344666481018066, "step": 3111, "token_acc": 0.2840586464393233 }, { "epoch": 1.8243916739958956, "grad_norm": 0.5011131899334585, "learning_rate": 0.00019907295039912865, "loss": 3.1668143272399902, "step": 3112, "token_acc": 0.2806568501851981 }, { "epoch": 1.8249780123131045, "grad_norm": 0.3836304485547096, "learning_rate": 0.00019907163327011836, "loss": 3.1633336544036865, "step": 3113, "token_acc": 0.28461729602346963 }, { "epoch": 1.8255643506303136, "grad_norm": 0.5570824636065277, "learning_rate": 0.00019907031521046358, "loss": 3.139370918273926, "step": 3114, "token_acc": 0.28400644443233736 }, { "epoch": 1.8261506889475227, "grad_norm": 0.48508610386875756, "learning_rate": 0.0001990689962201767, "loss": 3.155259132385254, "step": 3115, "token_acc": 0.28220868759026685 }, { "epoch": 1.8267370272647319, "grad_norm": 0.4578595611560492, "learning_rate": 0.00019906767629927007, "loss": 3.1544992923736572, "step": 3116, "token_acc": 0.2832061108127092 }, { "epoch": 1.8273233655819408, "grad_norm": 0.4664607070688245, "learning_rate": 0.00019906635544775613, "loss": 3.16265869140625, "step": 3117, "token_acc": 0.28168601181982805 }, { "epoch": 1.8279097038991499, "grad_norm": 0.542635133224902, "learning_rate": 0.00019906503366564726, "loss": 3.124886989593506, "step": 3118, "token_acc": 0.28445999281441525 }, { "epoch": 1.8284960422163588, "grad_norm": 0.37000960648454784, "learning_rate": 0.00019906371095295593, "loss": 3.1056067943573, "step": 3119, "token_acc": 0.28758862933026913 }, { "epoch": 1.8290823805335679, "grad_norm": 0.48343534455547665, "learning_rate": 0.00019906238730969447, "loss": 3.17891001701355, "step": 3120, "token_acc": 0.27801841715957565 }, { "epoch": 1.829668718850777, "grad_norm": 0.5271375500367259, "learning_rate": 0.0001990610627358754, "loss": 3.131997585296631, "step": 3121, "token_acc": 0.28568261826472935 }, { "epoch": 1.830255057167986, "grad_norm": 0.3844341963235111, "learning_rate": 0.00019905973723151116, "loss": 3.1893386840820312, "step": 3122, "token_acc": 0.2754078405948585 }, { "epoch": 1.830841395485195, "grad_norm": 0.4408772790351935, "learning_rate": 0.00019905841079661415, "loss": 3.1531100273132324, "step": 3123, "token_acc": 0.28219660365334487 }, { "epoch": 1.8314277338024039, "grad_norm": 0.5017661646029039, "learning_rate": 0.00019905708343119684, "loss": 3.083510160446167, "step": 3124, "token_acc": 0.29125712797208386 }, { "epoch": 1.832014072119613, "grad_norm": 0.4635979401537257, "learning_rate": 0.00019905575513527173, "loss": 3.1558456420898438, "step": 3125, "token_acc": 0.2812394270892072 }, { "epoch": 1.832600410436822, "grad_norm": 0.4390360060925875, "learning_rate": 0.0001990544259088513, "loss": 3.167863607406616, "step": 3126, "token_acc": 0.28058437367169914 }, { "epoch": 1.8331867487540312, "grad_norm": 0.3825523965730083, "learning_rate": 0.00019905309575194796, "loss": 3.180602550506592, "step": 3127, "token_acc": 0.2799893286089116 }, { "epoch": 1.83377308707124, "grad_norm": 0.3999150554311957, "learning_rate": 0.00019905176466457428, "loss": 3.2018299102783203, "step": 3128, "token_acc": 0.274423176519083 }, { "epoch": 1.834359425388449, "grad_norm": 0.43843222210410726, "learning_rate": 0.00019905043264674277, "loss": 3.1406569480895996, "step": 3129, "token_acc": 0.28449841246243535 }, { "epoch": 1.834945763705658, "grad_norm": 0.4199883691086164, "learning_rate": 0.0001990490996984659, "loss": 3.1223912239074707, "step": 3130, "token_acc": 0.28435085515085823 }, { "epoch": 1.8355321020228672, "grad_norm": 0.49060521693446923, "learning_rate": 0.0001990477658197562, "loss": 3.1625328063964844, "step": 3131, "token_acc": 0.28045417811106366 }, { "epoch": 1.8361184403400763, "grad_norm": 0.5913217811317946, "learning_rate": 0.00019904643101062623, "loss": 3.162557601928711, "step": 3132, "token_acc": 0.28365348013619696 }, { "epoch": 1.8367047786572852, "grad_norm": 0.5151613602192372, "learning_rate": 0.0001990450952710885, "loss": 3.1815497875213623, "step": 3133, "token_acc": 0.2780936978297162 }, { "epoch": 1.8372911169744943, "grad_norm": 0.48828518108244184, "learning_rate": 0.00019904375860115556, "loss": 3.175595760345459, "step": 3134, "token_acc": 0.2802329372923842 }, { "epoch": 1.8378774552917032, "grad_norm": 0.5194582629651129, "learning_rate": 0.00019904242100083995, "loss": 3.173851490020752, "step": 3135, "token_acc": 0.27954982705873693 }, { "epoch": 1.8384637936089123, "grad_norm": 0.47738387383060926, "learning_rate": 0.0001990410824701543, "loss": 3.15293550491333, "step": 3136, "token_acc": 0.28130395461645974 }, { "epoch": 1.8390501319261214, "grad_norm": 0.5504641213053837, "learning_rate": 0.0001990397430091111, "loss": 3.176800012588501, "step": 3137, "token_acc": 0.27962055671257907 }, { "epoch": 1.8396364702433305, "grad_norm": 0.47256662147259404, "learning_rate": 0.000199038402617723, "loss": 3.148754596710205, "step": 3138, "token_acc": 0.28285508560456557 }, { "epoch": 1.8402228085605394, "grad_norm": 0.4510580171307375, "learning_rate": 0.00019903706129600256, "loss": 3.150977373123169, "step": 3139, "token_acc": 0.2831304148795884 }, { "epoch": 1.8408091468777483, "grad_norm": 0.4536714716277695, "learning_rate": 0.00019903571904396236, "loss": 3.131408214569092, "step": 3140, "token_acc": 0.28619942215701516 }, { "epoch": 1.8413954851949574, "grad_norm": 0.40055272148812404, "learning_rate": 0.00019903437586161506, "loss": 3.176074981689453, "step": 3141, "token_acc": 0.2797853212745535 }, { "epoch": 1.8419818235121665, "grad_norm": 0.5316181387254535, "learning_rate": 0.00019903303174897326, "loss": 3.1866860389709473, "step": 3142, "token_acc": 0.2789888101526589 }, { "epoch": 1.8425681618293757, "grad_norm": 0.5494955938967461, "learning_rate": 0.00019903168670604954, "loss": 3.120840072631836, "step": 3143, "token_acc": 0.2872036809020802 }, { "epoch": 1.8431545001465846, "grad_norm": 0.6059995295340069, "learning_rate": 0.00019903034073285659, "loss": 3.1684136390686035, "step": 3144, "token_acc": 0.2804469658263335 }, { "epoch": 1.8437408384637937, "grad_norm": 0.4685524579929522, "learning_rate": 0.00019902899382940703, "loss": 3.1659278869628906, "step": 3145, "token_acc": 0.28042383381337793 }, { "epoch": 1.8443271767810026, "grad_norm": 0.49366014345488596, "learning_rate": 0.0001990276459957135, "loss": 3.148099899291992, "step": 3146, "token_acc": 0.28271852902504424 }, { "epoch": 1.8449135150982117, "grad_norm": 0.4906771288993331, "learning_rate": 0.0001990262972317887, "loss": 3.182332992553711, "step": 3147, "token_acc": 0.27778937507000806 }, { "epoch": 1.8454998534154208, "grad_norm": 0.36889227100532984, "learning_rate": 0.00019902494753764528, "loss": 3.136589527130127, "step": 3148, "token_acc": 0.2857649357607337 }, { "epoch": 1.8460861917326299, "grad_norm": 0.4447326938565774, "learning_rate": 0.0001990235969132959, "loss": 3.1523277759552, "step": 3149, "token_acc": 0.28308221063436245 }, { "epoch": 1.8466725300498388, "grad_norm": 0.4246869726872641, "learning_rate": 0.00019902224535875326, "loss": 3.125051498413086, "step": 3150, "token_acc": 0.2865712395596402 }, { "epoch": 1.8472588683670477, "grad_norm": 0.45784392703608906, "learning_rate": 0.00019902089287403008, "loss": 3.15807843208313, "step": 3151, "token_acc": 0.2820328311221048 }, { "epoch": 1.8478452066842568, "grad_norm": 0.4949707066269936, "learning_rate": 0.00019901953945913902, "loss": 3.156583786010742, "step": 3152, "token_acc": 0.28144236428128333 }, { "epoch": 1.848431545001466, "grad_norm": 0.5047709018593335, "learning_rate": 0.00019901818511409283, "loss": 3.1799654960632324, "step": 3153, "token_acc": 0.27888227779383284 }, { "epoch": 1.849017883318675, "grad_norm": 0.5342647379919129, "learning_rate": 0.0001990168298389042, "loss": 3.160592555999756, "step": 3154, "token_acc": 0.28125910430681283 }, { "epoch": 1.849604221635884, "grad_norm": 0.37399436600903335, "learning_rate": 0.00019901547363358591, "loss": 3.170722007751465, "step": 3155, "token_acc": 0.27877136000662006 }, { "epoch": 1.8501905599530928, "grad_norm": 0.42857212560436597, "learning_rate": 0.00019901411649815066, "loss": 3.1543173789978027, "step": 3156, "token_acc": 0.28016198112987284 }, { "epoch": 1.850776898270302, "grad_norm": 0.3761209757628327, "learning_rate": 0.00019901275843261121, "loss": 3.1635982990264893, "step": 3157, "token_acc": 0.2823472037395881 }, { "epoch": 1.851363236587511, "grad_norm": 0.4259229383619852, "learning_rate": 0.00019901139943698031, "loss": 3.129589557647705, "step": 3158, "token_acc": 0.2839228416389456 }, { "epoch": 1.8519495749047201, "grad_norm": 0.39051938087359483, "learning_rate": 0.00019901003951127073, "loss": 3.174107551574707, "step": 3159, "token_acc": 0.2797626425384234 }, { "epoch": 1.852535913221929, "grad_norm": 0.3915902518792056, "learning_rate": 0.00019900867865549527, "loss": 3.1363000869750977, "step": 3160, "token_acc": 0.28624257911026424 }, { "epoch": 1.8531222515391381, "grad_norm": 0.3785700978596616, "learning_rate": 0.00019900731686966664, "loss": 3.180429458618164, "step": 3161, "token_acc": 0.2794011360533656 }, { "epoch": 1.853708589856347, "grad_norm": 0.38563795576470977, "learning_rate": 0.00019900595415379773, "loss": 3.126704216003418, "step": 3162, "token_acc": 0.28596632939375904 }, { "epoch": 1.8542949281735561, "grad_norm": 0.3914382463597483, "learning_rate": 0.00019900459050790125, "loss": 3.181446075439453, "step": 3163, "token_acc": 0.2805578819718734 }, { "epoch": 1.8548812664907652, "grad_norm": 0.4024508840454432, "learning_rate": 0.0001990032259319901, "loss": 3.2028589248657227, "step": 3164, "token_acc": 0.27596925794587185 }, { "epoch": 1.8554676048079743, "grad_norm": 0.4120602701393418, "learning_rate": 0.00019900186042607701, "loss": 3.168173313140869, "step": 3165, "token_acc": 0.27970881257714814 }, { "epoch": 1.8560539431251832, "grad_norm": 0.4295809551244402, "learning_rate": 0.0001990004939901749, "loss": 3.1263885498046875, "step": 3166, "token_acc": 0.2859093509630666 }, { "epoch": 1.8566402814423921, "grad_norm": 0.4991480125804199, "learning_rate": 0.0001989991266242965, "loss": 3.1439974308013916, "step": 3167, "token_acc": 0.28274302782106886 }, { "epoch": 1.8572266197596012, "grad_norm": 0.4152419616776265, "learning_rate": 0.00019899775832845474, "loss": 3.133056402206421, "step": 3168, "token_acc": 0.285669307442251 }, { "epoch": 1.8578129580768104, "grad_norm": 0.4198853473450349, "learning_rate": 0.0001989963891026624, "loss": 3.138800621032715, "step": 3169, "token_acc": 0.2836669862119605 }, { "epoch": 1.8583992963940195, "grad_norm": 0.3865886311465718, "learning_rate": 0.00019899501894693242, "loss": 3.152841567993164, "step": 3170, "token_acc": 0.2819349054640347 }, { "epoch": 1.8589856347112284, "grad_norm": 0.4376017857952635, "learning_rate": 0.00019899364786127763, "loss": 3.117344379425049, "step": 3171, "token_acc": 0.28789297230690447 }, { "epoch": 1.8595719730284375, "grad_norm": 0.4605694190844209, "learning_rate": 0.0001989922758457109, "loss": 3.159036636352539, "step": 3172, "token_acc": 0.28163598598360523 }, { "epoch": 1.8601583113456464, "grad_norm": 0.42679047445479973, "learning_rate": 0.00019899090290024515, "loss": 3.1550045013427734, "step": 3173, "token_acc": 0.2815179716566147 }, { "epoch": 1.8607446496628555, "grad_norm": 0.38364390450781116, "learning_rate": 0.00019898952902489328, "loss": 3.154336929321289, "step": 3174, "token_acc": 0.28197236661312997 }, { "epoch": 1.8613309879800646, "grad_norm": 0.4613518652409264, "learning_rate": 0.00019898815421966814, "loss": 3.152181386947632, "step": 3175, "token_acc": 0.2814420961136387 }, { "epoch": 1.8619173262972737, "grad_norm": 0.541889744943375, "learning_rate": 0.0001989867784845827, "loss": 3.1032278537750244, "step": 3176, "token_acc": 0.28945720753263016 }, { "epoch": 1.8625036646144826, "grad_norm": 0.4744886296487689, "learning_rate": 0.00019898540181964984, "loss": 3.183441638946533, "step": 3177, "token_acc": 0.27838271030453443 }, { "epoch": 1.8630900029316915, "grad_norm": 0.4113562635132549, "learning_rate": 0.00019898402422488252, "loss": 3.155787467956543, "step": 3178, "token_acc": 0.28137896858683337 }, { "epoch": 1.8636763412489006, "grad_norm": 0.46275175698303483, "learning_rate": 0.0001989826457002937, "loss": 3.1900312900543213, "step": 3179, "token_acc": 0.2780106990330221 }, { "epoch": 1.8642626795661097, "grad_norm": 0.5517062487342601, "learning_rate": 0.00019898126624589625, "loss": 3.137910842895508, "step": 3180, "token_acc": 0.28437317442136334 }, { "epoch": 1.8648490178833188, "grad_norm": 0.5793527723350571, "learning_rate": 0.00019897988586170325, "loss": 3.170560836791992, "step": 3181, "token_acc": 0.280449121261692 }, { "epoch": 1.8654353562005277, "grad_norm": 0.4169418895038681, "learning_rate": 0.00019897850454772756, "loss": 3.175182342529297, "step": 3182, "token_acc": 0.27981192173517366 }, { "epoch": 1.8660216945177366, "grad_norm": 0.470673492609235, "learning_rate": 0.0001989771223039822, "loss": 3.155707359313965, "step": 3183, "token_acc": 0.2796852139664154 }, { "epoch": 1.8666080328349457, "grad_norm": 0.4759097125799998, "learning_rate": 0.00019897573913048015, "loss": 3.1349759101867676, "step": 3184, "token_acc": 0.2847529225552344 }, { "epoch": 1.8671943711521548, "grad_norm": 0.4436084008989719, "learning_rate": 0.0001989743550272344, "loss": 3.1443428993225098, "step": 3185, "token_acc": 0.28360019536849257 }, { "epoch": 1.867780709469364, "grad_norm": 0.4973986451857044, "learning_rate": 0.00019897296999425796, "loss": 3.1864750385284424, "step": 3186, "token_acc": 0.2769893738091889 }, { "epoch": 1.8683670477865728, "grad_norm": 0.67971881169631, "learning_rate": 0.00019897158403156385, "loss": 3.1825883388519287, "step": 3187, "token_acc": 0.27941157717821263 }, { "epoch": 1.868953386103782, "grad_norm": 0.551316890839125, "learning_rate": 0.00019897019713916505, "loss": 3.126471519470215, "step": 3188, "token_acc": 0.28547865170896464 }, { "epoch": 1.8695397244209908, "grad_norm": 0.560415571591795, "learning_rate": 0.0001989688093170746, "loss": 3.1342384815216064, "step": 3189, "token_acc": 0.2844964558750535 }, { "epoch": 1.8701260627382, "grad_norm": 0.37715300556323406, "learning_rate": 0.00019896742056530557, "loss": 3.146027088165283, "step": 3190, "token_acc": 0.2810979523515864 }, { "epoch": 1.870712401055409, "grad_norm": 0.5176268270274003, "learning_rate": 0.00019896603088387097, "loss": 3.1563191413879395, "step": 3191, "token_acc": 0.2839779473681423 }, { "epoch": 1.8712987393726181, "grad_norm": 0.5449484842845632, "learning_rate": 0.00019896464027278386, "loss": 3.1326215267181396, "step": 3192, "token_acc": 0.2843115874577804 }, { "epoch": 1.871885077689827, "grad_norm": 0.4325658013105419, "learning_rate": 0.00019896324873205736, "loss": 3.163142681121826, "step": 3193, "token_acc": 0.280619848060109 }, { "epoch": 1.872471416007036, "grad_norm": 0.4338395486238251, "learning_rate": 0.00019896185626170447, "loss": 3.138472080230713, "step": 3194, "token_acc": 0.2846876903961837 }, { "epoch": 1.873057754324245, "grad_norm": 0.38759165733547385, "learning_rate": 0.00019896046286173826, "loss": 3.1290807723999023, "step": 3195, "token_acc": 0.2846563168577541 }, { "epoch": 1.8736440926414542, "grad_norm": 0.3883007953784523, "learning_rate": 0.00019895906853217187, "loss": 3.130443811416626, "step": 3196, "token_acc": 0.2842502250411282 }, { "epoch": 1.8742304309586633, "grad_norm": 0.40024998634002007, "learning_rate": 0.00019895767327301843, "loss": 3.1582517623901367, "step": 3197, "token_acc": 0.2824491552781664 }, { "epoch": 1.8748167692758722, "grad_norm": 0.33091114363166074, "learning_rate": 0.00019895627708429095, "loss": 3.15684175491333, "step": 3198, "token_acc": 0.28035562250311086 }, { "epoch": 1.875403107593081, "grad_norm": 0.464193434237222, "learning_rate": 0.0001989548799660026, "loss": 3.125330686569214, "step": 3199, "token_acc": 0.28594692320888926 }, { "epoch": 1.8759894459102902, "grad_norm": 0.41848552746109946, "learning_rate": 0.00019895348191816648, "loss": 3.1433699131011963, "step": 3200, "token_acc": 0.285457865335396 }, { "epoch": 1.8765757842274993, "grad_norm": 0.36822204876631076, "learning_rate": 0.00019895208294079574, "loss": 3.1163330078125, "step": 3201, "token_acc": 0.2858785297202118 }, { "epoch": 1.8771621225447084, "grad_norm": 0.42315000973182176, "learning_rate": 0.00019895068303390354, "loss": 3.087249755859375, "step": 3202, "token_acc": 0.29129069798912016 }, { "epoch": 1.8777484608619175, "grad_norm": 0.42522593857781243, "learning_rate": 0.00019894928219750302, "loss": 3.164109230041504, "step": 3203, "token_acc": 0.28085653347620915 }, { "epoch": 1.8783347991791264, "grad_norm": 0.39354903853590534, "learning_rate": 0.0001989478804316073, "loss": 3.155294418334961, "step": 3204, "token_acc": 0.2827194182648332 }, { "epoch": 1.8789211374963353, "grad_norm": 0.4630169258894746, "learning_rate": 0.0001989464777362296, "loss": 3.157960891723633, "step": 3205, "token_acc": 0.2803755176100145 }, { "epoch": 1.8795074758135444, "grad_norm": 0.5026799394237053, "learning_rate": 0.00019894507411138306, "loss": 3.1797566413879395, "step": 3206, "token_acc": 0.2790555533410399 }, { "epoch": 1.8800938141307535, "grad_norm": 0.4373500449653247, "learning_rate": 0.00019894366955708085, "loss": 3.12471866607666, "step": 3207, "token_acc": 0.2846829782460095 }, { "epoch": 1.8806801524479626, "grad_norm": 0.4406153282876968, "learning_rate": 0.00019894226407333624, "loss": 3.1485939025878906, "step": 3208, "token_acc": 0.2831579057943611 }, { "epoch": 1.8812664907651715, "grad_norm": 0.38880043225933736, "learning_rate": 0.0001989408576601624, "loss": 3.144099712371826, "step": 3209, "token_acc": 0.2819289096906153 }, { "epoch": 1.8818528290823804, "grad_norm": 0.40873807140141116, "learning_rate": 0.00019893945031757246, "loss": 3.166224479675293, "step": 3210, "token_acc": 0.280845210137918 }, { "epoch": 1.8824391673995895, "grad_norm": 0.4396707790499638, "learning_rate": 0.00019893804204557976, "loss": 3.1492788791656494, "step": 3211, "token_acc": 0.28203996610648213 }, { "epoch": 1.8830255057167986, "grad_norm": 0.41933922322374717, "learning_rate": 0.00019893663284419746, "loss": 3.169053554534912, "step": 3212, "token_acc": 0.2790902012522845 }, { "epoch": 1.8836118440340077, "grad_norm": 0.5402001093625535, "learning_rate": 0.0001989352227134388, "loss": 3.166184425354004, "step": 3213, "token_acc": 0.2798218323566291 }, { "epoch": 1.8841981823512166, "grad_norm": 0.45991063578681857, "learning_rate": 0.00019893381165331708, "loss": 3.143467903137207, "step": 3214, "token_acc": 0.28353184217909155 }, { "epoch": 1.8847845206684257, "grad_norm": 0.4392756768707656, "learning_rate": 0.00019893239966384547, "loss": 3.1961116790771484, "step": 3215, "token_acc": 0.2780671288642478 }, { "epoch": 1.8853708589856346, "grad_norm": 0.4851611001595174, "learning_rate": 0.0001989309867450373, "loss": 3.1460814476013184, "step": 3216, "token_acc": 0.28068519498493855 }, { "epoch": 1.8859571973028437, "grad_norm": 0.4769388617482181, "learning_rate": 0.0001989295728969058, "loss": 3.171431064605713, "step": 3217, "token_acc": 0.27904988294801036 }, { "epoch": 1.8865435356200528, "grad_norm": 0.4281180237668282, "learning_rate": 0.00019892815811946432, "loss": 3.144864797592163, "step": 3218, "token_acc": 0.28378929745637566 }, { "epoch": 1.887129873937262, "grad_norm": 0.4114349528380137, "learning_rate": 0.00019892674241272604, "loss": 3.1885626316070557, "step": 3219, "token_acc": 0.27822747734415015 }, { "epoch": 1.8877162122544708, "grad_norm": 0.4530915036357786, "learning_rate": 0.00019892532577670436, "loss": 3.157224655151367, "step": 3220, "token_acc": 0.28090873634945396 }, { "epoch": 1.8883025505716797, "grad_norm": 0.41687071293147554, "learning_rate": 0.00019892390821141254, "loss": 3.1666622161865234, "step": 3221, "token_acc": 0.28051364536669016 }, { "epoch": 1.8888888888888888, "grad_norm": 0.47071131866251914, "learning_rate": 0.0001989224897168639, "loss": 3.181093692779541, "step": 3222, "token_acc": 0.27842009522615574 }, { "epoch": 1.889475227206098, "grad_norm": 0.5178138802282173, "learning_rate": 0.00019892107029307175, "loss": 3.178314208984375, "step": 3223, "token_acc": 0.2774907174897517 }, { "epoch": 1.890061565523307, "grad_norm": 0.5301123123952524, "learning_rate": 0.0001989196499400495, "loss": 3.15483021736145, "step": 3224, "token_acc": 0.28167424058735036 }, { "epoch": 1.890647903840516, "grad_norm": 0.38066199442346643, "learning_rate": 0.0001989182286578104, "loss": 3.1507301330566406, "step": 3225, "token_acc": 0.28354695732450613 }, { "epoch": 1.8912342421577248, "grad_norm": 0.5204048495102469, "learning_rate": 0.00019891680644636782, "loss": 3.1960787773132324, "step": 3226, "token_acc": 0.275404504348358 }, { "epoch": 1.891820580474934, "grad_norm": 0.49118000046949734, "learning_rate": 0.00019891538330573516, "loss": 3.191270112991333, "step": 3227, "token_acc": 0.2767167457549299 }, { "epoch": 1.892406918792143, "grad_norm": 0.3899909943241877, "learning_rate": 0.00019891395923592574, "loss": 3.1280264854431152, "step": 3228, "token_acc": 0.286118061584027 }, { "epoch": 1.8929932571093522, "grad_norm": 0.42050702720869215, "learning_rate": 0.00019891253423695297, "loss": 3.1218748092651367, "step": 3229, "token_acc": 0.28625676140866796 }, { "epoch": 1.8935795954265613, "grad_norm": 0.42389573750178067, "learning_rate": 0.00019891110830883024, "loss": 3.086203098297119, "step": 3230, "token_acc": 0.2896606914212548 }, { "epoch": 1.8941659337437702, "grad_norm": 0.3980895946819844, "learning_rate": 0.00019890968145157092, "loss": 3.151057720184326, "step": 3231, "token_acc": 0.28283329047377925 }, { "epoch": 1.894752272060979, "grad_norm": 0.5630381563534455, "learning_rate": 0.0001989082536651884, "loss": 3.1902682781219482, "step": 3232, "token_acc": 0.2777019180107638 }, { "epoch": 1.8953386103781882, "grad_norm": 0.4678747353585678, "learning_rate": 0.0001989068249496962, "loss": 3.1839427947998047, "step": 3233, "token_acc": 0.2783641160949868 }, { "epoch": 1.8959249486953973, "grad_norm": 0.3531644593514653, "learning_rate": 0.00019890539530510756, "loss": 3.1549038887023926, "step": 3234, "token_acc": 0.28208884575617266 }, { "epoch": 1.8965112870126064, "grad_norm": 0.4652094218639067, "learning_rate": 0.0001989039647314361, "loss": 3.1172502040863037, "step": 3235, "token_acc": 0.28850686655926067 }, { "epoch": 1.8970976253298153, "grad_norm": 0.423795459729361, "learning_rate": 0.00019890253322869507, "loss": 3.153761148452759, "step": 3236, "token_acc": 0.2811508508797231 }, { "epoch": 1.8976839636470242, "grad_norm": 0.4079519359854486, "learning_rate": 0.0001989011007968981, "loss": 3.1571176052093506, "step": 3237, "token_acc": 0.28128988572027397 }, { "epoch": 1.8982703019642333, "grad_norm": 0.5022121493751411, "learning_rate": 0.0001988996674360585, "loss": 3.1455979347229004, "step": 3238, "token_acc": 0.2820751399745747 }, { "epoch": 1.8988566402814424, "grad_norm": 0.44162989100445377, "learning_rate": 0.0001988982331461898, "loss": 3.111053466796875, "step": 3239, "token_acc": 0.28854914518878155 }, { "epoch": 1.8994429785986515, "grad_norm": 0.4108162647140152, "learning_rate": 0.0001988967979273055, "loss": 3.148909091949463, "step": 3240, "token_acc": 0.28336517697199404 }, { "epoch": 1.9000293169158604, "grad_norm": 0.46593593405901823, "learning_rate": 0.00019889536177941902, "loss": 3.1266627311706543, "step": 3241, "token_acc": 0.2853437335726309 }, { "epoch": 1.9006156552330695, "grad_norm": 0.4885165951808546, "learning_rate": 0.0001988939247025439, "loss": 3.1374144554138184, "step": 3242, "token_acc": 0.28303717388126154 }, { "epoch": 1.9012019935502784, "grad_norm": 0.4270875478585992, "learning_rate": 0.0001988924866966936, "loss": 3.158627986907959, "step": 3243, "token_acc": 0.280642790182376 }, { "epoch": 1.9017883318674875, "grad_norm": 0.378508542696163, "learning_rate": 0.00019889104776188167, "loss": 3.1566808223724365, "step": 3244, "token_acc": 0.2791309951210531 }, { "epoch": 1.9023746701846966, "grad_norm": 0.4332333932746044, "learning_rate": 0.00019888960789812157, "loss": 3.181943893432617, "step": 3245, "token_acc": 0.2781220370245639 }, { "epoch": 1.9029610085019057, "grad_norm": 0.3622081683512169, "learning_rate": 0.0001988881671054269, "loss": 3.1232476234436035, "step": 3246, "token_acc": 0.2884079992922822 }, { "epoch": 1.9035473468191146, "grad_norm": 0.4354392331831683, "learning_rate": 0.0001988867253838111, "loss": 3.1134345531463623, "step": 3247, "token_acc": 0.2862761521054132 }, { "epoch": 1.9041336851363235, "grad_norm": 0.4529090136982653, "learning_rate": 0.00019888528273328778, "loss": 3.1472015380859375, "step": 3248, "token_acc": 0.2817212417093578 }, { "epoch": 1.9047200234535326, "grad_norm": 0.3402119759869073, "learning_rate": 0.0001988838391538705, "loss": 3.17484712600708, "step": 3249, "token_acc": 0.2786861618207602 }, { "epoch": 1.9053063617707418, "grad_norm": 0.5259127551499162, "learning_rate": 0.0001988823946455728, "loss": 3.1805531978607178, "step": 3250, "token_acc": 0.27801108337828806 }, { "epoch": 1.9058927000879509, "grad_norm": 0.6804863844074353, "learning_rate": 0.00019888094920840826, "loss": 3.110687732696533, "step": 3251, "token_acc": 0.28825601101578224 }, { "epoch": 1.9064790384051598, "grad_norm": 0.5303752984573485, "learning_rate": 0.00019887950284239043, "loss": 3.1947684288024902, "step": 3252, "token_acc": 0.27786302676530206 }, { "epoch": 1.9070653767223686, "grad_norm": 0.6977649656088845, "learning_rate": 0.00019887805554753293, "loss": 3.1728124618530273, "step": 3253, "token_acc": 0.2783591859200066 }, { "epoch": 1.9076517150395778, "grad_norm": 0.565955665320739, "learning_rate": 0.0001988766073238493, "loss": 3.152412176132202, "step": 3254, "token_acc": 0.2821726788637258 }, { "epoch": 1.9082380533567869, "grad_norm": 0.429956343896276, "learning_rate": 0.00019887515817135318, "loss": 3.138113498687744, "step": 3255, "token_acc": 0.2829673441137608 }, { "epoch": 1.908824391673996, "grad_norm": 0.4172307717284425, "learning_rate": 0.00019887370809005823, "loss": 3.1706557273864746, "step": 3256, "token_acc": 0.28047691039032435 }, { "epoch": 1.909410729991205, "grad_norm": 0.4403381961855377, "learning_rate": 0.000198872257079978, "loss": 3.145526885986328, "step": 3257, "token_acc": 0.2823988848335231 }, { "epoch": 1.909997068308414, "grad_norm": 0.45722631012763915, "learning_rate": 0.00019887080514112616, "loss": 3.1019086837768555, "step": 3258, "token_acc": 0.28945744064694895 }, { "epoch": 1.9105834066256229, "grad_norm": 0.43816366204106433, "learning_rate": 0.00019886935227351628, "loss": 3.133572816848755, "step": 3259, "token_acc": 0.2841028609526861 }, { "epoch": 1.911169744942832, "grad_norm": 0.4360792024039627, "learning_rate": 0.00019886789847716214, "loss": 3.0969831943511963, "step": 3260, "token_acc": 0.28956911514858685 }, { "epoch": 1.911756083260041, "grad_norm": 0.5317691126725539, "learning_rate": 0.00019886644375207728, "loss": 3.141867160797119, "step": 3261, "token_acc": 0.28271220459342566 }, { "epoch": 1.9123424215772502, "grad_norm": 0.5305958061096548, "learning_rate": 0.0001988649880982754, "loss": 3.178436279296875, "step": 3262, "token_acc": 0.2790539063069442 }, { "epoch": 1.912928759894459, "grad_norm": 0.4118645617158987, "learning_rate": 0.0001988635315157702, "loss": 3.121683120727539, "step": 3263, "token_acc": 0.28587478432998514 }, { "epoch": 1.913515098211668, "grad_norm": 0.4905472482001144, "learning_rate": 0.00019886207400457532, "loss": 3.1227633953094482, "step": 3264, "token_acc": 0.28750758181006864 }, { "epoch": 1.914101436528877, "grad_norm": 0.5474341061494424, "learning_rate": 0.0001988606155647045, "loss": 3.111152172088623, "step": 3265, "token_acc": 0.28842247058114895 }, { "epoch": 1.9146877748460862, "grad_norm": 0.5576072595580398, "learning_rate": 0.0001988591561961714, "loss": 3.160722494125366, "step": 3266, "token_acc": 0.2810477272490385 }, { "epoch": 1.9152741131632953, "grad_norm": 0.4483321811592267, "learning_rate": 0.00019885769589898976, "loss": 3.158362865447998, "step": 3267, "token_acc": 0.28155793510752297 }, { "epoch": 1.9158604514805042, "grad_norm": 0.5518105917816112, "learning_rate": 0.00019885623467317325, "loss": 3.152453899383545, "step": 3268, "token_acc": 0.28189682214766887 }, { "epoch": 1.9164467897977133, "grad_norm": 0.46328509549078806, "learning_rate": 0.00019885477251873563, "loss": 3.1459293365478516, "step": 3269, "token_acc": 0.2838478452183762 }, { "epoch": 1.9170331281149222, "grad_norm": 0.4901014470659852, "learning_rate": 0.00019885330943569061, "loss": 3.1369147300720215, "step": 3270, "token_acc": 0.2839331734899286 }, { "epoch": 1.9176194664321313, "grad_norm": 0.46140819061057525, "learning_rate": 0.000198851845424052, "loss": 3.1623873710632324, "step": 3271, "token_acc": 0.28042558065675 }, { "epoch": 1.9182058047493404, "grad_norm": 0.4297237070282822, "learning_rate": 0.0001988503804838335, "loss": 3.136974334716797, "step": 3272, "token_acc": 0.2852420885107425 }, { "epoch": 1.9187921430665495, "grad_norm": 0.43603662214846406, "learning_rate": 0.00019884891461504888, "loss": 3.114511489868164, "step": 3273, "token_acc": 0.28738246610019147 }, { "epoch": 1.9193784813837584, "grad_norm": 0.40496148678850025, "learning_rate": 0.00019884744781771188, "loss": 3.106574058532715, "step": 3274, "token_acc": 0.2887540598216222 }, { "epoch": 1.9199648197009673, "grad_norm": 0.5032773562536458, "learning_rate": 0.00019884598009183631, "loss": 3.1133415699005127, "step": 3275, "token_acc": 0.28727047766969843 }, { "epoch": 1.9205511580181764, "grad_norm": 0.42612099397598424, "learning_rate": 0.000198844511437436, "loss": 3.171154737472534, "step": 3276, "token_acc": 0.2806739911535418 }, { "epoch": 1.9211374963353856, "grad_norm": 0.40530336978197173, "learning_rate": 0.00019884304185452463, "loss": 3.1084368228912354, "step": 3277, "token_acc": 0.28794486958531595 }, { "epoch": 1.9217238346525947, "grad_norm": 0.5002513987649199, "learning_rate": 0.0001988415713431161, "loss": 3.146696090698242, "step": 3278, "token_acc": 0.28150187394212944 }, { "epoch": 1.9223101729698036, "grad_norm": 0.49568001781253934, "learning_rate": 0.00019884009990322424, "loss": 3.1555867195129395, "step": 3279, "token_acc": 0.2810712071786429 }, { "epoch": 1.9228965112870124, "grad_norm": 0.41848514741737625, "learning_rate": 0.0001988386275348628, "loss": 3.1539266109466553, "step": 3280, "token_acc": 0.283250924972562 }, { "epoch": 1.9234828496042216, "grad_norm": 0.3924025283135276, "learning_rate": 0.00019883715423804561, "loss": 3.142360210418701, "step": 3281, "token_acc": 0.2814026341496278 }, { "epoch": 1.9240691879214307, "grad_norm": 0.49662023762200874, "learning_rate": 0.00019883568001278658, "loss": 3.1667699813842773, "step": 3282, "token_acc": 0.2814986148839461 }, { "epoch": 1.9246555262386398, "grad_norm": 0.47243956565238826, "learning_rate": 0.00019883420485909952, "loss": 3.130139112472534, "step": 3283, "token_acc": 0.2833863507219747 }, { "epoch": 1.9252418645558487, "grad_norm": 0.4353539007460067, "learning_rate": 0.00019883272877699825, "loss": 3.1643974781036377, "step": 3284, "token_acc": 0.27937373221816764 }, { "epoch": 1.9258282028730578, "grad_norm": 0.4536369818582669, "learning_rate": 0.0001988312517664967, "loss": 3.161803722381592, "step": 3285, "token_acc": 0.2811986579964589 }, { "epoch": 1.9264145411902667, "grad_norm": 0.4128846601990197, "learning_rate": 0.0001988297738276087, "loss": 3.1450726985931396, "step": 3286, "token_acc": 0.2837359733159987 }, { "epoch": 1.9270008795074758, "grad_norm": 0.4858051560011724, "learning_rate": 0.00019882829496034815, "loss": 3.1175756454467773, "step": 3287, "token_acc": 0.2867556918262933 }, { "epoch": 1.927587217824685, "grad_norm": 0.39386646450620316, "learning_rate": 0.00019882681516472897, "loss": 3.1193647384643555, "step": 3288, "token_acc": 0.2862091812102598 }, { "epoch": 1.928173556141894, "grad_norm": 0.44727425742997867, "learning_rate": 0.00019882533444076503, "loss": 3.0887303352355957, "step": 3289, "token_acc": 0.29138277747966024 }, { "epoch": 1.928759894459103, "grad_norm": 0.4319031486000401, "learning_rate": 0.00019882385278847023, "loss": 3.186368942260742, "step": 3290, "token_acc": 0.2747711554109032 }, { "epoch": 1.9293462327763118, "grad_norm": 0.45965988926961476, "learning_rate": 0.00019882237020785848, "loss": 3.165771961212158, "step": 3291, "token_acc": 0.28079758001439975 }, { "epoch": 1.929932571093521, "grad_norm": 0.47732279940864375, "learning_rate": 0.00019882088669894373, "loss": 3.1610002517700195, "step": 3292, "token_acc": 0.28118670383106836 }, { "epoch": 1.93051890941073, "grad_norm": 0.5632949973091116, "learning_rate": 0.00019881940226173993, "loss": 3.1582798957824707, "step": 3293, "token_acc": 0.28076239765178435 }, { "epoch": 1.9311052477279391, "grad_norm": 0.6016462719645413, "learning_rate": 0.00019881791689626101, "loss": 3.18418288230896, "step": 3294, "token_acc": 0.27669563157498034 }, { "epoch": 1.931691586045148, "grad_norm": 0.5180000945548677, "learning_rate": 0.0001988164306025209, "loss": 3.158323287963867, "step": 3295, "token_acc": 0.2817328122831182 }, { "epoch": 1.9322779243623571, "grad_norm": 0.6408493537146691, "learning_rate": 0.00019881494338053361, "loss": 3.1660799980163574, "step": 3296, "token_acc": 0.28048748919250543 }, { "epoch": 1.932864262679566, "grad_norm": 0.5210578365408927, "learning_rate": 0.00019881345523031305, "loss": 3.147230386734009, "step": 3297, "token_acc": 0.28441532785528556 }, { "epoch": 1.9334506009967751, "grad_norm": 0.42222538281236116, "learning_rate": 0.00019881196615187326, "loss": 3.145153045654297, "step": 3298, "token_acc": 0.2823341970077519 }, { "epoch": 1.9340369393139842, "grad_norm": 0.4735551531552477, "learning_rate": 0.00019881047614522817, "loss": 3.1301538944244385, "step": 3299, "token_acc": 0.2849992345711411 }, { "epoch": 1.9346232776311933, "grad_norm": 0.5227444240607516, "learning_rate": 0.00019880898521039184, "loss": 3.1397571563720703, "step": 3300, "token_acc": 0.28414099763452816 }, { "epoch": 1.9352096159484022, "grad_norm": 0.3761506949759527, "learning_rate": 0.0001988074933473782, "loss": 3.170996904373169, "step": 3301, "token_acc": 0.28052839287370296 }, { "epoch": 1.9357959542656111, "grad_norm": 0.41975710479807016, "learning_rate": 0.00019880600055620135, "loss": 3.1350131034851074, "step": 3302, "token_acc": 0.28523785384594863 }, { "epoch": 1.9363822925828202, "grad_norm": 0.3863424758899897, "learning_rate": 0.00019880450683687525, "loss": 3.1554925441741943, "step": 3303, "token_acc": 0.28064895879180607 }, { "epoch": 1.9369686309000294, "grad_norm": 0.415419385074687, "learning_rate": 0.00019880301218941392, "loss": 3.1456151008605957, "step": 3304, "token_acc": 0.284612005124684 }, { "epoch": 1.9375549692172385, "grad_norm": 0.396386282676236, "learning_rate": 0.00019880151661383146, "loss": 3.145545721054077, "step": 3305, "token_acc": 0.28296389916830456 }, { "epoch": 1.9381413075344474, "grad_norm": 0.34068428027016884, "learning_rate": 0.00019880002011014186, "loss": 3.1382217407226562, "step": 3306, "token_acc": 0.28485461297363274 }, { "epoch": 1.9387276458516562, "grad_norm": 0.3733837201393907, "learning_rate": 0.00019879852267835926, "loss": 3.1436362266540527, "step": 3307, "token_acc": 0.2833681594257234 }, { "epoch": 1.9393139841688654, "grad_norm": 0.4271428322944052, "learning_rate": 0.00019879702431849762, "loss": 3.169227123260498, "step": 3308, "token_acc": 0.27909203036542757 }, { "epoch": 1.9399003224860745, "grad_norm": 0.3735020631025754, "learning_rate": 0.00019879552503057109, "loss": 3.181687831878662, "step": 3309, "token_acc": 0.27802120971142197 }, { "epoch": 1.9404866608032836, "grad_norm": 0.3493616037545797, "learning_rate": 0.00019879402481459373, "loss": 3.1482725143432617, "step": 3310, "token_acc": 0.28244964462710653 }, { "epoch": 1.9410729991204925, "grad_norm": 0.3638311640730497, "learning_rate": 0.0001987925236705796, "loss": 3.1121773719787598, "step": 3311, "token_acc": 0.28694095177462997 }, { "epoch": 1.9416593374377016, "grad_norm": 0.4579330539231343, "learning_rate": 0.0001987910215985429, "loss": 3.148362398147583, "step": 3312, "token_acc": 0.28170394605061094 }, { "epoch": 1.9422456757549105, "grad_norm": 0.5126014948678351, "learning_rate": 0.00019878951859849764, "loss": 3.1898064613342285, "step": 3313, "token_acc": 0.27752203929105806 }, { "epoch": 1.9428320140721196, "grad_norm": 0.4775509119943933, "learning_rate": 0.00019878801467045794, "loss": 3.183704376220703, "step": 3314, "token_acc": 0.2772992384687874 }, { "epoch": 1.9434183523893287, "grad_norm": 0.3792946399382829, "learning_rate": 0.000198786509814438, "loss": 3.169325828552246, "step": 3315, "token_acc": 0.28031376746132874 }, { "epoch": 1.9440046907065378, "grad_norm": 0.5277176341625125, "learning_rate": 0.00019878500403045193, "loss": 3.1394429206848145, "step": 3316, "token_acc": 0.2827082241745049 }, { "epoch": 1.9445910290237467, "grad_norm": 0.48063624585973536, "learning_rate": 0.00019878349731851383, "loss": 3.1598827838897705, "step": 3317, "token_acc": 0.2785116999942403 }, { "epoch": 1.9451773673409556, "grad_norm": 0.39340284887613947, "learning_rate": 0.0001987819896786379, "loss": 3.1648051738739014, "step": 3318, "token_acc": 0.2801377908588172 }, { "epoch": 1.9457637056581647, "grad_norm": 0.5167857954480706, "learning_rate": 0.00019878048111083828, "loss": 3.1408488750457764, "step": 3319, "token_acc": 0.2840455056745914 }, { "epoch": 1.9463500439753738, "grad_norm": 0.43064435349633695, "learning_rate": 0.00019877897161512916, "loss": 3.149380922317505, "step": 3320, "token_acc": 0.28209120356176437 }, { "epoch": 1.946936382292583, "grad_norm": 0.39222523034198636, "learning_rate": 0.0001987774611915247, "loss": 3.087924003601074, "step": 3321, "token_acc": 0.29100672121566334 }, { "epoch": 1.9475227206097918, "grad_norm": 0.47295727228284346, "learning_rate": 0.00019877594984003908, "loss": 3.149669885635376, "step": 3322, "token_acc": 0.2809865064491827 }, { "epoch": 1.948109058927001, "grad_norm": 0.32591262465000376, "learning_rate": 0.00019877443756068655, "loss": 3.1068527698516846, "step": 3323, "token_acc": 0.2885880770954729 }, { "epoch": 1.9486953972442098, "grad_norm": 0.4387291187794354, "learning_rate": 0.00019877292435348124, "loss": 3.195404529571533, "step": 3324, "token_acc": 0.27587786983245816 }, { "epoch": 1.949281735561419, "grad_norm": 0.4497504094418033, "learning_rate": 0.00019877141021843744, "loss": 3.0984115600585938, "step": 3325, "token_acc": 0.29035156379190596 }, { "epoch": 1.949868073878628, "grad_norm": 0.49371039634781844, "learning_rate": 0.0001987698951555693, "loss": 3.1666388511657715, "step": 3326, "token_acc": 0.27935762843245027 }, { "epoch": 1.9504544121958372, "grad_norm": 0.4759427493389348, "learning_rate": 0.0001987683791648911, "loss": 3.1158556938171387, "step": 3327, "token_acc": 0.2856056083554776 }, { "epoch": 1.951040750513046, "grad_norm": 0.41721154036558145, "learning_rate": 0.0001987668622464171, "loss": 3.1539034843444824, "step": 3328, "token_acc": 0.28220472042139383 }, { "epoch": 1.951627088830255, "grad_norm": 0.4375019697132133, "learning_rate": 0.0001987653444001615, "loss": 3.151810646057129, "step": 3329, "token_acc": 0.28326284115060074 }, { "epoch": 1.952213427147464, "grad_norm": 0.4656794834872127, "learning_rate": 0.00019876382562613855, "loss": 3.1609416007995605, "step": 3330, "token_acc": 0.2799758578221472 }, { "epoch": 1.9527997654646732, "grad_norm": 0.43300670870512536, "learning_rate": 0.0001987623059243626, "loss": 3.1560091972351074, "step": 3331, "token_acc": 0.2809870713534952 }, { "epoch": 1.9533861037818823, "grad_norm": 0.40685596219884385, "learning_rate": 0.00019876078529484784, "loss": 3.098418712615967, "step": 3332, "token_acc": 0.28920361631235375 }, { "epoch": 1.9539724420990912, "grad_norm": 0.4838990784783711, "learning_rate": 0.00019875926373760856, "loss": 3.1322765350341797, "step": 3333, "token_acc": 0.28403591826440594 }, { "epoch": 1.9545587804163, "grad_norm": 0.41922430021704327, "learning_rate": 0.00019875774125265911, "loss": 3.144613742828369, "step": 3334, "token_acc": 0.28423512758693553 }, { "epoch": 1.9551451187335092, "grad_norm": 0.4577425714268011, "learning_rate": 0.00019875621784001376, "loss": 3.1639633178710938, "step": 3335, "token_acc": 0.2799635240142184 }, { "epoch": 1.9557314570507183, "grad_norm": 0.4800700659895863, "learning_rate": 0.0001987546934996868, "loss": 3.1706573963165283, "step": 3336, "token_acc": 0.27969955598776725 }, { "epoch": 1.9563177953679274, "grad_norm": 0.4314120027220838, "learning_rate": 0.00019875316823169257, "loss": 3.170544147491455, "step": 3337, "token_acc": 0.2772709211255529 }, { "epoch": 1.9569041336851363, "grad_norm": 0.4620926943256627, "learning_rate": 0.0001987516420360454, "loss": 3.1554579734802246, "step": 3338, "token_acc": 0.2813799386623303 }, { "epoch": 1.9574904720023454, "grad_norm": 0.5480097916324622, "learning_rate": 0.00019875011491275963, "loss": 3.132462978363037, "step": 3339, "token_acc": 0.28333226190093536 }, { "epoch": 1.9580768103195543, "grad_norm": 0.5265670614061894, "learning_rate": 0.0001987485868618496, "loss": 3.1695430278778076, "step": 3340, "token_acc": 0.27784372034166405 }, { "epoch": 1.9586631486367634, "grad_norm": 0.469388764590352, "learning_rate": 0.00019874705788332965, "loss": 3.171898365020752, "step": 3341, "token_acc": 0.2783012653291384 }, { "epoch": 1.9592494869539725, "grad_norm": 0.5154525378605237, "learning_rate": 0.00019874552797721415, "loss": 3.185338020324707, "step": 3342, "token_acc": 0.2791533016992286 }, { "epoch": 1.9598358252711816, "grad_norm": 0.48672872858945443, "learning_rate": 0.00019874399714351747, "loss": 3.162562608718872, "step": 3343, "token_acc": 0.2797225679097112 }, { "epoch": 1.9604221635883905, "grad_norm": 0.563296153780986, "learning_rate": 0.00019874246538225402, "loss": 3.175508737564087, "step": 3344, "token_acc": 0.27854774120885994 }, { "epoch": 1.9610085019055994, "grad_norm": 0.4600294746050792, "learning_rate": 0.00019874093269343817, "loss": 3.1723577976226807, "step": 3345, "token_acc": 0.28015407295684436 }, { "epoch": 1.9615948402228085, "grad_norm": 0.41544442130913456, "learning_rate": 0.0001987393990770843, "loss": 3.1486682891845703, "step": 3346, "token_acc": 0.2807905904542359 }, { "epoch": 1.9621811785400176, "grad_norm": 0.4325196323458287, "learning_rate": 0.00019873786453320682, "loss": 3.15328049659729, "step": 3347, "token_acc": 0.2827602383569081 }, { "epoch": 1.9627675168572267, "grad_norm": 0.4361981691380038, "learning_rate": 0.00019873632906182017, "loss": 3.1095821857452393, "step": 3348, "token_acc": 0.2869903838512278 }, { "epoch": 1.9633538551744356, "grad_norm": 0.39307651954305384, "learning_rate": 0.00019873479266293873, "loss": 3.1318202018737793, "step": 3349, "token_acc": 0.28561577858605425 }, { "epoch": 1.9639401934916447, "grad_norm": 0.38923621504452854, "learning_rate": 0.00019873325533657698, "loss": 3.1935160160064697, "step": 3350, "token_acc": 0.2759723291360192 }, { "epoch": 1.9645265318088536, "grad_norm": 0.4452045939892435, "learning_rate": 0.00019873171708274936, "loss": 3.127716541290283, "step": 3351, "token_acc": 0.28447302913317724 }, { "epoch": 1.9651128701260627, "grad_norm": 0.41973476067702986, "learning_rate": 0.00019873017790147026, "loss": 3.152920722961426, "step": 3352, "token_acc": 0.28289961935313973 }, { "epoch": 1.9656992084432718, "grad_norm": 0.43602126808209046, "learning_rate": 0.0001987286377927542, "loss": 3.1512486934661865, "step": 3353, "token_acc": 0.2825903589051495 }, { "epoch": 1.966285546760481, "grad_norm": 0.4422121562419161, "learning_rate": 0.00019872709675661563, "loss": 3.161752700805664, "step": 3354, "token_acc": 0.2802415432622347 }, { "epoch": 1.9668718850776898, "grad_norm": 0.48987517201512987, "learning_rate": 0.000198725554793069, "loss": 3.0940101146698, "step": 3355, "token_acc": 0.28962415129118946 }, { "epoch": 1.9674582233948987, "grad_norm": 0.457699278304656, "learning_rate": 0.00019872401190212884, "loss": 3.1350083351135254, "step": 3356, "token_acc": 0.28572879145779667 }, { "epoch": 1.9680445617121078, "grad_norm": 0.40411359416622067, "learning_rate": 0.0001987224680838096, "loss": 3.1508283615112305, "step": 3357, "token_acc": 0.2820381212072607 }, { "epoch": 1.968630900029317, "grad_norm": 0.5006961450935864, "learning_rate": 0.00019872092333812584, "loss": 3.1286706924438477, "step": 3358, "token_acc": 0.2856202527709176 }, { "epoch": 1.969217238346526, "grad_norm": 0.3607412408312874, "learning_rate": 0.00019871937766509202, "loss": 3.0948667526245117, "step": 3359, "token_acc": 0.2901008152145106 }, { "epoch": 1.969803576663735, "grad_norm": 0.4733421343166111, "learning_rate": 0.00019871783106472263, "loss": 3.1291213035583496, "step": 3360, "token_acc": 0.28497296286484547 }, { "epoch": 1.9703899149809438, "grad_norm": 0.4503017132922778, "learning_rate": 0.0001987162835370323, "loss": 3.182110071182251, "step": 3361, "token_acc": 0.2754765786435286 }, { "epoch": 1.970976253298153, "grad_norm": 0.41805652952339045, "learning_rate": 0.00019871473508203548, "loss": 3.175023317337036, "step": 3362, "token_acc": 0.27923016517375854 }, { "epoch": 1.971562591615362, "grad_norm": 0.39126822844907544, "learning_rate": 0.00019871318569974675, "loss": 3.1669154167175293, "step": 3363, "token_acc": 0.28012119405439856 }, { "epoch": 1.9721489299325712, "grad_norm": 0.41518041785061105, "learning_rate": 0.00019871163539018064, "loss": 3.149796962738037, "step": 3364, "token_acc": 0.2815502463349828 }, { "epoch": 1.97273526824978, "grad_norm": 0.3815359428472301, "learning_rate": 0.00019871008415335174, "loss": 3.1111063957214355, "step": 3365, "token_acc": 0.2875617283950617 }, { "epoch": 1.9733216065669892, "grad_norm": 0.3772803270741792, "learning_rate": 0.00019870853198927465, "loss": 3.130253791809082, "step": 3366, "token_acc": 0.2863953610411419 }, { "epoch": 1.973907944884198, "grad_norm": 0.4216104835740131, "learning_rate": 0.00019870697889796385, "loss": 3.1638271808624268, "step": 3367, "token_acc": 0.27893247800859944 }, { "epoch": 1.9744942832014072, "grad_norm": 0.42528007468966156, "learning_rate": 0.00019870542487943405, "loss": 3.1918272972106934, "step": 3368, "token_acc": 0.27538355524225167 }, { "epoch": 1.9750806215186163, "grad_norm": 0.40591839088801823, "learning_rate": 0.00019870386993369973, "loss": 3.091796875, "step": 3369, "token_acc": 0.2925289121409811 }, { "epoch": 1.9756669598358254, "grad_norm": 0.41318265937469273, "learning_rate": 0.0001987023140607756, "loss": 3.1552672386169434, "step": 3370, "token_acc": 0.2815559777337886 }, { "epoch": 1.9762532981530343, "grad_norm": 0.42682423412482684, "learning_rate": 0.00019870075726067624, "loss": 3.16373348236084, "step": 3371, "token_acc": 0.279953922833294 }, { "epoch": 1.9768396364702432, "grad_norm": 0.37378410161823516, "learning_rate": 0.00019869919953341625, "loss": 3.1154117584228516, "step": 3372, "token_acc": 0.28725539049979126 }, { "epoch": 1.9774259747874523, "grad_norm": 0.4600561649469349, "learning_rate": 0.00019869764087901027, "loss": 3.170482635498047, "step": 3373, "token_acc": 0.27974898297973255 }, { "epoch": 1.9780123131046614, "grad_norm": 0.4685646491125004, "learning_rate": 0.00019869608129747298, "loss": 3.1628518104553223, "step": 3374, "token_acc": 0.27904471748623205 }, { "epoch": 1.9785986514218705, "grad_norm": 0.43323598555098025, "learning_rate": 0.000198694520788819, "loss": 3.1561319828033447, "step": 3375, "token_acc": 0.2815776272251415 }, { "epoch": 1.9791849897390794, "grad_norm": 0.41106599945008765, "learning_rate": 0.00019869295935306296, "loss": 3.1661226749420166, "step": 3376, "token_acc": 0.27984305748059296 }, { "epoch": 1.9797713280562885, "grad_norm": 0.43136435540252194, "learning_rate": 0.00019869139699021956, "loss": 3.1784708499908447, "step": 3377, "token_acc": 0.277418543987401 }, { "epoch": 1.9803576663734974, "grad_norm": 0.3785295059250903, "learning_rate": 0.00019868983370030348, "loss": 3.1546106338500977, "step": 3378, "token_acc": 0.28043269355634626 }, { "epoch": 1.9809440046907065, "grad_norm": 0.38796815634361265, "learning_rate": 0.0001986882694833294, "loss": 3.1565778255462646, "step": 3379, "token_acc": 0.2815353324425814 }, { "epoch": 1.9815303430079156, "grad_norm": 0.46409959664125106, "learning_rate": 0.000198686704339312, "loss": 3.139758586883545, "step": 3380, "token_acc": 0.2824740250121675 }, { "epoch": 1.9821166813251248, "grad_norm": 0.5320599989592694, "learning_rate": 0.000198685138268266, "loss": 3.126986026763916, "step": 3381, "token_acc": 0.2844456209191035 }, { "epoch": 1.9827030196423336, "grad_norm": 0.4424768302535164, "learning_rate": 0.00019868357127020612, "loss": 3.149501323699951, "step": 3382, "token_acc": 0.28110199385533485 }, { "epoch": 1.9832893579595425, "grad_norm": 0.4068907625625445, "learning_rate": 0.00019868200334514707, "loss": 3.1123039722442627, "step": 3383, "token_acc": 0.28754924014405053 }, { "epoch": 1.9838756962767516, "grad_norm": 0.41291295574925646, "learning_rate": 0.00019868043449310357, "loss": 3.161336660385132, "step": 3384, "token_acc": 0.2824079239985364 }, { "epoch": 1.9844620345939608, "grad_norm": 0.5100690021328849, "learning_rate": 0.00019867886471409033, "loss": 3.113718032836914, "step": 3385, "token_acc": 0.28607224676812304 }, { "epoch": 1.9850483729111699, "grad_norm": 0.4253595109390804, "learning_rate": 0.00019867729400812215, "loss": 3.158912420272827, "step": 3386, "token_acc": 0.28058674159910285 }, { "epoch": 1.9856347112283788, "grad_norm": 0.47414792190475336, "learning_rate": 0.00019867572237521376, "loss": 3.140915870666504, "step": 3387, "token_acc": 0.2838225022814289 }, { "epoch": 1.9862210495455876, "grad_norm": 0.5041684332202685, "learning_rate": 0.00019867414981537994, "loss": 3.1242451667785645, "step": 3388, "token_acc": 0.2864778810267547 }, { "epoch": 1.9868073878627968, "grad_norm": 0.31775098694333365, "learning_rate": 0.00019867257632863545, "loss": 3.145717144012451, "step": 3389, "token_acc": 0.28361240805884236 }, { "epoch": 1.9873937261800059, "grad_norm": 0.4548406079373691, "learning_rate": 0.00019867100191499505, "loss": 3.1325159072875977, "step": 3390, "token_acc": 0.28403392041748204 }, { "epoch": 1.987980064497215, "grad_norm": 0.4459615977449683, "learning_rate": 0.00019866942657447356, "loss": 3.1439404487609863, "step": 3391, "token_acc": 0.28202944397128604 }, { "epoch": 1.9885664028144239, "grad_norm": 0.3444542004327783, "learning_rate": 0.00019866785030708576, "loss": 3.1276817321777344, "step": 3392, "token_acc": 0.2840370166432796 }, { "epoch": 1.989152741131633, "grad_norm": 0.4310743480927085, "learning_rate": 0.00019866627311284645, "loss": 3.1900343894958496, "step": 3393, "token_acc": 0.2770741904949914 }, { "epoch": 1.9897390794488419, "grad_norm": 0.3660813583455183, "learning_rate": 0.0001986646949917705, "loss": 3.1720938682556152, "step": 3394, "token_acc": 0.27853221239349224 }, { "epoch": 1.990325417766051, "grad_norm": 0.4419095801887838, "learning_rate": 0.00019866311594387263, "loss": 3.148575782775879, "step": 3395, "token_acc": 0.2823388978685363 }, { "epoch": 1.99091175608326, "grad_norm": 0.3771321482302679, "learning_rate": 0.0001986615359691678, "loss": 3.161640167236328, "step": 3396, "token_acc": 0.28125916460681216 }, { "epoch": 1.9914980944004692, "grad_norm": 0.3976610643672019, "learning_rate": 0.00019865995506767074, "loss": 3.1200175285339355, "step": 3397, "token_acc": 0.2871431249804702 }, { "epoch": 1.992084432717678, "grad_norm": 0.31356070794172497, "learning_rate": 0.0001986583732393964, "loss": 3.149045944213867, "step": 3398, "token_acc": 0.2845456971942661 }, { "epoch": 1.992670771034887, "grad_norm": 0.3957748313816249, "learning_rate": 0.00019865679048435952, "loss": 3.183539390563965, "step": 3399, "token_acc": 0.27813685026502694 }, { "epoch": 1.993257109352096, "grad_norm": 0.4635986896605718, "learning_rate": 0.00019865520680257507, "loss": 3.182753562927246, "step": 3400, "token_acc": 0.2778512426403186 }, { "epoch": 1.9938434476693052, "grad_norm": 0.36538791459746844, "learning_rate": 0.0001986536221940579, "loss": 3.1634182929992676, "step": 3401, "token_acc": 0.27964701758102634 }, { "epoch": 1.9944297859865143, "grad_norm": 0.45411599852976825, "learning_rate": 0.00019865203665882288, "loss": 3.1822521686553955, "step": 3402, "token_acc": 0.27824841501106046 }, { "epoch": 1.9950161243037232, "grad_norm": 0.5380577160784337, "learning_rate": 0.00019865045019688488, "loss": 3.170513153076172, "step": 3403, "token_acc": 0.27743396541467946 }, { "epoch": 1.9956024626209323, "grad_norm": 0.46977869056729854, "learning_rate": 0.00019864886280825887, "loss": 3.1478023529052734, "step": 3404, "token_acc": 0.28211187162501444 }, { "epoch": 1.9961888009381412, "grad_norm": 0.4067343623388572, "learning_rate": 0.00019864727449295973, "loss": 3.167191982269287, "step": 3405, "token_acc": 0.2790123591045766 }, { "epoch": 1.9967751392553503, "grad_norm": 0.5719470991780657, "learning_rate": 0.00019864568525100235, "loss": 3.1479108333587646, "step": 3406, "token_acc": 0.2808350046251519 }, { "epoch": 1.9973614775725594, "grad_norm": 0.5640755107525108, "learning_rate": 0.0001986440950824017, "loss": 3.1524441242218018, "step": 3407, "token_acc": 0.2820435300396182 }, { "epoch": 1.9979478158897686, "grad_norm": 0.41340584585630324, "learning_rate": 0.0001986425039871727, "loss": 3.1483821868896484, "step": 3408, "token_acc": 0.28315104139120134 }, { "epoch": 1.9985341542069774, "grad_norm": 0.451575231052006, "learning_rate": 0.0001986409119653303, "loss": 3.119262218475342, "step": 3409, "token_acc": 0.28730103037007076 }, { "epoch": 1.9991204925241863, "grad_norm": 0.43381716125735736, "learning_rate": 0.00019863931901688942, "loss": 3.137439727783203, "step": 3410, "token_acc": 0.28318712429290516 }, { "epoch": 1.9997068308413954, "grad_norm": 0.4454884243532071, "learning_rate": 0.00019863772514186508, "loss": 3.137871265411377, "step": 3411, "token_acc": 0.2824936790643489 }, { "epoch": 2.0, "grad_norm": 0.5569415830635872, "learning_rate": 0.00019863613034027224, "loss": 3.1297950744628906, "step": 3412, "token_acc": 0.2860031799662129 }, { "epoch": 2.0, "eval_loss": 3.125737190246582, "eval_runtime": 16.5539, "eval_samples_per_second": 15.465, "eval_steps_per_second": 1.933, "eval_token_acc": 0.284939371791372, "step": 3412 }, { "epoch": 2.000586338317209, "grad_norm": 0.49998177441428876, "learning_rate": 0.00019863453461212586, "loss": 3.0684006214141846, "step": 3413, "token_acc": 0.2918300454927708 }, { "epoch": 2.0011726766344182, "grad_norm": 0.5058384648990105, "learning_rate": 0.00019863293795744093, "loss": 3.1008150577545166, "step": 3414, "token_acc": 0.28668079556569936 }, { "epoch": 2.001759014951627, "grad_norm": 0.512666475312042, "learning_rate": 0.00019863134037623246, "loss": 3.144676446914673, "step": 3415, "token_acc": 0.28229278178549544 }, { "epoch": 2.002345353268836, "grad_norm": 0.4595128529384985, "learning_rate": 0.00019862974186851548, "loss": 3.1008362770080566, "step": 3416, "token_acc": 0.2864899099397408 }, { "epoch": 2.002931691586045, "grad_norm": 0.4934855750548127, "learning_rate": 0.00019862814243430497, "loss": 3.083914279937744, "step": 3417, "token_acc": 0.2897059667914207 }, { "epoch": 2.0035180299032542, "grad_norm": 0.4384831085729346, "learning_rate": 0.00019862654207361595, "loss": 3.0431952476501465, "step": 3418, "token_acc": 0.295449011206499 }, { "epoch": 2.0041043682204633, "grad_norm": 0.5781943659392206, "learning_rate": 0.00019862494078646346, "loss": 3.1228113174438477, "step": 3419, "token_acc": 0.284195790366606 }, { "epoch": 2.0046907065376725, "grad_norm": 0.4053322140989937, "learning_rate": 0.00019862333857286258, "loss": 3.0822365283966064, "step": 3420, "token_acc": 0.28940557612319917 }, { "epoch": 2.005277044854881, "grad_norm": 0.41631005009025635, "learning_rate": 0.0001986217354328283, "loss": 3.0864272117614746, "step": 3421, "token_acc": 0.2897685593697841 }, { "epoch": 2.0058633831720902, "grad_norm": 0.4859054693647988, "learning_rate": 0.00019862013136637575, "loss": 3.0980827808380127, "step": 3422, "token_acc": 0.290450429600486 }, { "epoch": 2.0064497214892993, "grad_norm": 0.4716793129928112, "learning_rate": 0.00019861852637351993, "loss": 3.0215601921081543, "step": 3423, "token_acc": 0.2977535771448794 }, { "epoch": 2.0070360598065085, "grad_norm": 0.3894036170417833, "learning_rate": 0.00019861692045427593, "loss": 3.0676109790802, "step": 3424, "token_acc": 0.2916630855178341 }, { "epoch": 2.0076223981237176, "grad_norm": 0.49506202086101725, "learning_rate": 0.00019861531360865887, "loss": 3.062096118927002, "step": 3425, "token_acc": 0.29257836496758355 }, { "epoch": 2.0082087364409262, "grad_norm": 0.5123133372513321, "learning_rate": 0.00019861370583668385, "loss": 3.070327043533325, "step": 3426, "token_acc": 0.2919775506934651 }, { "epoch": 2.0087950747581353, "grad_norm": 0.3602691345753264, "learning_rate": 0.0001986120971383659, "loss": 3.081812858581543, "step": 3427, "token_acc": 0.28976210521496504 }, { "epoch": 2.0093814130753445, "grad_norm": 0.4063315734456028, "learning_rate": 0.0001986104875137202, "loss": 3.067838430404663, "step": 3428, "token_acc": 0.29133206102777987 }, { "epoch": 2.0099677513925536, "grad_norm": 0.4386921411857858, "learning_rate": 0.00019860887696276184, "loss": 3.0660181045532227, "step": 3429, "token_acc": 0.2910207190101613 }, { "epoch": 2.0105540897097627, "grad_norm": 0.3945006421655463, "learning_rate": 0.00019860726548550598, "loss": 3.036353826522827, "step": 3430, "token_acc": 0.29546336597042605 }, { "epoch": 2.0111404280269713, "grad_norm": 0.3728931073773427, "learning_rate": 0.00019860565308196774, "loss": 3.0315330028533936, "step": 3431, "token_acc": 0.2956075014443872 }, { "epoch": 2.0117267663441805, "grad_norm": 0.3634816282563962, "learning_rate": 0.0001986040397521622, "loss": 3.0377249717712402, "step": 3432, "token_acc": 0.2943529859744875 }, { "epoch": 2.0123131046613896, "grad_norm": 0.4164182457161111, "learning_rate": 0.00019860242549610464, "loss": 3.0665740966796875, "step": 3433, "token_acc": 0.2913614950713841 }, { "epoch": 2.0128994429785987, "grad_norm": 0.5309312411672618, "learning_rate": 0.0001986008103138101, "loss": 3.046165943145752, "step": 3434, "token_acc": 0.2952960849782499 }, { "epoch": 2.013485781295808, "grad_norm": 0.48631486628813625, "learning_rate": 0.0001985991942052939, "loss": 3.0645008087158203, "step": 3435, "token_acc": 0.29291761998940036 }, { "epoch": 2.014072119613017, "grad_norm": 0.43106715431796166, "learning_rate": 0.00019859757717057108, "loss": 3.063448667526245, "step": 3436, "token_acc": 0.29248950728223944 }, { "epoch": 2.0146584579302256, "grad_norm": 0.5491545871820467, "learning_rate": 0.0001985959592096569, "loss": 3.0395636558532715, "step": 3437, "token_acc": 0.29448824328339207 }, { "epoch": 2.0152447962474347, "grad_norm": 0.43026686914734347, "learning_rate": 0.00019859434032256653, "loss": 3.0461792945861816, "step": 3438, "token_acc": 0.29394151291843723 }, { "epoch": 2.015831134564644, "grad_norm": 0.4737817193480008, "learning_rate": 0.0001985927205093152, "loss": 3.0697813034057617, "step": 3439, "token_acc": 0.28973387451513577 }, { "epoch": 2.016417472881853, "grad_norm": 0.5152683331568874, "learning_rate": 0.00019859109976991813, "loss": 3.0762979984283447, "step": 3440, "token_acc": 0.29219706265867046 }, { "epoch": 2.017003811199062, "grad_norm": 0.37020473520260655, "learning_rate": 0.00019858947810439052, "loss": 3.063088893890381, "step": 3441, "token_acc": 0.2914163203726741 }, { "epoch": 2.0175901495162707, "grad_norm": 0.46565884157360854, "learning_rate": 0.0001985878555127476, "loss": 3.0816354751586914, "step": 3442, "token_acc": 0.2916014828594348 }, { "epoch": 2.01817648783348, "grad_norm": 0.40670115880252516, "learning_rate": 0.0001985862319950046, "loss": 3.1309866905212402, "step": 3443, "token_acc": 0.281848419868346 }, { "epoch": 2.018762826150689, "grad_norm": 0.3987171154986315, "learning_rate": 0.00019858460755117684, "loss": 3.087066650390625, "step": 3444, "token_acc": 0.28855074872693753 }, { "epoch": 2.019349164467898, "grad_norm": 0.4644166502967986, "learning_rate": 0.00019858298218127955, "loss": 3.088263511657715, "step": 3445, "token_acc": 0.288081842214614 }, { "epoch": 2.019935502785107, "grad_norm": 0.4039692632761868, "learning_rate": 0.00019858135588532796, "loss": 3.0819506645202637, "step": 3446, "token_acc": 0.2898649526896724 }, { "epoch": 2.0205218411023163, "grad_norm": 0.4255264216949989, "learning_rate": 0.00019857972866333737, "loss": 3.0539302825927734, "step": 3447, "token_acc": 0.29272292467946565 }, { "epoch": 2.021108179419525, "grad_norm": 0.32938719001048683, "learning_rate": 0.00019857810051532307, "loss": 3.023671865463257, "step": 3448, "token_acc": 0.2993614499292624 }, { "epoch": 2.021694517736734, "grad_norm": 0.3666656134199677, "learning_rate": 0.00019857647144130036, "loss": 3.032489061355591, "step": 3449, "token_acc": 0.29779452364550413 }, { "epoch": 2.022280856053943, "grad_norm": 0.3731779593142578, "learning_rate": 0.00019857484144128451, "loss": 3.063467025756836, "step": 3450, "token_acc": 0.29030540042911274 }, { "epoch": 2.0228671943711523, "grad_norm": 0.3733023019833434, "learning_rate": 0.00019857321051529087, "loss": 3.065704584121704, "step": 3451, "token_acc": 0.29059855505190363 }, { "epoch": 2.0234535326883614, "grad_norm": 0.41854461610494786, "learning_rate": 0.00019857157866333479, "loss": 3.117169141769409, "step": 3452, "token_acc": 0.28622212213580833 }, { "epoch": 2.02403987100557, "grad_norm": 0.383719543772172, "learning_rate": 0.0001985699458854315, "loss": 2.9990878105163574, "step": 3453, "token_acc": 0.3008129651276572 }, { "epoch": 2.024626209322779, "grad_norm": 0.3909280486530855, "learning_rate": 0.0001985683121815964, "loss": 3.1234984397888184, "step": 3454, "token_acc": 0.28396082501690095 }, { "epoch": 2.0252125476399883, "grad_norm": 0.4681280032661688, "learning_rate": 0.00019856667755184483, "loss": 3.0707132816314697, "step": 3455, "token_acc": 0.289514245679589 }, { "epoch": 2.0257988859571974, "grad_norm": 0.3912012569231109, "learning_rate": 0.00019856504199619213, "loss": 3.0272216796875, "step": 3456, "token_acc": 0.2984804400458735 }, { "epoch": 2.0263852242744065, "grad_norm": 0.3685152149409096, "learning_rate": 0.00019856340551465375, "loss": 3.0364389419555664, "step": 3457, "token_acc": 0.29583195774182897 }, { "epoch": 2.026971562591615, "grad_norm": 0.377225996263772, "learning_rate": 0.00019856176810724492, "loss": 3.0778818130493164, "step": 3458, "token_acc": 0.28806333260880385 }, { "epoch": 2.0275579009088243, "grad_norm": 0.46385651039535053, "learning_rate": 0.0001985601297739811, "loss": 2.998626947402954, "step": 3459, "token_acc": 0.30145061915386256 }, { "epoch": 2.0281442392260334, "grad_norm": 0.48582397782206593, "learning_rate": 0.0001985584905148777, "loss": 3.0886669158935547, "step": 3460, "token_acc": 0.2898088681087191 }, { "epoch": 2.0287305775432425, "grad_norm": 0.5842700349621689, "learning_rate": 0.00019855685032995005, "loss": 3.066650390625, "step": 3461, "token_acc": 0.29034488409968556 }, { "epoch": 2.0293169158604516, "grad_norm": 0.5087797994759428, "learning_rate": 0.00019855520921921365, "loss": 3.0560476779937744, "step": 3462, "token_acc": 0.29298299146153484 }, { "epoch": 2.0299032541776607, "grad_norm": 0.4210059422162485, "learning_rate": 0.00019855356718268384, "loss": 3.0664210319519043, "step": 3463, "token_acc": 0.2936837703651165 }, { "epoch": 2.0304895924948694, "grad_norm": 0.4591620472515439, "learning_rate": 0.00019855192422037608, "loss": 3.0583577156066895, "step": 3464, "token_acc": 0.2933609325859828 }, { "epoch": 2.0310759308120785, "grad_norm": 0.5921027615980743, "learning_rate": 0.00019855028033230576, "loss": 3.0732574462890625, "step": 3465, "token_acc": 0.2918837444648508 }, { "epoch": 2.0316622691292876, "grad_norm": 0.5137673535451546, "learning_rate": 0.00019854863551848836, "loss": 3.0678906440734863, "step": 3466, "token_acc": 0.2931201793793116 }, { "epoch": 2.0322486074464967, "grad_norm": 0.3943921958960617, "learning_rate": 0.00019854698977893938, "loss": 3.0622215270996094, "step": 3467, "token_acc": 0.2919001417793718 }, { "epoch": 2.032834945763706, "grad_norm": 0.4995869652818478, "learning_rate": 0.00019854534311367417, "loss": 3.094608783721924, "step": 3468, "token_acc": 0.28634284886548234 }, { "epoch": 2.0334212840809145, "grad_norm": 0.533878743324846, "learning_rate": 0.00019854369552270827, "loss": 3.0816850662231445, "step": 3469, "token_acc": 0.28921093562263495 }, { "epoch": 2.0340076223981236, "grad_norm": 0.40059612158034985, "learning_rate": 0.00019854204700605715, "loss": 3.0667741298675537, "step": 3470, "token_acc": 0.2913932884513108 }, { "epoch": 2.0345939607153327, "grad_norm": 0.3892015949638416, "learning_rate": 0.00019854039756373622, "loss": 3.0442728996276855, "step": 3471, "token_acc": 0.29434659742643227 }, { "epoch": 2.035180299032542, "grad_norm": 0.4033420378507342, "learning_rate": 0.0001985387471957611, "loss": 3.072866439819336, "step": 3472, "token_acc": 0.2899768014355261 }, { "epoch": 2.035766637349751, "grad_norm": 0.43480898610428975, "learning_rate": 0.00019853709590214727, "loss": 3.086298942565918, "step": 3473, "token_acc": 0.2895063902065166 }, { "epoch": 2.03635297566696, "grad_norm": 0.41313741237025026, "learning_rate": 0.00019853544368291014, "loss": 3.0476536750793457, "step": 3474, "token_acc": 0.2943198957719525 }, { "epoch": 2.0369393139841687, "grad_norm": 0.34062561269281616, "learning_rate": 0.0001985337905380653, "loss": 3.030444622039795, "step": 3475, "token_acc": 0.2968849954268857 }, { "epoch": 2.037525652301378, "grad_norm": 0.4832973665437105, "learning_rate": 0.00019853213646762828, "loss": 3.0658798217773438, "step": 3476, "token_acc": 0.29068780439834585 }, { "epoch": 2.038111990618587, "grad_norm": 0.3406440089866578, "learning_rate": 0.00019853048147161465, "loss": 3.0281901359558105, "step": 3477, "token_acc": 0.294366284410772 }, { "epoch": 2.038698328935796, "grad_norm": 0.34721901517972675, "learning_rate": 0.00019852882555003988, "loss": 3.0644588470458984, "step": 3478, "token_acc": 0.2929816358199139 }, { "epoch": 2.039284667253005, "grad_norm": 0.3278211538544744, "learning_rate": 0.00019852716870291957, "loss": 3.0754313468933105, "step": 3479, "token_acc": 0.29226345761473105 }, { "epoch": 2.039871005570214, "grad_norm": 0.36777062679505923, "learning_rate": 0.00019852551093026927, "loss": 3.0932412147521973, "step": 3480, "token_acc": 0.2874230559476872 }, { "epoch": 2.040457343887423, "grad_norm": 0.3582524845269563, "learning_rate": 0.00019852385223210459, "loss": 3.059009075164795, "step": 3481, "token_acc": 0.2947999550255073 }, { "epoch": 2.041043682204632, "grad_norm": 0.3687789468112822, "learning_rate": 0.00019852219260844105, "loss": 3.057910919189453, "step": 3482, "token_acc": 0.292932579009593 }, { "epoch": 2.041630020521841, "grad_norm": 0.41719645953801243, "learning_rate": 0.00019852053205929426, "loss": 3.035706043243408, "step": 3483, "token_acc": 0.29699823927248126 }, { "epoch": 2.0422163588390503, "grad_norm": 0.4020654247008344, "learning_rate": 0.00019851887058467985, "loss": 3.092768669128418, "step": 3484, "token_acc": 0.28870452936573976 }, { "epoch": 2.042802697156259, "grad_norm": 0.4206807940837558, "learning_rate": 0.00019851720818461343, "loss": 3.0886054039001465, "step": 3485, "token_acc": 0.28889771778598083 }, { "epoch": 2.043389035473468, "grad_norm": 0.4154958071402737, "learning_rate": 0.00019851554485911054, "loss": 3.057370185852051, "step": 3486, "token_acc": 0.29132255749434965 }, { "epoch": 2.043975373790677, "grad_norm": 0.410118890770655, "learning_rate": 0.0001985138806081869, "loss": 3.0689868927001953, "step": 3487, "token_acc": 0.2920596962047739 }, { "epoch": 2.0445617121078863, "grad_norm": 0.5391771603918918, "learning_rate": 0.0001985122154318581, "loss": 3.072880744934082, "step": 3488, "token_acc": 0.29119332693132827 }, { "epoch": 2.0451480504250954, "grad_norm": 0.4158624111171172, "learning_rate": 0.00019851054933013975, "loss": 3.0308005809783936, "step": 3489, "token_acc": 0.29695606072361447 }, { "epoch": 2.0457343887423045, "grad_norm": 0.37581621398878684, "learning_rate": 0.00019850888230304756, "loss": 3.028323173522949, "step": 3490, "token_acc": 0.29698904068610427 }, { "epoch": 2.046320727059513, "grad_norm": 0.43321913811391444, "learning_rate": 0.00019850721435059717, "loss": 3.059274196624756, "step": 3491, "token_acc": 0.2940401405451448 }, { "epoch": 2.0469070653767223, "grad_norm": 0.5072053974047472, "learning_rate": 0.0001985055454728042, "loss": 3.0779662132263184, "step": 3492, "token_acc": 0.2899087070220951 }, { "epoch": 2.0474934036939314, "grad_norm": 0.4286230359049069, "learning_rate": 0.00019850387566968443, "loss": 3.050110340118408, "step": 3493, "token_acc": 0.29475422006370006 }, { "epoch": 2.0480797420111405, "grad_norm": 0.4311795484638781, "learning_rate": 0.00019850220494125345, "loss": 3.088566303253174, "step": 3494, "token_acc": 0.28928430270458944 }, { "epoch": 2.0486660803283496, "grad_norm": 0.5333324649753729, "learning_rate": 0.00019850053328752699, "loss": 3.0589442253112793, "step": 3495, "token_acc": 0.2923652698479981 }, { "epoch": 2.0492524186455583, "grad_norm": 0.3646003012931469, "learning_rate": 0.00019849886070852073, "loss": 3.0413968563079834, "step": 3496, "token_acc": 0.29438729036060657 }, { "epoch": 2.0498387569627674, "grad_norm": 0.4564982574744297, "learning_rate": 0.00019849718720425043, "loss": 3.089567184448242, "step": 3497, "token_acc": 0.28769977714926864 }, { "epoch": 2.0504250952799765, "grad_norm": 0.4059631300798474, "learning_rate": 0.00019849551277473175, "loss": 3.08150053024292, "step": 3498, "token_acc": 0.28999146906990986 }, { "epoch": 2.0510114335971856, "grad_norm": 0.425047068153382, "learning_rate": 0.0001984938374199805, "loss": 3.053403854370117, "step": 3499, "token_acc": 0.2946768459578217 }, { "epoch": 2.0515977719143947, "grad_norm": 0.46989438353270113, "learning_rate": 0.00019849216114001234, "loss": 3.061589241027832, "step": 3500, "token_acc": 0.2916836650093831 }, { "epoch": 2.052184110231604, "grad_norm": 0.3759395336356404, "learning_rate": 0.00019849048393484305, "loss": 3.0550172328948975, "step": 3501, "token_acc": 0.29278502437383536 }, { "epoch": 2.0527704485488125, "grad_norm": 0.4078171189552231, "learning_rate": 0.00019848880580448838, "loss": 3.027357578277588, "step": 3502, "token_acc": 0.29764322521976355 }, { "epoch": 2.0533567868660216, "grad_norm": 0.3494611134069768, "learning_rate": 0.0001984871267489641, "loss": 3.0791642665863037, "step": 3503, "token_acc": 0.28908811097408005 }, { "epoch": 2.0539431251832307, "grad_norm": 0.42854641975385266, "learning_rate": 0.00019848544676828595, "loss": 3.0633440017700195, "step": 3504, "token_acc": 0.2923390811811173 }, { "epoch": 2.05452946350044, "grad_norm": 0.34141891620291975, "learning_rate": 0.00019848376586246977, "loss": 3.0867271423339844, "step": 3505, "token_acc": 0.28649241731234837 }, { "epoch": 2.055115801817649, "grad_norm": 0.4106934820384049, "learning_rate": 0.00019848208403153131, "loss": 3.045044422149658, "step": 3506, "token_acc": 0.29336567278454706 }, { "epoch": 2.0557021401348576, "grad_norm": 0.423858969468152, "learning_rate": 0.0001984804012754864, "loss": 3.0614547729492188, "step": 3507, "token_acc": 0.2937394146002519 }, { "epoch": 2.0562884784520667, "grad_norm": 0.3661184348548931, "learning_rate": 0.00019847871759435078, "loss": 3.078434705734253, "step": 3508, "token_acc": 0.29179172681670423 }, { "epoch": 2.056874816769276, "grad_norm": 0.37746873292411587, "learning_rate": 0.00019847703298814034, "loss": 3.074838399887085, "step": 3509, "token_acc": 0.2877032958018007 }, { "epoch": 2.057461155086485, "grad_norm": 0.4806787404143008, "learning_rate": 0.00019847534745687085, "loss": 3.074542999267578, "step": 3510, "token_acc": 0.2896674140982699 }, { "epoch": 2.058047493403694, "grad_norm": 0.44611190387507604, "learning_rate": 0.0001984736610005582, "loss": 3.037825345993042, "step": 3511, "token_acc": 0.29517590067757393 }, { "epoch": 2.0586338317209028, "grad_norm": 0.503512057534701, "learning_rate": 0.00019847197361921818, "loss": 3.0500144958496094, "step": 3512, "token_acc": 0.29422766432879965 }, { "epoch": 2.059220170038112, "grad_norm": 0.46373397802087224, "learning_rate": 0.00019847028531286666, "loss": 3.036174774169922, "step": 3513, "token_acc": 0.2946828143935494 }, { "epoch": 2.059806508355321, "grad_norm": 0.4001270520429139, "learning_rate": 0.0001984685960815195, "loss": 3.0489931106567383, "step": 3514, "token_acc": 0.29345834367458584 }, { "epoch": 2.06039284667253, "grad_norm": 0.48306416006728575, "learning_rate": 0.0001984669059251926, "loss": 3.057803153991699, "step": 3515, "token_acc": 0.29279026266844005 }, { "epoch": 2.060979184989739, "grad_norm": 0.4813301321880618, "learning_rate": 0.00019846521484390177, "loss": 3.0901288986206055, "step": 3516, "token_acc": 0.28752015242561924 }, { "epoch": 2.0615655233069483, "grad_norm": 0.3701617411106914, "learning_rate": 0.0001984635228376629, "loss": 3.0545034408569336, "step": 3517, "token_acc": 0.294136307132062 }, { "epoch": 2.062151861624157, "grad_norm": 0.39510654241557325, "learning_rate": 0.00019846182990649198, "loss": 3.0678539276123047, "step": 3518, "token_acc": 0.29048785186987913 }, { "epoch": 2.062738199941366, "grad_norm": 0.3848599230601269, "learning_rate": 0.00019846013605040482, "loss": 3.051362991333008, "step": 3519, "token_acc": 0.29433504343937333 }, { "epoch": 2.063324538258575, "grad_norm": 0.47196422224442947, "learning_rate": 0.00019845844126941734, "loss": 3.066368341445923, "step": 3520, "token_acc": 0.29149874039342605 }, { "epoch": 2.0639108765757843, "grad_norm": 0.41602885467357575, "learning_rate": 0.0001984567455635455, "loss": 3.0600249767303467, "step": 3521, "token_acc": 0.2937404009241823 }, { "epoch": 2.0644972148929934, "grad_norm": 0.3666488477481219, "learning_rate": 0.0001984550489328052, "loss": 3.0707387924194336, "step": 3522, "token_acc": 0.29189663987833814 }, { "epoch": 2.065083553210202, "grad_norm": 0.39247047727059314, "learning_rate": 0.0001984533513772124, "loss": 3.0812063217163086, "step": 3523, "token_acc": 0.2898525144189089 }, { "epoch": 2.065669891527411, "grad_norm": 0.4739724032491044, "learning_rate": 0.000198451652896783, "loss": 3.0333967208862305, "step": 3524, "token_acc": 0.29536122574097257 }, { "epoch": 2.0662562298446203, "grad_norm": 0.4046881247518256, "learning_rate": 0.00019844995349153303, "loss": 3.118180751800537, "step": 3525, "token_acc": 0.28439861418010204 }, { "epoch": 2.0668425681618294, "grad_norm": 0.41100666233856453, "learning_rate": 0.00019844825316147837, "loss": 3.0766966342926025, "step": 3526, "token_acc": 0.2892769383593873 }, { "epoch": 2.0674289064790385, "grad_norm": 0.46547796178567485, "learning_rate": 0.00019844655190663505, "loss": 3.047614097595215, "step": 3527, "token_acc": 0.294341582500903 }, { "epoch": 2.068015244796247, "grad_norm": 0.35337948019950033, "learning_rate": 0.00019844484972701904, "loss": 3.045591354370117, "step": 3528, "token_acc": 0.29449108994356576 }, { "epoch": 2.0686015831134563, "grad_norm": 0.4224475103668469, "learning_rate": 0.0001984431466226463, "loss": 3.0759239196777344, "step": 3529, "token_acc": 0.2911184509717808 }, { "epoch": 2.0691879214306654, "grad_norm": 0.4288655247363128, "learning_rate": 0.0001984414425935329, "loss": 3.0538578033447266, "step": 3530, "token_acc": 0.2934134629579427 }, { "epoch": 2.0697742597478745, "grad_norm": 0.4671349349305833, "learning_rate": 0.00019843973763969476, "loss": 3.075728416442871, "step": 3531, "token_acc": 0.2908591003335437 }, { "epoch": 2.0703605980650837, "grad_norm": 0.38404919381097924, "learning_rate": 0.00019843803176114794, "loss": 3.1061019897460938, "step": 3532, "token_acc": 0.28568734431519643 }, { "epoch": 2.0709469363822928, "grad_norm": 0.4100207609153704, "learning_rate": 0.00019843632495790842, "loss": 3.0284202098846436, "step": 3533, "token_acc": 0.29634538768004903 }, { "epoch": 2.0715332746995014, "grad_norm": 0.45128159799533396, "learning_rate": 0.00019843461722999231, "loss": 3.0789566040039062, "step": 3534, "token_acc": 0.29046187324898687 }, { "epoch": 2.0721196130167105, "grad_norm": 0.38901709703660614, "learning_rate": 0.0001984329085774156, "loss": 3.0560970306396484, "step": 3535, "token_acc": 0.29271873239187507 }, { "epoch": 2.0727059513339197, "grad_norm": 0.4465639630016911, "learning_rate": 0.0001984311990001944, "loss": 3.0605874061584473, "step": 3536, "token_acc": 0.2925848534185065 }, { "epoch": 2.0732922896511288, "grad_norm": 0.35969836533497596, "learning_rate": 0.0001984294884983447, "loss": 3.044510841369629, "step": 3537, "token_acc": 0.2960840079950481 }, { "epoch": 2.073878627968338, "grad_norm": 0.36275055131073514, "learning_rate": 0.00019842777707188255, "loss": 3.061445713043213, "step": 3538, "token_acc": 0.2928920044764984 }, { "epoch": 2.0744649662855466, "grad_norm": 0.4217384446246162, "learning_rate": 0.0001984260647208241, "loss": 3.050929546356201, "step": 3539, "token_acc": 0.29208985944555915 }, { "epoch": 2.0750513046027557, "grad_norm": 0.46711186579893876, "learning_rate": 0.0001984243514451854, "loss": 3.0724148750305176, "step": 3540, "token_acc": 0.2902850297529916 }, { "epoch": 2.0756376429199648, "grad_norm": 0.3748401946186969, "learning_rate": 0.00019842263724498252, "loss": 3.0598361492156982, "step": 3541, "token_acc": 0.29168408703341925 }, { "epoch": 2.076223981237174, "grad_norm": 0.43450602566105617, "learning_rate": 0.00019842092212023164, "loss": 3.0598678588867188, "step": 3542, "token_acc": 0.29293968737458453 }, { "epoch": 2.076810319554383, "grad_norm": 0.4359906955665724, "learning_rate": 0.0001984192060709488, "loss": 3.0696616172790527, "step": 3543, "token_acc": 0.29211011531225833 }, { "epoch": 2.077396657871592, "grad_norm": 0.3959562260704288, "learning_rate": 0.00019841748909715014, "loss": 3.1017396450042725, "step": 3544, "token_acc": 0.28789396891037483 }, { "epoch": 2.077982996188801, "grad_norm": 0.44461100843793183, "learning_rate": 0.00019841577119885178, "loss": 3.035391330718994, "step": 3545, "token_acc": 0.29514798431866746 }, { "epoch": 2.07856933450601, "grad_norm": 0.3532264613789898, "learning_rate": 0.00019841405237606987, "loss": 3.0907740592956543, "step": 3546, "token_acc": 0.2869562303794467 }, { "epoch": 2.079155672823219, "grad_norm": 0.4557584814246335, "learning_rate": 0.00019841233262882056, "loss": 3.084862232208252, "step": 3547, "token_acc": 0.28934312617317165 }, { "epoch": 2.079742011140428, "grad_norm": 0.49553249109578207, "learning_rate": 0.00019841061195711998, "loss": 3.0846166610717773, "step": 3548, "token_acc": 0.2892670938341002 }, { "epoch": 2.0803283494576372, "grad_norm": 0.4253727510143276, "learning_rate": 0.00019840889036098434, "loss": 3.095020055770874, "step": 3549, "token_acc": 0.28618513563594217 }, { "epoch": 2.080914687774846, "grad_norm": 0.5072426324183269, "learning_rate": 0.00019840716784042973, "loss": 3.0633978843688965, "step": 3550, "token_acc": 0.2922023444535381 }, { "epoch": 2.081501026092055, "grad_norm": 0.5077644718027557, "learning_rate": 0.00019840544439547243, "loss": 3.046329975128174, "step": 3551, "token_acc": 0.2928570870132882 }, { "epoch": 2.082087364409264, "grad_norm": 0.396293448052419, "learning_rate": 0.00019840372002612858, "loss": 3.0718302726745605, "step": 3552, "token_acc": 0.2918956853142913 }, { "epoch": 2.0826737027264732, "grad_norm": 0.4145590438564462, "learning_rate": 0.00019840199473241437, "loss": 3.0591797828674316, "step": 3553, "token_acc": 0.29409241618862164 }, { "epoch": 2.0832600410436823, "grad_norm": 0.4891850166619332, "learning_rate": 0.000198400268514346, "loss": 3.0417685508728027, "step": 3554, "token_acc": 0.29463054650288295 }, { "epoch": 2.0838463793608915, "grad_norm": 0.41410691911906794, "learning_rate": 0.00019839854137193976, "loss": 3.053943634033203, "step": 3555, "token_acc": 0.29240936998944117 }, { "epoch": 2.0844327176781, "grad_norm": 0.37777659197907265, "learning_rate": 0.0001983968133052118, "loss": 3.0568556785583496, "step": 3556, "token_acc": 0.29333862248084247 }, { "epoch": 2.0850190559953092, "grad_norm": 0.366472130976773, "learning_rate": 0.00019839508431417833, "loss": 3.0705909729003906, "step": 3557, "token_acc": 0.2892433806745124 }, { "epoch": 2.0856053943125183, "grad_norm": 0.3240194660734338, "learning_rate": 0.00019839335439885564, "loss": 3.0572164058685303, "step": 3558, "token_acc": 0.2940084037419833 }, { "epoch": 2.0861917326297275, "grad_norm": 0.3625460327748282, "learning_rate": 0.00019839162355926, "loss": 3.0532264709472656, "step": 3559, "token_acc": 0.2932914433561264 }, { "epoch": 2.0867780709469366, "grad_norm": 0.4250733354309948, "learning_rate": 0.0001983898917954076, "loss": 3.072566032409668, "step": 3560, "token_acc": 0.290817749767538 }, { "epoch": 2.0873644092641452, "grad_norm": 0.49356300637028977, "learning_rate": 0.0001983881591073148, "loss": 3.0586729049682617, "step": 3561, "token_acc": 0.29224468469704684 }, { "epoch": 2.0879507475813543, "grad_norm": 0.34772695162412937, "learning_rate": 0.0001983864254949978, "loss": 3.0868895053863525, "step": 3562, "token_acc": 0.2887401761554135 }, { "epoch": 2.0885370858985635, "grad_norm": 0.35740403260207454, "learning_rate": 0.0001983846909584729, "loss": 3.071143627166748, "step": 3563, "token_acc": 0.2886153719809987 }, { "epoch": 2.0891234242157726, "grad_norm": 0.4774885905723965, "learning_rate": 0.0001983829554977564, "loss": 3.059811592102051, "step": 3564, "token_acc": 0.29389422311813695 }, { "epoch": 2.0897097625329817, "grad_norm": 0.4847454805016588, "learning_rate": 0.00019838121911286462, "loss": 3.1391334533691406, "step": 3565, "token_acc": 0.2814356069413033 }, { "epoch": 2.0902961008501904, "grad_norm": 0.3587535203891912, "learning_rate": 0.00019837948180381388, "loss": 2.9756364822387695, "step": 3566, "token_acc": 0.30496048826788513 }, { "epoch": 2.0908824391673995, "grad_norm": 0.390749577880432, "learning_rate": 0.00019837774357062046, "loss": 3.079336166381836, "step": 3567, "token_acc": 0.2900676104167093 }, { "epoch": 2.0914687774846086, "grad_norm": 0.6380019264262023, "learning_rate": 0.0001983760044133007, "loss": 3.0843658447265625, "step": 3568, "token_acc": 0.2902829855146074 }, { "epoch": 2.0920551158018177, "grad_norm": 0.5799534810194714, "learning_rate": 0.00019837426433187092, "loss": 3.0439553260803223, "step": 3569, "token_acc": 0.2951409162579062 }, { "epoch": 2.092641454119027, "grad_norm": 0.40703731287461004, "learning_rate": 0.0001983725233263475, "loss": 3.046616315841675, "step": 3570, "token_acc": 0.2942870987864201 }, { "epoch": 2.093227792436236, "grad_norm": 0.5884535435377783, "learning_rate": 0.0001983707813967468, "loss": 3.0519230365753174, "step": 3571, "token_acc": 0.293330434509278 }, { "epoch": 2.0938141307534446, "grad_norm": 0.7312989872820534, "learning_rate": 0.00019836903854308514, "loss": 3.0527243614196777, "step": 3572, "token_acc": 0.2947487193722418 }, { "epoch": 2.0944004690706537, "grad_norm": 0.5193068659164491, "learning_rate": 0.00019836729476537893, "loss": 3.0395874977111816, "step": 3573, "token_acc": 0.2959683803038354 }, { "epoch": 2.094986807387863, "grad_norm": 0.5114592300729086, "learning_rate": 0.00019836555006364455, "loss": 3.0629146099090576, "step": 3574, "token_acc": 0.2909723401545385 }, { "epoch": 2.095573145705072, "grad_norm": 0.5611570010123049, "learning_rate": 0.00019836380443789836, "loss": 3.0437188148498535, "step": 3575, "token_acc": 0.2947131670255222 }, { "epoch": 2.096159484022281, "grad_norm": 0.3803346739074631, "learning_rate": 0.00019836205788815677, "loss": 3.07240629196167, "step": 3576, "token_acc": 0.29029812262903415 }, { "epoch": 2.0967458223394897, "grad_norm": 0.5190646534061584, "learning_rate": 0.00019836031041443623, "loss": 3.052419662475586, "step": 3577, "token_acc": 0.2956201283206957 }, { "epoch": 2.097332160656699, "grad_norm": 0.3816142594046331, "learning_rate": 0.00019835856201675306, "loss": 3.02846097946167, "step": 3578, "token_acc": 0.2976310870031565 }, { "epoch": 2.097918498973908, "grad_norm": 0.5663487880740444, "learning_rate": 0.00019835681269512377, "loss": 3.1270220279693604, "step": 3579, "token_acc": 0.28162552237934163 }, { "epoch": 2.098504837291117, "grad_norm": 0.45960083021039966, "learning_rate": 0.00019835506244956475, "loss": 3.0054173469543457, "step": 3580, "token_acc": 0.30061533805118157 }, { "epoch": 2.099091175608326, "grad_norm": 0.4427309389453107, "learning_rate": 0.00019835331128009246, "loss": 3.041897773742676, "step": 3581, "token_acc": 0.2945209058583184 }, { "epoch": 2.099677513925535, "grad_norm": 0.4573966337718266, "learning_rate": 0.00019835155918672333, "loss": 3.0773720741271973, "step": 3582, "token_acc": 0.2885612635437443 }, { "epoch": 2.100263852242744, "grad_norm": 0.4249680322796581, "learning_rate": 0.00019834980616947388, "loss": 3.079244613647461, "step": 3583, "token_acc": 0.29033944765154196 }, { "epoch": 2.100850190559953, "grad_norm": 0.3891381530475288, "learning_rate": 0.00019834805222836046, "loss": 3.045846462249756, "step": 3584, "token_acc": 0.2951125642321624 }, { "epoch": 2.101436528877162, "grad_norm": 0.34748584613210326, "learning_rate": 0.00019834629736339968, "loss": 3.005671501159668, "step": 3585, "token_acc": 0.29949670803564177 }, { "epoch": 2.1020228671943713, "grad_norm": 0.44467570604991064, "learning_rate": 0.00019834454157460792, "loss": 3.073763370513916, "step": 3586, "token_acc": 0.2902493151183876 }, { "epoch": 2.1026092055115804, "grad_norm": 0.3458869544268884, "learning_rate": 0.00019834278486200173, "loss": 3.090545415878296, "step": 3587, "token_acc": 0.2888066756883443 }, { "epoch": 2.103195543828789, "grad_norm": 0.3900961667263977, "learning_rate": 0.00019834102722559758, "loss": 3.091229200363159, "step": 3588, "token_acc": 0.2885511324323078 }, { "epoch": 2.103781882145998, "grad_norm": 0.3320571433168456, "learning_rate": 0.00019833926866541198, "loss": 3.06968355178833, "step": 3589, "token_acc": 0.2912585402238717 }, { "epoch": 2.1043682204632073, "grad_norm": 0.3838331464453844, "learning_rate": 0.0001983375091814615, "loss": 3.0774216651916504, "step": 3590, "token_acc": 0.2901162507280102 }, { "epoch": 2.1049545587804164, "grad_norm": 0.41270502186090874, "learning_rate": 0.00019833574877376262, "loss": 3.0986461639404297, "step": 3591, "token_acc": 0.2887607917456307 }, { "epoch": 2.1055408970976255, "grad_norm": 0.3003580476476052, "learning_rate": 0.0001983339874423319, "loss": 3.118112564086914, "step": 3592, "token_acc": 0.2849725548312168 }, { "epoch": 2.106127235414834, "grad_norm": 0.4178560943863483, "learning_rate": 0.00019833222518718583, "loss": 3.03887939453125, "step": 3593, "token_acc": 0.2962801343916902 }, { "epoch": 2.1067135737320433, "grad_norm": 0.38321917436233666, "learning_rate": 0.00019833046200834107, "loss": 3.068979263305664, "step": 3594, "token_acc": 0.2901594531868983 }, { "epoch": 2.1072999120492524, "grad_norm": 0.3756314445747655, "learning_rate": 0.0001983286979058141, "loss": 3.1027374267578125, "step": 3595, "token_acc": 0.286743620214658 }, { "epoch": 2.1078862503664615, "grad_norm": 0.32342494623087414, "learning_rate": 0.0001983269328796215, "loss": 3.083435297012329, "step": 3596, "token_acc": 0.29027164009254364 }, { "epoch": 2.1084725886836706, "grad_norm": 0.37319058647254666, "learning_rate": 0.00019832516692977988, "loss": 3.0633890628814697, "step": 3597, "token_acc": 0.2916057334424412 }, { "epoch": 2.1090589270008797, "grad_norm": 0.4336198739366461, "learning_rate": 0.0001983234000563058, "loss": 3.0593342781066895, "step": 3598, "token_acc": 0.29293118752245495 }, { "epoch": 2.1096452653180884, "grad_norm": 0.32690073017604865, "learning_rate": 0.00019832163225921585, "loss": 3.117563247680664, "step": 3599, "token_acc": 0.28314599168368987 }, { "epoch": 2.1102316036352975, "grad_norm": 0.3650514687629308, "learning_rate": 0.00019831986353852668, "loss": 3.080416679382324, "step": 3600, "token_acc": 0.28977178319271435 }, { "epoch": 2.1108179419525066, "grad_norm": 0.44222125901793113, "learning_rate": 0.00019831809389425487, "loss": 3.0483977794647217, "step": 3601, "token_acc": 0.2936824368114064 }, { "epoch": 2.1114042802697157, "grad_norm": 0.3303022505116269, "learning_rate": 0.00019831632332641705, "loss": 3.019620418548584, "step": 3602, "token_acc": 0.29712843988971543 }, { "epoch": 2.111990618586925, "grad_norm": 0.4310503879767, "learning_rate": 0.00019831455183502987, "loss": 3.0526235103607178, "step": 3603, "token_acc": 0.2936478526876064 }, { "epoch": 2.1125769569041335, "grad_norm": 0.3252515406254481, "learning_rate": 0.00019831277942010996, "loss": 3.005171537399292, "step": 3604, "token_acc": 0.3017371700667372 }, { "epoch": 2.1131632952213426, "grad_norm": 0.4063407584818642, "learning_rate": 0.00019831100608167393, "loss": 3.0962395668029785, "step": 3605, "token_acc": 0.2898247611172116 }, { "epoch": 2.1137496335385517, "grad_norm": 0.401776452029666, "learning_rate": 0.0001983092318197385, "loss": 3.0379395484924316, "step": 3606, "token_acc": 0.2968602661264974 }, { "epoch": 2.114335971855761, "grad_norm": 0.45533308632482494, "learning_rate": 0.00019830745663432033, "loss": 3.047060489654541, "step": 3607, "token_acc": 0.2928545838763441 }, { "epoch": 2.11492231017297, "grad_norm": 0.38065552204836894, "learning_rate": 0.00019830568052543604, "loss": 3.055933952331543, "step": 3608, "token_acc": 0.29325146180943207 }, { "epoch": 2.115508648490179, "grad_norm": 0.3793836552798932, "learning_rate": 0.00019830390349310237, "loss": 3.0560786724090576, "step": 3609, "token_acc": 0.29247514516061657 }, { "epoch": 2.1160949868073877, "grad_norm": 0.34437856825947905, "learning_rate": 0.00019830212553733598, "loss": 3.0915746688842773, "step": 3610, "token_acc": 0.2892308702095056 }, { "epoch": 2.116681325124597, "grad_norm": 0.32393239925124906, "learning_rate": 0.00019830034665815357, "loss": 3.0861353874206543, "step": 3611, "token_acc": 0.28766087306173427 }, { "epoch": 2.117267663441806, "grad_norm": 0.3560461128357273, "learning_rate": 0.0001982985668555719, "loss": 3.0503063201904297, "step": 3612, "token_acc": 0.2921358264896557 }, { "epoch": 2.117854001759015, "grad_norm": 0.3320827010446719, "learning_rate": 0.00019829678612960766, "loss": 3.023803472518921, "step": 3613, "token_acc": 0.29769529437364495 }, { "epoch": 2.118440340076224, "grad_norm": 0.36805577620539576, "learning_rate": 0.00019829500448027753, "loss": 3.077540874481201, "step": 3614, "token_acc": 0.29110032746741815 }, { "epoch": 2.119026678393433, "grad_norm": 0.30927608147347996, "learning_rate": 0.0001982932219075983, "loss": 3.0324454307556152, "step": 3615, "token_acc": 0.29671131236388637 }, { "epoch": 2.119613016710642, "grad_norm": 0.33998613401633365, "learning_rate": 0.00019829143841158673, "loss": 3.0263547897338867, "step": 3616, "token_acc": 0.29628313150815516 }, { "epoch": 2.120199355027851, "grad_norm": 0.3396585255933413, "learning_rate": 0.00019828965399225953, "loss": 3.0975794792175293, "step": 3617, "token_acc": 0.2871586301540765 }, { "epoch": 2.12078569334506, "grad_norm": 0.36105248356697384, "learning_rate": 0.00019828786864963346, "loss": 3.059741735458374, "step": 3618, "token_acc": 0.2938550148957299 }, { "epoch": 2.1213720316622693, "grad_norm": 0.33108308210637405, "learning_rate": 0.00019828608238372532, "loss": 3.068727970123291, "step": 3619, "token_acc": 0.28999059891953044 }, { "epoch": 2.121958369979478, "grad_norm": 0.3608578403710765, "learning_rate": 0.00019828429519455187, "loss": 3.0625953674316406, "step": 3620, "token_acc": 0.29295049379322013 }, { "epoch": 2.122544708296687, "grad_norm": 0.4259711691064494, "learning_rate": 0.00019828250708212993, "loss": 3.092543601989746, "step": 3621, "token_acc": 0.2874105797193742 }, { "epoch": 2.123131046613896, "grad_norm": 0.341405840844254, "learning_rate": 0.00019828071804647626, "loss": 3.0377516746520996, "step": 3622, "token_acc": 0.2950345631802328 }, { "epoch": 2.1237173849311053, "grad_norm": 0.4702461284941566, "learning_rate": 0.00019827892808760766, "loss": 3.068410873413086, "step": 3623, "token_acc": 0.2917305352720484 }, { "epoch": 2.1243037232483144, "grad_norm": 0.4682700889031738, "learning_rate": 0.00019827713720554097, "loss": 3.0613012313842773, "step": 3624, "token_acc": 0.2921854644668456 }, { "epoch": 2.1248900615655235, "grad_norm": 0.33271664388555416, "learning_rate": 0.000198275345400293, "loss": 3.0392584800720215, "step": 3625, "token_acc": 0.2965059206496532 }, { "epoch": 2.125476399882732, "grad_norm": 0.43401411141698526, "learning_rate": 0.00019827355267188065, "loss": 3.0414907932281494, "step": 3626, "token_acc": 0.2959803243409423 }, { "epoch": 2.1260627381999413, "grad_norm": 0.43417332445628765, "learning_rate": 0.00019827175902032063, "loss": 2.9982614517211914, "step": 3627, "token_acc": 0.3014506581665152 }, { "epoch": 2.1266490765171504, "grad_norm": 0.4581844679578655, "learning_rate": 0.00019826996444562988, "loss": 3.056980609893799, "step": 3628, "token_acc": 0.2935469278474155 }, { "epoch": 2.1272354148343595, "grad_norm": 0.44801763389926275, "learning_rate": 0.00019826816894782525, "loss": 3.05255126953125, "step": 3629, "token_acc": 0.2929825891654879 }, { "epoch": 2.1278217531515686, "grad_norm": 0.4038462941930441, "learning_rate": 0.00019826637252692356, "loss": 3.0721967220306396, "step": 3630, "token_acc": 0.2903196482233252 }, { "epoch": 2.1284080914687773, "grad_norm": 0.49993955074044333, "learning_rate": 0.00019826457518294172, "loss": 3.086122512817383, "step": 3631, "token_acc": 0.28825710108604846 }, { "epoch": 2.1289944297859864, "grad_norm": 0.40774511724742474, "learning_rate": 0.00019826277691589663, "loss": 3.057485580444336, "step": 3632, "token_acc": 0.2919997121459009 }, { "epoch": 2.1295807681031955, "grad_norm": 0.33267034489594965, "learning_rate": 0.00019826097772580517, "loss": 3.0426793098449707, "step": 3633, "token_acc": 0.2927497341906739 }, { "epoch": 2.1301671064204046, "grad_norm": 0.3668040859612643, "learning_rate": 0.0001982591776126842, "loss": 3.0451996326446533, "step": 3634, "token_acc": 0.2947234168142339 }, { "epoch": 2.1307534447376137, "grad_norm": 0.3871607421722465, "learning_rate": 0.00019825737657655067, "loss": 3.0688562393188477, "step": 3635, "token_acc": 0.2916939141579278 }, { "epoch": 2.1313397830548224, "grad_norm": 0.4196305514267362, "learning_rate": 0.0001982555746174215, "loss": 3.067142963409424, "step": 3636, "token_acc": 0.292745619200713 }, { "epoch": 2.1319261213720315, "grad_norm": 0.42732608281155054, "learning_rate": 0.0001982537717353136, "loss": 3.072443962097168, "step": 3637, "token_acc": 0.2909729998098578 }, { "epoch": 2.1325124596892406, "grad_norm": 0.34238518770614135, "learning_rate": 0.00019825196793024391, "loss": 3.088256359100342, "step": 3638, "token_acc": 0.2889087585962052 }, { "epoch": 2.1330987980064497, "grad_norm": 0.3673543886394881, "learning_rate": 0.0001982501632022294, "loss": 3.078174591064453, "step": 3639, "token_acc": 0.28934118473298936 }, { "epoch": 2.133685136323659, "grad_norm": 0.3707717997723761, "learning_rate": 0.000198248357551287, "loss": 3.109175682067871, "step": 3640, "token_acc": 0.284658011273742 }, { "epoch": 2.134271474640868, "grad_norm": 0.36075496666507634, "learning_rate": 0.00019824655097743367, "loss": 3.0754547119140625, "step": 3641, "token_acc": 0.2921208131773927 }, { "epoch": 2.1348578129580766, "grad_norm": 0.37531363618475966, "learning_rate": 0.00019824474348068637, "loss": 3.0740091800689697, "step": 3642, "token_acc": 0.29018498930369846 }, { "epoch": 2.1354441512752858, "grad_norm": 0.4778193315338914, "learning_rate": 0.00019824293506106206, "loss": 3.0912137031555176, "step": 3643, "token_acc": 0.2896417318355617 }, { "epoch": 2.136030489592495, "grad_norm": 0.47419736043746874, "learning_rate": 0.00019824112571857782, "loss": 3.0936503410339355, "step": 3644, "token_acc": 0.2892625004539017 }, { "epoch": 2.136616827909704, "grad_norm": 0.37064465349918374, "learning_rate": 0.00019823931545325053, "loss": 3.093658208847046, "step": 3645, "token_acc": 0.2884854673313202 }, { "epoch": 2.137203166226913, "grad_norm": 0.42386620265996405, "learning_rate": 0.0001982375042650973, "loss": 3.0095455646514893, "step": 3646, "token_acc": 0.2975479631134509 }, { "epoch": 2.1377895045441218, "grad_norm": 0.38172988276294245, "learning_rate": 0.00019823569215413503, "loss": 3.098313808441162, "step": 3647, "token_acc": 0.28812614862004854 }, { "epoch": 2.138375842861331, "grad_norm": 0.3086022099696844, "learning_rate": 0.00019823387912038087, "loss": 3.024193286895752, "step": 3648, "token_acc": 0.2973437992872654 }, { "epoch": 2.13896218117854, "grad_norm": 0.4149553561911551, "learning_rate": 0.00019823206516385175, "loss": 3.102437734603882, "step": 3649, "token_acc": 0.2871082475186682 }, { "epoch": 2.139548519495749, "grad_norm": 0.35447322722730157, "learning_rate": 0.00019823025028456478, "loss": 3.039538860321045, "step": 3650, "token_acc": 0.2963027653486353 }, { "epoch": 2.140134857812958, "grad_norm": 0.3255488570801878, "learning_rate": 0.00019822843448253694, "loss": 3.057581901550293, "step": 3651, "token_acc": 0.29423873722289995 }, { "epoch": 2.1407211961301673, "grad_norm": 0.35937595188739524, "learning_rate": 0.00019822661775778535, "loss": 3.068493366241455, "step": 3652, "token_acc": 0.2912323808421418 }, { "epoch": 2.141307534447376, "grad_norm": 0.3656363996058169, "learning_rate": 0.00019822480011032702, "loss": 3.123793125152588, "step": 3653, "token_acc": 0.2831064512280795 }, { "epoch": 2.141893872764585, "grad_norm": 0.3319438930487994, "learning_rate": 0.0001982229815401791, "loss": 3.050457715988159, "step": 3654, "token_acc": 0.2943593819162207 }, { "epoch": 2.142480211081794, "grad_norm": 0.36744461951942464, "learning_rate": 0.00019822116204735858, "loss": 3.0768117904663086, "step": 3655, "token_acc": 0.28981598109126677 }, { "epoch": 2.1430665493990033, "grad_norm": 0.34098719091200086, "learning_rate": 0.0001982193416318826, "loss": 3.019047260284424, "step": 3656, "token_acc": 0.2988497291877813 }, { "epoch": 2.1436528877162124, "grad_norm": 0.3734420531063816, "learning_rate": 0.0001982175202937683, "loss": 3.0314571857452393, "step": 3657, "token_acc": 0.2969687103517598 }, { "epoch": 2.144239226033421, "grad_norm": 0.42365889381392163, "learning_rate": 0.0001982156980330327, "loss": 3.0838067531585693, "step": 3658, "token_acc": 0.28869940999896493 }, { "epoch": 2.14482556435063, "grad_norm": 0.38891779796554593, "learning_rate": 0.000198213874849693, "loss": 3.061216354370117, "step": 3659, "token_acc": 0.291639099595341 }, { "epoch": 2.1454119026678393, "grad_norm": 0.3325639042483432, "learning_rate": 0.00019821205074376625, "loss": 3.082282304763794, "step": 3660, "token_acc": 0.2883842363549314 }, { "epoch": 2.1459982409850484, "grad_norm": 0.3796427915147779, "learning_rate": 0.00019821022571526965, "loss": 3.0447168350219727, "step": 3661, "token_acc": 0.2934772797518593 }, { "epoch": 2.1465845793022575, "grad_norm": 0.42503563264359, "learning_rate": 0.0001982083997642203, "loss": 3.074496269226074, "step": 3662, "token_acc": 0.2913147753630993 }, { "epoch": 2.1471709176194667, "grad_norm": 0.4017721397348817, "learning_rate": 0.0001982065728906354, "loss": 3.087440013885498, "step": 3663, "token_acc": 0.2895735220605646 }, { "epoch": 2.1477572559366753, "grad_norm": 0.39635501228307246, "learning_rate": 0.00019820474509453208, "loss": 3.038784980773926, "step": 3664, "token_acc": 0.29654691111341086 }, { "epoch": 2.1483435942538844, "grad_norm": 0.4024113592292357, "learning_rate": 0.0001982029163759275, "loss": 3.0895156860351562, "step": 3665, "token_acc": 0.28952099237591566 }, { "epoch": 2.1489299325710935, "grad_norm": 0.44816104698756043, "learning_rate": 0.00019820108673483886, "loss": 3.0261764526367188, "step": 3666, "token_acc": 0.29754149285524095 }, { "epoch": 2.1495162708883027, "grad_norm": 0.5295161669300592, "learning_rate": 0.00019819925617128333, "loss": 3.055893659591675, "step": 3667, "token_acc": 0.2922143921059951 }, { "epoch": 2.1501026092055118, "grad_norm": 0.3745309728424183, "learning_rate": 0.0001981974246852781, "loss": 3.045581340789795, "step": 3668, "token_acc": 0.2951537127711179 }, { "epoch": 2.1506889475227204, "grad_norm": 0.46366028984048285, "learning_rate": 0.00019819559227684041, "loss": 3.039191722869873, "step": 3669, "token_acc": 0.2964182206817488 }, { "epoch": 2.1512752858399296, "grad_norm": 0.46228103744879745, "learning_rate": 0.00019819375894598745, "loss": 3.114645004272461, "step": 3670, "token_acc": 0.2831469669188854 }, { "epoch": 2.1518616241571387, "grad_norm": 0.3885683672920312, "learning_rate": 0.00019819192469273643, "loss": 3.0345194339752197, "step": 3671, "token_acc": 0.2951403904568877 }, { "epoch": 2.1524479624743478, "grad_norm": 0.391563814427664, "learning_rate": 0.0001981900895171046, "loss": 3.062636375427246, "step": 3672, "token_acc": 0.2917323983230324 }, { "epoch": 2.153034300791557, "grad_norm": 0.35677312329398886, "learning_rate": 0.0001981882534191092, "loss": 3.0529580116271973, "step": 3673, "token_acc": 0.2935443357233669 }, { "epoch": 2.1536206391087656, "grad_norm": 0.36994516324867055, "learning_rate": 0.00019818641639876745, "loss": 3.1113665103912354, "step": 3674, "token_acc": 0.2855378192782019 }, { "epoch": 2.1542069774259747, "grad_norm": 0.35141992234043656, "learning_rate": 0.00019818457845609665, "loss": 3.0312695503234863, "step": 3675, "token_acc": 0.29815066016783515 }, { "epoch": 2.154793315743184, "grad_norm": 0.3352608846684348, "learning_rate": 0.00019818273959111403, "loss": 3.055251359939575, "step": 3676, "token_acc": 0.2929994477719723 }, { "epoch": 2.155379654060393, "grad_norm": 0.3835726200052877, "learning_rate": 0.00019818089980383686, "loss": 3.064302921295166, "step": 3677, "token_acc": 0.29110212655902795 }, { "epoch": 2.155965992377602, "grad_norm": 0.4107215412375538, "learning_rate": 0.00019817905909428246, "loss": 3.0144827365875244, "step": 3678, "token_acc": 0.29772714736374833 }, { "epoch": 2.1565523306948107, "grad_norm": 0.3996105029647825, "learning_rate": 0.0001981772174624681, "loss": 3.096673011779785, "step": 3679, "token_acc": 0.28871941043396254 }, { "epoch": 2.15713866901202, "grad_norm": 0.42256106857756087, "learning_rate": 0.00019817537490841102, "loss": 3.072201728820801, "step": 3680, "token_acc": 0.2923764787289252 }, { "epoch": 2.157725007329229, "grad_norm": 0.43398330282105035, "learning_rate": 0.00019817353143212864, "loss": 3.045921802520752, "step": 3681, "token_acc": 0.294512248707754 }, { "epoch": 2.158311345646438, "grad_norm": 0.38959889749447035, "learning_rate": 0.00019817168703363823, "loss": 3.0750393867492676, "step": 3682, "token_acc": 0.2915563169793132 }, { "epoch": 2.158897683963647, "grad_norm": 0.40338758755030873, "learning_rate": 0.00019816984171295708, "loss": 3.1409642696380615, "step": 3683, "token_acc": 0.2817941674770178 }, { "epoch": 2.1594840222808562, "grad_norm": 0.4803805033609024, "learning_rate": 0.00019816799547010255, "loss": 3.0706191062927246, "step": 3684, "token_acc": 0.29218219470403345 }, { "epoch": 2.160070360598065, "grad_norm": 0.41681756888313753, "learning_rate": 0.000198166148305092, "loss": 3.067589282989502, "step": 3685, "token_acc": 0.28988539109580935 }, { "epoch": 2.160656698915274, "grad_norm": 0.4722609300380041, "learning_rate": 0.00019816430021794279, "loss": 3.0518429279327393, "step": 3686, "token_acc": 0.294074548468628 }, { "epoch": 2.161243037232483, "grad_norm": 0.512334638116427, "learning_rate": 0.0001981624512086722, "loss": 3.054753065109253, "step": 3687, "token_acc": 0.2924440385192344 }, { "epoch": 2.1618293755496922, "grad_norm": 0.43203731777618976, "learning_rate": 0.0001981606012772977, "loss": 3.097407817840576, "step": 3688, "token_acc": 0.2886756882589548 }, { "epoch": 2.1624157138669013, "grad_norm": 0.49109976001930006, "learning_rate": 0.00019815875042383663, "loss": 3.067046642303467, "step": 3689, "token_acc": 0.29054734331194076 }, { "epoch": 2.16300205218411, "grad_norm": 0.41444076177104566, "learning_rate": 0.00019815689864830635, "loss": 3.0211291313171387, "step": 3690, "token_acc": 0.29784430133670087 }, { "epoch": 2.163588390501319, "grad_norm": 0.41823770915807945, "learning_rate": 0.00019815504595072428, "loss": 3.05775785446167, "step": 3691, "token_acc": 0.2954785125329384 }, { "epoch": 2.1641747288185282, "grad_norm": 0.3505367231140321, "learning_rate": 0.00019815319233110784, "loss": 3.0847840309143066, "step": 3692, "token_acc": 0.28727815071343954 }, { "epoch": 2.1647610671357373, "grad_norm": 0.38823383222485475, "learning_rate": 0.00019815133778947438, "loss": 3.060549736022949, "step": 3693, "token_acc": 0.29188239766158525 }, { "epoch": 2.1653474054529465, "grad_norm": 0.38876453238600234, "learning_rate": 0.00019814948232584135, "loss": 3.081352710723877, "step": 3694, "token_acc": 0.28898431134161956 }, { "epoch": 2.1659337437701556, "grad_norm": 0.34525571729830784, "learning_rate": 0.00019814762594022624, "loss": 3.085944175720215, "step": 3695, "token_acc": 0.2898330377722016 }, { "epoch": 2.1665200820873642, "grad_norm": 0.3991600859950033, "learning_rate": 0.00019814576863264646, "loss": 3.0598087310791016, "step": 3696, "token_acc": 0.2908605857757875 }, { "epoch": 2.1671064204045734, "grad_norm": 0.3223574454318465, "learning_rate": 0.00019814391040311936, "loss": 3.0664358139038086, "step": 3697, "token_acc": 0.29078196534218365 }, { "epoch": 2.1676927587217825, "grad_norm": 0.40365444226727976, "learning_rate": 0.00019814205125166253, "loss": 3.0346033573150635, "step": 3698, "token_acc": 0.29554123577404773 }, { "epoch": 2.1682790970389916, "grad_norm": 0.3736109211610393, "learning_rate": 0.00019814019117829335, "loss": 3.033038377761841, "step": 3699, "token_acc": 0.2984612110468904 }, { "epoch": 2.1688654353562007, "grad_norm": 0.44397360630406835, "learning_rate": 0.00019813833018302935, "loss": 3.0904383659362793, "step": 3700, "token_acc": 0.28873367509653275 }, { "epoch": 2.1694517736734094, "grad_norm": 0.33196705625696693, "learning_rate": 0.00019813646826588794, "loss": 3.027376174926758, "step": 3701, "token_acc": 0.29806078785082013 }, { "epoch": 2.1700381119906185, "grad_norm": 0.32084385391430037, "learning_rate": 0.00019813460542688667, "loss": 3.019136905670166, "step": 3702, "token_acc": 0.297492864306102 }, { "epoch": 2.1706244503078276, "grad_norm": 0.3707520187668173, "learning_rate": 0.000198132741666043, "loss": 3.0417094230651855, "step": 3703, "token_acc": 0.2953370317331078 }, { "epoch": 2.1712107886250367, "grad_norm": 0.34314510502736817, "learning_rate": 0.0001981308769833745, "loss": 3.0925612449645996, "step": 3704, "token_acc": 0.2871026767322786 }, { "epoch": 2.171797126942246, "grad_norm": 0.3903723409006208, "learning_rate": 0.00019812901137889862, "loss": 3.080919027328491, "step": 3705, "token_acc": 0.28975915854582107 }, { "epoch": 2.172383465259455, "grad_norm": 0.38624853678336335, "learning_rate": 0.0001981271448526329, "loss": 3.0907652378082275, "step": 3706, "token_acc": 0.28893884618889937 }, { "epoch": 2.1729698035766636, "grad_norm": 0.3537372371911025, "learning_rate": 0.0001981252774045949, "loss": 3.0456995964050293, "step": 3707, "token_acc": 0.2950165346045653 }, { "epoch": 2.1735561418938727, "grad_norm": 0.4416946674237238, "learning_rate": 0.00019812340903480212, "loss": 3.0639238357543945, "step": 3708, "token_acc": 0.2912564735361303 }, { "epoch": 2.174142480211082, "grad_norm": 0.42151114482919977, "learning_rate": 0.00019812153974327215, "loss": 3.069520950317383, "step": 3709, "token_acc": 0.2913897195151553 }, { "epoch": 2.174728818528291, "grad_norm": 0.3907630218206638, "learning_rate": 0.00019811966953002256, "loss": 3.044097423553467, "step": 3710, "token_acc": 0.2941923600463518 }, { "epoch": 2.1753151568455, "grad_norm": 0.48432813726910307, "learning_rate": 0.00019811779839507088, "loss": 3.0825676918029785, "step": 3711, "token_acc": 0.28926997674851435 }, { "epoch": 2.1759014951627087, "grad_norm": 0.3411011523866471, "learning_rate": 0.00019811592633843468, "loss": 3.1120285987854004, "step": 3712, "token_acc": 0.2856225382167172 }, { "epoch": 2.176487833479918, "grad_norm": 0.3884136175374284, "learning_rate": 0.00019811405336013155, "loss": 3.0508508682250977, "step": 3713, "token_acc": 0.29531432083904263 }, { "epoch": 2.177074171797127, "grad_norm": 0.35211602105428624, "learning_rate": 0.00019811217946017916, "loss": 3.058058261871338, "step": 3714, "token_acc": 0.2938839083768557 }, { "epoch": 2.177660510114336, "grad_norm": 0.33612920765302057, "learning_rate": 0.000198110304638595, "loss": 3.0568323135375977, "step": 3715, "token_acc": 0.290863650679156 }, { "epoch": 2.178246848431545, "grad_norm": 0.360766931547209, "learning_rate": 0.00019810842889539675, "loss": 3.0815136432647705, "step": 3716, "token_acc": 0.2905791015204376 }, { "epoch": 2.1788331867487543, "grad_norm": 0.4020471899748261, "learning_rate": 0.000198106552230602, "loss": 3.06553316116333, "step": 3717, "token_acc": 0.2923001327241021 }, { "epoch": 2.179419525065963, "grad_norm": 0.42272187443519893, "learning_rate": 0.00019810467464422842, "loss": 3.0341875553131104, "step": 3718, "token_acc": 0.29733858539266506 }, { "epoch": 2.180005863383172, "grad_norm": 0.3360927921504579, "learning_rate": 0.00019810279613629358, "loss": 3.051051139831543, "step": 3719, "token_acc": 0.29595057293800087 }, { "epoch": 2.180592201700381, "grad_norm": 0.39729716729628617, "learning_rate": 0.00019810091670681518, "loss": 3.0622317790985107, "step": 3720, "token_acc": 0.2912510805890941 }, { "epoch": 2.1811785400175903, "grad_norm": 0.32908310056703416, "learning_rate": 0.0001980990363558109, "loss": 3.054521083831787, "step": 3721, "token_acc": 0.2921703505833829 }, { "epoch": 2.1817648783347994, "grad_norm": 0.386307670285044, "learning_rate": 0.0001980971550832983, "loss": 3.0914382934570312, "step": 3722, "token_acc": 0.2895025584216912 }, { "epoch": 2.182351216652008, "grad_norm": 0.313379061385587, "learning_rate": 0.00019809527288929517, "loss": 3.1284141540527344, "step": 3723, "token_acc": 0.28403632430311665 }, { "epoch": 2.182937554969217, "grad_norm": 0.4012839364386284, "learning_rate": 0.0001980933897738191, "loss": 3.079728364944458, "step": 3724, "token_acc": 0.29032503575787 }, { "epoch": 2.1835238932864263, "grad_norm": 0.3376288134715868, "learning_rate": 0.00019809150573688782, "loss": 3.0606932640075684, "step": 3725, "token_acc": 0.2940896670912368 }, { "epoch": 2.1841102316036354, "grad_norm": 0.37850435503879637, "learning_rate": 0.00019808962077851904, "loss": 3.089174509048462, "step": 3726, "token_acc": 0.28817984538179847 }, { "epoch": 2.1846965699208445, "grad_norm": 0.3417610880713508, "learning_rate": 0.00019808773489873044, "loss": 3.0899174213409424, "step": 3727, "token_acc": 0.28772069888307916 }, { "epoch": 2.185282908238053, "grad_norm": 0.3448155256888338, "learning_rate": 0.00019808584809753973, "loss": 3.073763370513916, "step": 3728, "token_acc": 0.2902577979965322 }, { "epoch": 2.1858692465552623, "grad_norm": 0.37869887681035297, "learning_rate": 0.0001980839603749647, "loss": 3.096000909805298, "step": 3729, "token_acc": 0.2867157637629594 }, { "epoch": 2.1864555848724714, "grad_norm": 0.30815461213676887, "learning_rate": 0.000198082071731023, "loss": 3.082163095474243, "step": 3730, "token_acc": 0.2895739185747386 }, { "epoch": 2.1870419231896805, "grad_norm": 0.3798261165595876, "learning_rate": 0.0001980801821657324, "loss": 3.0781822204589844, "step": 3731, "token_acc": 0.2920762308794104 }, { "epoch": 2.1876282615068896, "grad_norm": 0.38542476451329444, "learning_rate": 0.00019807829167911066, "loss": 3.0817389488220215, "step": 3732, "token_acc": 0.2891235556101292 }, { "epoch": 2.1882145998240983, "grad_norm": 0.3513885841387477, "learning_rate": 0.00019807640027117552, "loss": 3.0607097148895264, "step": 3733, "token_acc": 0.29277806342781737 }, { "epoch": 2.1888009381413074, "grad_norm": 0.41253787986230267, "learning_rate": 0.00019807450794194479, "loss": 3.0622215270996094, "step": 3734, "token_acc": 0.292786076673758 }, { "epoch": 2.1893872764585165, "grad_norm": 0.4603926510231856, "learning_rate": 0.00019807261469143616, "loss": 3.1241812705993652, "step": 3735, "token_acc": 0.28356866414875226 }, { "epoch": 2.1899736147757256, "grad_norm": 0.43023403271799454, "learning_rate": 0.0001980707205196675, "loss": 3.0950329303741455, "step": 3736, "token_acc": 0.289215398804732 }, { "epoch": 2.1905599530929347, "grad_norm": 0.39486298422908156, "learning_rate": 0.00019806882542665658, "loss": 3.0755863189697266, "step": 3737, "token_acc": 0.289848019280402 }, { "epoch": 2.191146291410144, "grad_norm": 0.4274911388865813, "learning_rate": 0.0001980669294124212, "loss": 3.053633689880371, "step": 3738, "token_acc": 0.2946253276592132 }, { "epoch": 2.1917326297273525, "grad_norm": 0.35166222960225946, "learning_rate": 0.00019806503247697915, "loss": 3.119694232940674, "step": 3739, "token_acc": 0.28351120089426696 }, { "epoch": 2.1923189680445616, "grad_norm": 0.3398753185384973, "learning_rate": 0.00019806313462034827, "loss": 3.097565174102783, "step": 3740, "token_acc": 0.28844821614132315 }, { "epoch": 2.1929053063617707, "grad_norm": 0.35258555656756, "learning_rate": 0.00019806123584254637, "loss": 3.084041118621826, "step": 3741, "token_acc": 0.2908209720884709 }, { "epoch": 2.19349164467898, "grad_norm": 0.42318864486046637, "learning_rate": 0.0001980593361435913, "loss": 3.0764129161834717, "step": 3742, "token_acc": 0.2896307161708884 }, { "epoch": 2.194077982996189, "grad_norm": 0.3976186552131438, "learning_rate": 0.0001980574355235009, "loss": 3.0898749828338623, "step": 3743, "token_acc": 0.28834941437490325 }, { "epoch": 2.1946643213133976, "grad_norm": 0.3839498437849161, "learning_rate": 0.00019805553398229308, "loss": 3.068784236907959, "step": 3744, "token_acc": 0.29113553374720536 }, { "epoch": 2.1952506596306067, "grad_norm": 0.43724637168886465, "learning_rate": 0.0001980536315199856, "loss": 3.0914196968078613, "step": 3745, "token_acc": 0.28861965310653986 }, { "epoch": 2.195836997947816, "grad_norm": 0.43664382706208227, "learning_rate": 0.00019805172813659638, "loss": 3.111490249633789, "step": 3746, "token_acc": 0.2863194515501893 }, { "epoch": 2.196423336265025, "grad_norm": 0.31336303931539444, "learning_rate": 0.0001980498238321433, "loss": 3.097074508666992, "step": 3747, "token_acc": 0.2879774102795686 }, { "epoch": 2.197009674582234, "grad_norm": 0.43244391044620856, "learning_rate": 0.00019804791860664428, "loss": 3.108273983001709, "step": 3748, "token_acc": 0.2849440632497388 }, { "epoch": 2.197596012899443, "grad_norm": 0.38702353495385483, "learning_rate": 0.00019804601246011715, "loss": 3.087984085083008, "step": 3749, "token_acc": 0.28965369701831795 }, { "epoch": 2.198182351216652, "grad_norm": 0.4649827179467996, "learning_rate": 0.00019804410539257984, "loss": 3.1016461849212646, "step": 3750, "token_acc": 0.2874370685697149 }, { "epoch": 2.198768689533861, "grad_norm": 0.4312944788086562, "learning_rate": 0.00019804219740405033, "loss": 3.0383925437927246, "step": 3751, "token_acc": 0.29606947056280386 }, { "epoch": 2.19935502785107, "grad_norm": 0.3723340407649122, "learning_rate": 0.00019804028849454644, "loss": 3.0107102394104004, "step": 3752, "token_acc": 0.29872457718259815 }, { "epoch": 2.199941366168279, "grad_norm": 0.3469114638423638, "learning_rate": 0.0001980383786640862, "loss": 3.0981040000915527, "step": 3753, "token_acc": 0.2874047811733272 }, { "epoch": 2.2005277044854883, "grad_norm": 0.4506402245473934, "learning_rate": 0.00019803646791268745, "loss": 3.0969290733337402, "step": 3754, "token_acc": 0.28787476987255656 }, { "epoch": 2.201114042802697, "grad_norm": 0.46638260040785406, "learning_rate": 0.00019803455624036823, "loss": 3.027515411376953, "step": 3755, "token_acc": 0.29801522892406235 }, { "epoch": 2.201700381119906, "grad_norm": 0.33156631027238403, "learning_rate": 0.0001980326436471464, "loss": 3.046071767807007, "step": 3756, "token_acc": 0.29211779875956434 }, { "epoch": 2.202286719437115, "grad_norm": 0.45936327928293064, "learning_rate": 0.00019803073013304005, "loss": 3.0666890144348145, "step": 3757, "token_acc": 0.29142374356481754 }, { "epoch": 2.2028730577543243, "grad_norm": 0.4330485638394041, "learning_rate": 0.00019802881569806706, "loss": 3.0573019981384277, "step": 3758, "token_acc": 0.2932921376132419 }, { "epoch": 2.2034593960715334, "grad_norm": 0.32823989054508146, "learning_rate": 0.00019802690034224544, "loss": 3.039001941680908, "step": 3759, "token_acc": 0.2973704437443842 }, { "epoch": 2.2040457343887425, "grad_norm": 0.3985716694670369, "learning_rate": 0.00019802498406559319, "loss": 3.0624570846557617, "step": 3760, "token_acc": 0.2942314959779733 }, { "epoch": 2.204632072705951, "grad_norm": 0.3475653878603628, "learning_rate": 0.0001980230668681283, "loss": 3.035620927810669, "step": 3761, "token_acc": 0.29664889769896335 }, { "epoch": 2.2052184110231603, "grad_norm": 0.303161854563871, "learning_rate": 0.00019802114874986878, "loss": 3.047999382019043, "step": 3762, "token_acc": 0.2938517105855886 }, { "epoch": 2.2058047493403694, "grad_norm": 0.3183014408933996, "learning_rate": 0.00019801922971083267, "loss": 3.0662879943847656, "step": 3763, "token_acc": 0.2949339884929763 }, { "epoch": 2.2063910876575785, "grad_norm": 0.30922080298438437, "learning_rate": 0.00019801730975103798, "loss": 3.089285373687744, "step": 3764, "token_acc": 0.2897957524435264 }, { "epoch": 2.2069774259747876, "grad_norm": 0.3647627126866834, "learning_rate": 0.00019801538887050276, "loss": 3.0610663890838623, "step": 3765, "token_acc": 0.29263268093089784 }, { "epoch": 2.2075637642919963, "grad_norm": 0.3548869738237354, "learning_rate": 0.00019801346706924496, "loss": 3.097532272338867, "step": 3766, "token_acc": 0.2893190312194055 }, { "epoch": 2.2081501026092054, "grad_norm": 0.386845850131625, "learning_rate": 0.0001980115443472828, "loss": 3.076988458633423, "step": 3767, "token_acc": 0.2919435906513681 }, { "epoch": 2.2087364409264145, "grad_norm": 0.39030053704045653, "learning_rate": 0.0001980096207046342, "loss": 3.042177677154541, "step": 3768, "token_acc": 0.29525644968029946 }, { "epoch": 2.2093227792436236, "grad_norm": 0.3761438614966452, "learning_rate": 0.00019800769614131732, "loss": 3.0546975135803223, "step": 3769, "token_acc": 0.295493929725487 }, { "epoch": 2.2099091175608327, "grad_norm": 0.3405743866745887, "learning_rate": 0.00019800577065735018, "loss": 3.1010794639587402, "step": 3770, "token_acc": 0.2866519756918455 }, { "epoch": 2.210495455878042, "grad_norm": 0.41410058291171886, "learning_rate": 0.0001980038442527509, "loss": 3.045254707336426, "step": 3771, "token_acc": 0.2944036184564366 }, { "epoch": 2.2110817941952505, "grad_norm": 0.4401809710796626, "learning_rate": 0.00019800191692753756, "loss": 3.090334892272949, "step": 3772, "token_acc": 0.2887820109418976 }, { "epoch": 2.2116681325124596, "grad_norm": 0.38983100982597424, "learning_rate": 0.00019799998868172826, "loss": 3.06935453414917, "step": 3773, "token_acc": 0.2912610884364482 }, { "epoch": 2.2122544708296688, "grad_norm": 0.29262096958798306, "learning_rate": 0.00019799805951534113, "loss": 3.0704498291015625, "step": 3774, "token_acc": 0.29252268168594997 }, { "epoch": 2.212840809146878, "grad_norm": 0.3892065196542264, "learning_rate": 0.00019799612942839428, "loss": 3.072063446044922, "step": 3775, "token_acc": 0.28996862356321096 }, { "epoch": 2.213427147464087, "grad_norm": 0.36885457081080436, "learning_rate": 0.00019799419842090585, "loss": 3.0799636840820312, "step": 3776, "token_acc": 0.29031433682596475 }, { "epoch": 2.2140134857812956, "grad_norm": 0.39705518386303346, "learning_rate": 0.00019799226649289397, "loss": 3.0894603729248047, "step": 3777, "token_acc": 0.287961105148163 }, { "epoch": 2.2145998240985048, "grad_norm": 0.38423808063994297, "learning_rate": 0.00019799033364437677, "loss": 3.0624430179595947, "step": 3778, "token_acc": 0.29273675914968667 }, { "epoch": 2.215186162415714, "grad_norm": 0.3979961271454017, "learning_rate": 0.00019798839987537247, "loss": 3.086780548095703, "step": 3779, "token_acc": 0.29031007957700494 }, { "epoch": 2.215772500732923, "grad_norm": 0.3749297890915773, "learning_rate": 0.00019798646518589917, "loss": 3.1032357215881348, "step": 3780, "token_acc": 0.28681809337525216 }, { "epoch": 2.216358839050132, "grad_norm": 0.33741139449465946, "learning_rate": 0.00019798452957597512, "loss": 3.097425937652588, "step": 3781, "token_acc": 0.2862956873656254 }, { "epoch": 2.2169451773673408, "grad_norm": 0.32814606864043283, "learning_rate": 0.0001979825930456184, "loss": 3.0357799530029297, "step": 3782, "token_acc": 0.29361347340336835 }, { "epoch": 2.21753151568455, "grad_norm": 0.35110870899416013, "learning_rate": 0.00019798065559484726, "loss": 3.0791003704071045, "step": 3783, "token_acc": 0.2904191383988618 }, { "epoch": 2.218117854001759, "grad_norm": 0.33338815320793347, "learning_rate": 0.00019797871722367986, "loss": 3.023411273956299, "step": 3784, "token_acc": 0.29736474903320975 }, { "epoch": 2.218704192318968, "grad_norm": 0.32189336014873654, "learning_rate": 0.0001979767779321345, "loss": 3.054868221282959, "step": 3785, "token_acc": 0.29375726530664387 }, { "epoch": 2.219290530636177, "grad_norm": 0.3950931748868594, "learning_rate": 0.0001979748377202293, "loss": 3.0988640785217285, "step": 3786, "token_acc": 0.2864457880236215 }, { "epoch": 2.219876868953386, "grad_norm": 0.35024807643860684, "learning_rate": 0.00019797289658798252, "loss": 3.0997066497802734, "step": 3787, "token_acc": 0.28544134307038627 }, { "epoch": 2.220463207270595, "grad_norm": 0.3157189785983546, "learning_rate": 0.00019797095453541244, "loss": 3.0601439476013184, "step": 3788, "token_acc": 0.29199822072851284 }, { "epoch": 2.221049545587804, "grad_norm": 0.3721884559213196, "learning_rate": 0.0001979690115625372, "loss": 3.0136845111846924, "step": 3789, "token_acc": 0.2987582272352564 }, { "epoch": 2.221635883905013, "grad_norm": 0.3563329142879425, "learning_rate": 0.00019796706766937513, "loss": 3.0651297569274902, "step": 3790, "token_acc": 0.29267789972690267 }, { "epoch": 2.2222222222222223, "grad_norm": 0.32371765088238985, "learning_rate": 0.00019796512285594447, "loss": 3.068263292312622, "step": 3791, "token_acc": 0.29246109402665177 }, { "epoch": 2.2228085605394314, "grad_norm": 0.3576982355119628, "learning_rate": 0.0001979631771222635, "loss": 3.0744235515594482, "step": 3792, "token_acc": 0.2912931966168587 }, { "epoch": 2.22339489885664, "grad_norm": 0.3509447133289146, "learning_rate": 0.0001979612304683505, "loss": 3.0494871139526367, "step": 3793, "token_acc": 0.29288084913996837 }, { "epoch": 2.223981237173849, "grad_norm": 0.3245086267603309, "learning_rate": 0.00019795928289422375, "loss": 3.0446319580078125, "step": 3794, "token_acc": 0.2952821348974849 }, { "epoch": 2.2245675754910583, "grad_norm": 0.2887129474105757, "learning_rate": 0.00019795733439990153, "loss": 3.083066940307617, "step": 3795, "token_acc": 0.29071148546364944 }, { "epoch": 2.2251539138082674, "grad_norm": 0.3746965333089583, "learning_rate": 0.0001979553849854021, "loss": 3.096168279647827, "step": 3796, "token_acc": 0.2875678476585271 }, { "epoch": 2.2257402521254765, "grad_norm": 0.4970990265567727, "learning_rate": 0.0001979534346507439, "loss": 3.102025032043457, "step": 3797, "token_acc": 0.28624045484727756 }, { "epoch": 2.226326590442685, "grad_norm": 0.528823959950421, "learning_rate": 0.00019795148339594513, "loss": 3.1168415546417236, "step": 3798, "token_acc": 0.2835878135157938 }, { "epoch": 2.2269129287598943, "grad_norm": 0.37567138498554514, "learning_rate": 0.00019794953122102417, "loss": 3.0929999351501465, "step": 3799, "token_acc": 0.28861375482612334 }, { "epoch": 2.2274992670771034, "grad_norm": 0.4782532200071338, "learning_rate": 0.00019794757812599938, "loss": 3.065145492553711, "step": 3800, "token_acc": 0.2919840760241428 }, { "epoch": 2.2280856053943126, "grad_norm": 0.3529207924501019, "learning_rate": 0.00019794562411088907, "loss": 3.035465955734253, "step": 3801, "token_acc": 0.29645383812850684 }, { "epoch": 2.2286719437115217, "grad_norm": 0.4843789820636367, "learning_rate": 0.0001979436691757116, "loss": 3.0939836502075195, "step": 3802, "token_acc": 0.28905246300916304 }, { "epoch": 2.2292582820287308, "grad_norm": 0.32421219430694725, "learning_rate": 0.00019794171332048532, "loss": 3.0550537109375, "step": 3803, "token_acc": 0.2923750065304843 }, { "epoch": 2.2298446203459394, "grad_norm": 0.41660244963102266, "learning_rate": 0.00019793975654522865, "loss": 3.0699100494384766, "step": 3804, "token_acc": 0.291048301435472 }, { "epoch": 2.2304309586631486, "grad_norm": 0.39986855016388834, "learning_rate": 0.00019793779884995992, "loss": 3.0800912380218506, "step": 3805, "token_acc": 0.2904607195164564 }, { "epoch": 2.2310172969803577, "grad_norm": 0.3917467610845519, "learning_rate": 0.00019793584023469754, "loss": 3.0499680042266846, "step": 3806, "token_acc": 0.2944743265778527 }, { "epoch": 2.231603635297567, "grad_norm": 0.39406904650667385, "learning_rate": 0.00019793388069945994, "loss": 3.1288533210754395, "step": 3807, "token_acc": 0.2828698410015765 }, { "epoch": 2.232189973614776, "grad_norm": 0.3974274998257378, "learning_rate": 0.00019793192024426546, "loss": 3.0647244453430176, "step": 3808, "token_acc": 0.29065758570858685 }, { "epoch": 2.2327763119319846, "grad_norm": 0.35364735209597536, "learning_rate": 0.00019792995886913257, "loss": 3.072148323059082, "step": 3809, "token_acc": 0.29011482542273326 }, { "epoch": 2.2333626502491937, "grad_norm": 0.3485360375514206, "learning_rate": 0.0001979279965740797, "loss": 3.080418348312378, "step": 3810, "token_acc": 0.2898313014861661 }, { "epoch": 2.233948988566403, "grad_norm": 0.4027758447025723, "learning_rate": 0.00019792603335912524, "loss": 3.0551085472106934, "step": 3811, "token_acc": 0.2929281457204357 }, { "epoch": 2.234535326883612, "grad_norm": 0.37345352377432484, "learning_rate": 0.0001979240692242877, "loss": 3.04703426361084, "step": 3812, "token_acc": 0.2945132982314027 }, { "epoch": 2.235121665200821, "grad_norm": 0.38550662001234454, "learning_rate": 0.0001979221041695854, "loss": 3.114452838897705, "step": 3813, "token_acc": 0.28602288409647986 }, { "epoch": 2.23570800351803, "grad_norm": 0.3949782844491148, "learning_rate": 0.00019792013819503693, "loss": 3.0616583824157715, "step": 3814, "token_acc": 0.2910285624849123 }, { "epoch": 2.236294341835239, "grad_norm": 0.3184934679251132, "learning_rate": 0.00019791817130066072, "loss": 3.1092820167541504, "step": 3815, "token_acc": 0.28525305162989606 }, { "epoch": 2.236880680152448, "grad_norm": 0.39104097290584805, "learning_rate": 0.00019791620348647522, "loss": 3.0384013652801514, "step": 3816, "token_acc": 0.2947135282440177 }, { "epoch": 2.237467018469657, "grad_norm": 0.3085213492631322, "learning_rate": 0.00019791423475249892, "loss": 3.0341625213623047, "step": 3817, "token_acc": 0.29787547682162024 }, { "epoch": 2.238053356786866, "grad_norm": 0.32723313844278035, "learning_rate": 0.00019791226509875034, "loss": 3.032844066619873, "step": 3818, "token_acc": 0.2974975041939833 }, { "epoch": 2.2386396951040752, "grad_norm": 0.34137692023776933, "learning_rate": 0.00019791029452524797, "loss": 3.0497708320617676, "step": 3819, "token_acc": 0.294521695865085 }, { "epoch": 2.239226033421284, "grad_norm": 0.3248472053422265, "learning_rate": 0.00019790832303201032, "loss": 3.042205810546875, "step": 3820, "token_acc": 0.29325077867325955 }, { "epoch": 2.239812371738493, "grad_norm": 0.36807091289750077, "learning_rate": 0.00019790635061905592, "loss": 3.0656116008758545, "step": 3821, "token_acc": 0.29280753305729934 }, { "epoch": 2.240398710055702, "grad_norm": 0.3697817276162584, "learning_rate": 0.00019790437728640329, "loss": 3.081299304962158, "step": 3822, "token_acc": 0.2899944442974682 }, { "epoch": 2.2409850483729112, "grad_norm": 0.34097419942137264, "learning_rate": 0.00019790240303407092, "loss": 3.0831947326660156, "step": 3823, "token_acc": 0.2899033144339468 }, { "epoch": 2.2415713866901203, "grad_norm": 0.35510218559109447, "learning_rate": 0.00019790042786207743, "loss": 3.051969528198242, "step": 3824, "token_acc": 0.29546849203924275 }, { "epoch": 2.2421577250073295, "grad_norm": 0.3226829422229039, "learning_rate": 0.00019789845177044132, "loss": 3.0572009086608887, "step": 3825, "token_acc": 0.29258560005020684 }, { "epoch": 2.242744063324538, "grad_norm": 0.3602060258290102, "learning_rate": 0.00019789647475918122, "loss": 3.0947394371032715, "step": 3826, "token_acc": 0.28806298483269654 }, { "epoch": 2.2433304016417472, "grad_norm": 0.3786246733709363, "learning_rate": 0.00019789449682831563, "loss": 3.0828568935394287, "step": 3827, "token_acc": 0.2879579940784604 }, { "epoch": 2.2439167399589564, "grad_norm": 0.5105043078199382, "learning_rate": 0.00019789251797786315, "loss": 3.097623348236084, "step": 3828, "token_acc": 0.2876481104228163 }, { "epoch": 2.2445030782761655, "grad_norm": 0.5331372829565559, "learning_rate": 0.00019789053820784238, "loss": 3.027785301208496, "step": 3829, "token_acc": 0.2957140666530362 }, { "epoch": 2.2450894165933746, "grad_norm": 0.34342607185948726, "learning_rate": 0.0001978885575182719, "loss": 3.069899082183838, "step": 3830, "token_acc": 0.29045358287696915 }, { "epoch": 2.2456757549105832, "grad_norm": 0.42637796526843513, "learning_rate": 0.00019788657590917038, "loss": 3.0789177417755127, "step": 3831, "token_acc": 0.28990223013522015 }, { "epoch": 2.2462620932277924, "grad_norm": 0.36022269529952483, "learning_rate": 0.0001978845933805563, "loss": 3.0939173698425293, "step": 3832, "token_acc": 0.28790971366114665 }, { "epoch": 2.2468484315450015, "grad_norm": 0.39269173541876246, "learning_rate": 0.00019788260993244843, "loss": 3.062408924102783, "step": 3833, "token_acc": 0.29059209989390355 }, { "epoch": 2.2474347698622106, "grad_norm": 0.31103159161968225, "learning_rate": 0.0001978806255648653, "loss": 3.0612998008728027, "step": 3834, "token_acc": 0.2915513857885721 }, { "epoch": 2.2480211081794197, "grad_norm": 0.3808013998924554, "learning_rate": 0.00019787864027782562, "loss": 3.0871999263763428, "step": 3835, "token_acc": 0.28910718601466207 }, { "epoch": 2.2486074464966284, "grad_norm": 0.31738546832779685, "learning_rate": 0.00019787665407134801, "loss": 3.0428075790405273, "step": 3836, "token_acc": 0.2943471007057312 }, { "epoch": 2.2491937848138375, "grad_norm": 0.3885261989418085, "learning_rate": 0.0001978746669454511, "loss": 3.0900988578796387, "step": 3837, "token_acc": 0.2885441958981247 }, { "epoch": 2.2497801231310466, "grad_norm": 0.45066357840273746, "learning_rate": 0.00019787267890015363, "loss": 3.0723986625671387, "step": 3838, "token_acc": 0.2923333271717575 }, { "epoch": 2.2503664614482557, "grad_norm": 0.4983521815503289, "learning_rate": 0.00019787068993547418, "loss": 3.0541491508483887, "step": 3839, "token_acc": 0.2945795259324838 }, { "epoch": 2.250952799765465, "grad_norm": 0.44011693361142934, "learning_rate": 0.00019786870005143148, "loss": 3.0687098503112793, "step": 3840, "token_acc": 0.29009782356882896 }, { "epoch": 2.2515391380826735, "grad_norm": 0.3332672507034587, "learning_rate": 0.0001978667092480442, "loss": 3.050999164581299, "step": 3841, "token_acc": 0.2927275158259582 }, { "epoch": 2.2521254763998826, "grad_norm": 0.33316882156129396, "learning_rate": 0.0001978647175253311, "loss": 3.0840840339660645, "step": 3842, "token_acc": 0.29060568826103866 }, { "epoch": 2.2527118147170917, "grad_norm": 0.3803384958834979, "learning_rate": 0.00019786272488331088, "loss": 3.082376480102539, "step": 3843, "token_acc": 0.289594089957894 }, { "epoch": 2.253298153034301, "grad_norm": 0.4251140913978163, "learning_rate": 0.0001978607313220022, "loss": 3.053457260131836, "step": 3844, "token_acc": 0.29341770321834454 }, { "epoch": 2.25388449135151, "grad_norm": 0.35962353817743903, "learning_rate": 0.00019785873684142382, "loss": 3.0669326782226562, "step": 3845, "token_acc": 0.2906754631383887 }, { "epoch": 2.254470829668719, "grad_norm": 0.4387624314495781, "learning_rate": 0.00019785674144159448, "loss": 3.0842366218566895, "step": 3846, "token_acc": 0.2890290709889671 }, { "epoch": 2.2550571679859277, "grad_norm": 0.35356744404633866, "learning_rate": 0.0001978547451225329, "loss": 3.0830812454223633, "step": 3847, "token_acc": 0.29014837767779994 }, { "epoch": 2.255643506303137, "grad_norm": 0.3477602693635744, "learning_rate": 0.00019785274788425788, "loss": 3.0593464374542236, "step": 3848, "token_acc": 0.293080381679594 }, { "epoch": 2.256229844620346, "grad_norm": 0.35894957550749595, "learning_rate": 0.00019785074972678812, "loss": 3.036929130554199, "step": 3849, "token_acc": 0.29453205483794276 }, { "epoch": 2.256816182937555, "grad_norm": 0.42093371558242826, "learning_rate": 0.00019784875065014242, "loss": 3.0954580307006836, "step": 3850, "token_acc": 0.2863352363979236 }, { "epoch": 2.257402521254764, "grad_norm": 0.3809209028622022, "learning_rate": 0.0001978467506543396, "loss": 3.0760891437530518, "step": 3851, "token_acc": 0.29075386597938147 }, { "epoch": 2.257988859571973, "grad_norm": 0.43697845784981676, "learning_rate": 0.00019784474973939838, "loss": 3.0397307872772217, "step": 3852, "token_acc": 0.29563900441351215 }, { "epoch": 2.258575197889182, "grad_norm": 0.4293624524475749, "learning_rate": 0.0001978427479053376, "loss": 3.085088014602661, "step": 3853, "token_acc": 0.28990424970086004 }, { "epoch": 2.259161536206391, "grad_norm": 0.46268425793005175, "learning_rate": 0.00019784074515217604, "loss": 3.090554714202881, "step": 3854, "token_acc": 0.29035512138076713 }, { "epoch": 2.2597478745236, "grad_norm": 0.40647065091778983, "learning_rate": 0.0001978387414799325, "loss": 3.062056541442871, "step": 3855, "token_acc": 0.2921633820510225 }, { "epoch": 2.2603342128408093, "grad_norm": 0.37803857639264177, "learning_rate": 0.00019783673688862586, "loss": 3.0450124740600586, "step": 3856, "token_acc": 0.29378786168491555 }, { "epoch": 2.2609205511580184, "grad_norm": 0.39589980240767714, "learning_rate": 0.0001978347313782749, "loss": 3.0835564136505127, "step": 3857, "token_acc": 0.28886139126472965 }, { "epoch": 2.261506889475227, "grad_norm": 0.3950371376683415, "learning_rate": 0.00019783272494889844, "loss": 3.087594509124756, "step": 3858, "token_acc": 0.2871967087976178 }, { "epoch": 2.262093227792436, "grad_norm": 0.35490649937903, "learning_rate": 0.00019783071760051538, "loss": 3.073307752609253, "step": 3859, "token_acc": 0.2903033875766009 }, { "epoch": 2.2626795661096453, "grad_norm": 0.4002758399775946, "learning_rate": 0.00019782870933314457, "loss": 3.059218168258667, "step": 3860, "token_acc": 0.29250263416546646 }, { "epoch": 2.2632659044268544, "grad_norm": 0.3943013465477426, "learning_rate": 0.00019782670014680486, "loss": 3.0819945335388184, "step": 3861, "token_acc": 0.2900453661697991 }, { "epoch": 2.2638522427440635, "grad_norm": 0.3869606624731942, "learning_rate": 0.00019782469004151515, "loss": 3.0478851795196533, "step": 3862, "token_acc": 0.29412500814703774 }, { "epoch": 2.264438581061272, "grad_norm": 0.380158075377517, "learning_rate": 0.00019782267901729425, "loss": 3.011404037475586, "step": 3863, "token_acc": 0.29967902034504634 }, { "epoch": 2.2650249193784813, "grad_norm": 0.3743676319930695, "learning_rate": 0.00019782066707416113, "loss": 3.0840048789978027, "step": 3864, "token_acc": 0.29036241134196356 }, { "epoch": 2.2656112576956904, "grad_norm": 0.42456445124439374, "learning_rate": 0.0001978186542121346, "loss": 3.1118805408477783, "step": 3865, "token_acc": 0.2845688650290346 }, { "epoch": 2.2661975960128995, "grad_norm": 0.4683345251303445, "learning_rate": 0.0001978166404312337, "loss": 3.05707049369812, "step": 3866, "token_acc": 0.2921084164899667 }, { "epoch": 2.2667839343301086, "grad_norm": 0.4168564471174331, "learning_rate": 0.00019781462573147725, "loss": 3.061349630355835, "step": 3867, "token_acc": 0.2907291959728967 }, { "epoch": 2.2673702726473177, "grad_norm": 0.3897562301307626, "learning_rate": 0.0001978126101128842, "loss": 3.031933307647705, "step": 3868, "token_acc": 0.2967982359657668 }, { "epoch": 2.2679566109645264, "grad_norm": 0.4335121229192419, "learning_rate": 0.00019781059357547345, "loss": 3.0813913345336914, "step": 3869, "token_acc": 0.289440160386766 }, { "epoch": 2.2685429492817355, "grad_norm": 0.3989462236388706, "learning_rate": 0.000197808576119264, "loss": 3.056244373321533, "step": 3870, "token_acc": 0.29418721433647393 }, { "epoch": 2.2691292875989446, "grad_norm": 0.41244545306837865, "learning_rate": 0.00019780655774427478, "loss": 3.1044414043426514, "step": 3871, "token_acc": 0.2865758296937521 }, { "epoch": 2.2697156259161537, "grad_norm": 0.40007065004324244, "learning_rate": 0.00019780453845052475, "loss": 3.0269341468811035, "step": 3872, "token_acc": 0.2985389844446699 }, { "epoch": 2.270301964233363, "grad_norm": 0.3455341494471005, "learning_rate": 0.00019780251823803285, "loss": 3.0718140602111816, "step": 3873, "token_acc": 0.2918600097632676 }, { "epoch": 2.2708883025505715, "grad_norm": 0.35191370222477264, "learning_rate": 0.0001978004971068181, "loss": 3.105452537536621, "step": 3874, "token_acc": 0.2871991577929339 }, { "epoch": 2.2714746408677806, "grad_norm": 0.369239621976737, "learning_rate": 0.00019779847505689948, "loss": 3.074030876159668, "step": 3875, "token_acc": 0.2907204365160919 }, { "epoch": 2.2720609791849897, "grad_norm": 0.34297005557027793, "learning_rate": 0.00019779645208829595, "loss": 3.0372865200042725, "step": 3876, "token_acc": 0.29658090572627727 }, { "epoch": 2.272647317502199, "grad_norm": 0.34038265953210595, "learning_rate": 0.00019779442820102653, "loss": 3.0855207443237305, "step": 3877, "token_acc": 0.2888565526380751 }, { "epoch": 2.273233655819408, "grad_norm": 0.3572219542224862, "learning_rate": 0.00019779240339511025, "loss": 3.1044750213623047, "step": 3878, "token_acc": 0.2871144883014916 }, { "epoch": 2.273819994136617, "grad_norm": 0.37208387117372316, "learning_rate": 0.00019779037767056611, "loss": 3.040769577026367, "step": 3879, "token_acc": 0.29628549692379874 }, { "epoch": 2.2744063324538257, "grad_norm": 0.38764424822561205, "learning_rate": 0.00019778835102741315, "loss": 3.0973968505859375, "step": 3880, "token_acc": 0.2881991620645655 }, { "epoch": 2.274992670771035, "grad_norm": 0.34203350960186524, "learning_rate": 0.0001977863234656704, "loss": 3.0668859481811523, "step": 3881, "token_acc": 0.2923337234746323 }, { "epoch": 2.275579009088244, "grad_norm": 0.3339718170418426, "learning_rate": 0.00019778429498535692, "loss": 3.0442793369293213, "step": 3882, "token_acc": 0.29317082669782357 }, { "epoch": 2.276165347405453, "grad_norm": 0.3772828287091329, "learning_rate": 0.00019778226558649176, "loss": 3.0768418312072754, "step": 3883, "token_acc": 0.2891283146293293 }, { "epoch": 2.2767516857226617, "grad_norm": 0.3514258317399042, "learning_rate": 0.00019778023526909397, "loss": 3.0617470741271973, "step": 3884, "token_acc": 0.29051059565547876 }, { "epoch": 2.277338024039871, "grad_norm": 0.3730563696832072, "learning_rate": 0.00019777820403318262, "loss": 3.075042247772217, "step": 3885, "token_acc": 0.2908457325591896 }, { "epoch": 2.27792436235708, "grad_norm": 0.32662730412792623, "learning_rate": 0.00019777617187877682, "loss": 3.1011276245117188, "step": 3886, "token_acc": 0.28689720938240865 }, { "epoch": 2.278510700674289, "grad_norm": 0.31635983598614803, "learning_rate": 0.0001977741388058956, "loss": 3.071061611175537, "step": 3887, "token_acc": 0.290070867354799 }, { "epoch": 2.279097038991498, "grad_norm": 0.33721267383444226, "learning_rate": 0.00019777210481455813, "loss": 3.1046292781829834, "step": 3888, "token_acc": 0.2880738953131714 }, { "epoch": 2.2796833773087073, "grad_norm": 0.3196585850021692, "learning_rate": 0.00019777006990478349, "loss": 3.103748321533203, "step": 3889, "token_acc": 0.28723306661230313 }, { "epoch": 2.280269715625916, "grad_norm": 0.33120359335203414, "learning_rate": 0.0001977680340765908, "loss": 3.0703234672546387, "step": 3890, "token_acc": 0.2924486015628797 }, { "epoch": 2.280856053943125, "grad_norm": 0.301223330458454, "learning_rate": 0.00019776599732999913, "loss": 3.012737274169922, "step": 3891, "token_acc": 0.2993307568568336 }, { "epoch": 2.281442392260334, "grad_norm": 0.34457857747464643, "learning_rate": 0.00019776395966502767, "loss": 3.124857187271118, "step": 3892, "token_acc": 0.28540306757977685 }, { "epoch": 2.2820287305775433, "grad_norm": 0.33680874522147286, "learning_rate": 0.00019776192108169558, "loss": 3.0902318954467773, "step": 3893, "token_acc": 0.28752942925168085 }, { "epoch": 2.2826150688947524, "grad_norm": 0.29992755828963785, "learning_rate": 0.00019775988158002194, "loss": 3.072676181793213, "step": 3894, "token_acc": 0.29093481667764826 }, { "epoch": 2.283201407211961, "grad_norm": 0.32217562503923125, "learning_rate": 0.00019775784116002595, "loss": 3.06217622756958, "step": 3895, "token_acc": 0.2922255724879798 }, { "epoch": 2.28378774552917, "grad_norm": 0.36714026462731875, "learning_rate": 0.0001977557998217268, "loss": 3.069631576538086, "step": 3896, "token_acc": 0.2911714425268642 }, { "epoch": 2.2843740838463793, "grad_norm": 0.35990979575254617, "learning_rate": 0.00019775375756514362, "loss": 3.0887770652770996, "step": 3897, "token_acc": 0.289622237826688 }, { "epoch": 2.2849604221635884, "grad_norm": 0.3330263531585479, "learning_rate": 0.00019775171439029562, "loss": 3.0638556480407715, "step": 3898, "token_acc": 0.29386143123710834 }, { "epoch": 2.2855467604807975, "grad_norm": 0.2963750565116905, "learning_rate": 0.00019774967029720196, "loss": 3.1000118255615234, "step": 3899, "token_acc": 0.28890363648676104 }, { "epoch": 2.2861330987980066, "grad_norm": 0.3354805972963505, "learning_rate": 0.0001977476252858819, "loss": 3.084341049194336, "step": 3900, "token_acc": 0.2889458094846578 }, { "epoch": 2.2867194371152153, "grad_norm": 0.38336428578348897, "learning_rate": 0.0001977455793563546, "loss": 3.0917348861694336, "step": 3901, "token_acc": 0.2873119958922573 }, { "epoch": 2.2873057754324244, "grad_norm": 0.3492427682653205, "learning_rate": 0.00019774353250863932, "loss": 3.0789947509765625, "step": 3902, "token_acc": 0.2897766186518221 }, { "epoch": 2.2878921137496335, "grad_norm": 0.3280790319423024, "learning_rate": 0.00019774148474275524, "loss": 3.060258388519287, "step": 3903, "token_acc": 0.2928114930182599 }, { "epoch": 2.2884784520668426, "grad_norm": 0.37595969062209994, "learning_rate": 0.00019773943605872163, "loss": 3.0321550369262695, "step": 3904, "token_acc": 0.2983767905351547 }, { "epoch": 2.2890647903840518, "grad_norm": 0.41823000450298575, "learning_rate": 0.0001977373864565577, "loss": 3.1204299926757812, "step": 3905, "token_acc": 0.28475537227325365 }, { "epoch": 2.2896511287012604, "grad_norm": 0.39220491819646247, "learning_rate": 0.00019773533593628274, "loss": 3.037033796310425, "step": 3906, "token_acc": 0.29528027227621273 }, { "epoch": 2.2902374670184695, "grad_norm": 0.3761338896329433, "learning_rate": 0.00019773328449791601, "loss": 3.0608339309692383, "step": 3907, "token_acc": 0.29134067613297576 }, { "epoch": 2.2908238053356786, "grad_norm": 0.4725410650045336, "learning_rate": 0.00019773123214147679, "loss": 3.0568594932556152, "step": 3908, "token_acc": 0.2913151074154603 }, { "epoch": 2.2914101436528878, "grad_norm": 0.3158851759938663, "learning_rate": 0.0001977291788669843, "loss": 3.0284476280212402, "step": 3909, "token_acc": 0.29672530733361596 }, { "epoch": 2.291996481970097, "grad_norm": 0.41928382392857605, "learning_rate": 0.00019772712467445788, "loss": 3.0885353088378906, "step": 3910, "token_acc": 0.2878898543642635 }, { "epoch": 2.292582820287306, "grad_norm": 0.39580765785621064, "learning_rate": 0.0001977250695639168, "loss": 3.077415943145752, "step": 3911, "token_acc": 0.28921388469970977 }, { "epoch": 2.2931691586045146, "grad_norm": 0.4880177730409939, "learning_rate": 0.00019772301353538038, "loss": 3.0579886436462402, "step": 3912, "token_acc": 0.2918747524764806 }, { "epoch": 2.2937554969217238, "grad_norm": 0.3548938789884596, "learning_rate": 0.00019772095658886793, "loss": 3.06215500831604, "step": 3913, "token_acc": 0.2917418144159296 }, { "epoch": 2.294341835238933, "grad_norm": 0.4000728819185458, "learning_rate": 0.0001977188987243988, "loss": 3.070331573486328, "step": 3914, "token_acc": 0.29202924315051604 }, { "epoch": 2.294928173556142, "grad_norm": 0.41387056754323365, "learning_rate": 0.00019771683994199228, "loss": 3.0733089447021484, "step": 3915, "token_acc": 0.29079799552525876 }, { "epoch": 2.295514511873351, "grad_norm": 0.368425537632172, "learning_rate": 0.00019771478024166773, "loss": 3.0414767265319824, "step": 3916, "token_acc": 0.2939419557656969 }, { "epoch": 2.2961008501905598, "grad_norm": 0.4792296598248775, "learning_rate": 0.00019771271962344447, "loss": 3.090397357940674, "step": 3917, "token_acc": 0.2880425252369274 }, { "epoch": 2.296687188507769, "grad_norm": 0.5752338769050087, "learning_rate": 0.0001977106580873419, "loss": 3.1029324531555176, "step": 3918, "token_acc": 0.28563329494176143 }, { "epoch": 2.297273526824978, "grad_norm": 0.4579930708991789, "learning_rate": 0.00019770859563337934, "loss": 3.087873935699463, "step": 3919, "token_acc": 0.2874762808349146 }, { "epoch": 2.297859865142187, "grad_norm": 0.44713112297759794, "learning_rate": 0.00019770653226157617, "loss": 3.0868358612060547, "step": 3920, "token_acc": 0.28901353194489143 }, { "epoch": 2.298446203459396, "grad_norm": 0.48385659959776484, "learning_rate": 0.00019770446797195187, "loss": 3.090729236602783, "step": 3921, "token_acc": 0.2905875785948246 }, { "epoch": 2.2990325417766053, "grad_norm": 0.380021781282166, "learning_rate": 0.0001977024027645257, "loss": 3.08174467086792, "step": 3922, "token_acc": 0.2890153299361966 }, { "epoch": 2.299618880093814, "grad_norm": 0.3288134995101452, "learning_rate": 0.0001977003366393171, "loss": 3.0719780921936035, "step": 3923, "token_acc": 0.29157037848580203 }, { "epoch": 2.300205218411023, "grad_norm": 0.33002493717042414, "learning_rate": 0.0001976982695963455, "loss": 3.0802690982818604, "step": 3924, "token_acc": 0.2887173187369119 }, { "epoch": 2.300791556728232, "grad_norm": 0.29661916318359, "learning_rate": 0.0001976962016356303, "loss": 3.035904884338379, "step": 3925, "token_acc": 0.2945318222271227 }, { "epoch": 2.3013778950454413, "grad_norm": 0.2888831355919122, "learning_rate": 0.00019769413275719098, "loss": 3.07951021194458, "step": 3926, "token_acc": 0.2898424586582524 }, { "epoch": 2.3019642333626504, "grad_norm": 0.35350921861194695, "learning_rate": 0.0001976920629610469, "loss": 3.0688276290893555, "step": 3927, "token_acc": 0.28918185548451997 }, { "epoch": 2.302550571679859, "grad_norm": 0.35487593752783886, "learning_rate": 0.00019768999224721752, "loss": 3.0326390266418457, "step": 3928, "token_acc": 0.29618882000769936 }, { "epoch": 2.303136909997068, "grad_norm": 0.30245074871517424, "learning_rate": 0.0001976879206157223, "loss": 3.0536086559295654, "step": 3929, "token_acc": 0.29366098377969796 }, { "epoch": 2.3037232483142773, "grad_norm": 0.37021968241025716, "learning_rate": 0.0001976858480665807, "loss": 3.063889741897583, "step": 3930, "token_acc": 0.29233408135365885 }, { "epoch": 2.3043095866314864, "grad_norm": 0.35570209454030016, "learning_rate": 0.0001976837745998122, "loss": 3.0275192260742188, "step": 3931, "token_acc": 0.29613200406999185 }, { "epoch": 2.3048959249486956, "grad_norm": 0.45177582271090566, "learning_rate": 0.00019768170021543626, "loss": 3.029999256134033, "step": 3932, "token_acc": 0.29564851185292446 }, { "epoch": 2.3054822632659047, "grad_norm": 0.5483197251038302, "learning_rate": 0.00019767962491347237, "loss": 3.078869104385376, "step": 3933, "token_acc": 0.2902977359660912 }, { "epoch": 2.3060686015831133, "grad_norm": 0.44266582453522674, "learning_rate": 0.00019767754869394005, "loss": 3.041951894760132, "step": 3934, "token_acc": 0.29301954274264536 }, { "epoch": 2.3066549399003224, "grad_norm": 0.31380464085717574, "learning_rate": 0.00019767547155685877, "loss": 3.084629535675049, "step": 3935, "token_acc": 0.28886242892126346 }, { "epoch": 2.3072412782175316, "grad_norm": 0.39543992471987766, "learning_rate": 0.00019767339350224808, "loss": 3.045207977294922, "step": 3936, "token_acc": 0.29422917229442425 }, { "epoch": 2.3078276165347407, "grad_norm": 0.46559447260023007, "learning_rate": 0.00019767131453012743, "loss": 3.1069793701171875, "step": 3937, "token_acc": 0.28597872428068294 }, { "epoch": 2.3084139548519493, "grad_norm": 0.5168921879271132, "learning_rate": 0.00019766923464051642, "loss": 3.089266777038574, "step": 3938, "token_acc": 0.2876342421460436 }, { "epoch": 2.3090002931691584, "grad_norm": 0.3508368107430926, "learning_rate": 0.00019766715383343457, "loss": 3.0697221755981445, "step": 3939, "token_acc": 0.29131249175570506 }, { "epoch": 2.3095866314863676, "grad_norm": 0.3805241050145065, "learning_rate": 0.0001976650721089014, "loss": 3.09342098236084, "step": 3940, "token_acc": 0.28739591781386337 }, { "epoch": 2.3101729698035767, "grad_norm": 0.42671834029274397, "learning_rate": 0.00019766298946693648, "loss": 3.0487489700317383, "step": 3941, "token_acc": 0.29326043466764246 }, { "epoch": 2.310759308120786, "grad_norm": 0.35535042637947967, "learning_rate": 0.00019766090590755936, "loss": 3.034674644470215, "step": 3942, "token_acc": 0.2965640023217256 }, { "epoch": 2.311345646437995, "grad_norm": 0.3157537783986005, "learning_rate": 0.00019765882143078968, "loss": 3.0724880695343018, "step": 3943, "token_acc": 0.29286366839111716 }, { "epoch": 2.3119319847552036, "grad_norm": 0.3623918428308919, "learning_rate": 0.00019765673603664693, "loss": 3.0392496585845947, "step": 3944, "token_acc": 0.29375016289191797 }, { "epoch": 2.3125183230724127, "grad_norm": 0.2952976239043702, "learning_rate": 0.00019765464972515076, "loss": 3.06953763961792, "step": 3945, "token_acc": 0.2895000790545081 }, { "epoch": 2.313104661389622, "grad_norm": 0.3403753795015596, "learning_rate": 0.00019765256249632075, "loss": 3.0562973022460938, "step": 3946, "token_acc": 0.292378230939083 }, { "epoch": 2.313690999706831, "grad_norm": 0.30309617389538196, "learning_rate": 0.00019765047435017647, "loss": 3.1248483657836914, "step": 3947, "token_acc": 0.2838542470943101 }, { "epoch": 2.31427733802404, "grad_norm": 0.3548060391917873, "learning_rate": 0.0001976483852867376, "loss": 3.083674907684326, "step": 3948, "token_acc": 0.290798976164146 }, { "epoch": 2.3148636763412487, "grad_norm": 0.37399859025584714, "learning_rate": 0.00019764629530602372, "loss": 3.050013780593872, "step": 3949, "token_acc": 0.2936035816626335 }, { "epoch": 2.315450014658458, "grad_norm": 0.30935216047609504, "learning_rate": 0.00019764420440805447, "loss": 3.1083614826202393, "step": 3950, "token_acc": 0.28609565477625026 }, { "epoch": 2.316036352975667, "grad_norm": 0.4139210947749515, "learning_rate": 0.0001976421125928495, "loss": 3.095822334289551, "step": 3951, "token_acc": 0.28736773662635057 }, { "epoch": 2.316622691292876, "grad_norm": 0.3820220865098993, "learning_rate": 0.00019764001986042847, "loss": 3.069571018218994, "step": 3952, "token_acc": 0.2908962369587932 }, { "epoch": 2.317209029610085, "grad_norm": 0.3271748428578365, "learning_rate": 0.00019763792621081103, "loss": 3.0487797260284424, "step": 3953, "token_acc": 0.2947320713352165 }, { "epoch": 2.3177953679272942, "grad_norm": 0.3777244235634175, "learning_rate": 0.00019763583164401682, "loss": 3.0504446029663086, "step": 3954, "token_acc": 0.2940915797454866 }, { "epoch": 2.318381706244503, "grad_norm": 0.3556832797408692, "learning_rate": 0.00019763373616006556, "loss": 3.0575156211853027, "step": 3955, "token_acc": 0.2922865650187381 }, { "epoch": 2.318968044561712, "grad_norm": 0.34913945763265647, "learning_rate": 0.0001976316397589769, "loss": 3.0987510681152344, "step": 3956, "token_acc": 0.288177359801569 }, { "epoch": 2.319554382878921, "grad_norm": 0.440126868231471, "learning_rate": 0.00019762954244077055, "loss": 3.0395970344543457, "step": 3957, "token_acc": 0.2957527461595652 }, { "epoch": 2.3201407211961302, "grad_norm": 0.3509503185405221, "learning_rate": 0.0001976274442054662, "loss": 3.11032772064209, "step": 3958, "token_acc": 0.28468988473591955 }, { "epoch": 2.3207270595133394, "grad_norm": 0.3501176029079926, "learning_rate": 0.00019762534505308356, "loss": 3.08274245262146, "step": 3959, "token_acc": 0.28878287990480533 }, { "epoch": 2.321313397830548, "grad_norm": 0.31064228163735674, "learning_rate": 0.00019762324498364236, "loss": 3.0849123001098633, "step": 3960, "token_acc": 0.2906457581903237 }, { "epoch": 2.321899736147757, "grad_norm": 0.40329396017342894, "learning_rate": 0.00019762114399716232, "loss": 3.0859622955322266, "step": 3961, "token_acc": 0.28828072707496655 }, { "epoch": 2.3224860744649662, "grad_norm": 0.33496347279919436, "learning_rate": 0.00019761904209366317, "loss": 3.072143077850342, "step": 3962, "token_acc": 0.2910849119088621 }, { "epoch": 2.3230724127821754, "grad_norm": 0.31563389722342244, "learning_rate": 0.00019761693927316469, "loss": 3.0302629470825195, "step": 3963, "token_acc": 0.2973203686660531 }, { "epoch": 2.3236587510993845, "grad_norm": 0.3391197355017933, "learning_rate": 0.00019761483553568657, "loss": 3.086792469024658, "step": 3964, "token_acc": 0.2884605844771272 }, { "epoch": 2.3242450894165936, "grad_norm": 0.3309174055092619, "learning_rate": 0.00019761273088124862, "loss": 3.0333189964294434, "step": 3965, "token_acc": 0.295703423053648 }, { "epoch": 2.3248314277338022, "grad_norm": 0.30668035491719037, "learning_rate": 0.00019761062530987062, "loss": 3.0695810317993164, "step": 3966, "token_acc": 0.2917963840022063 }, { "epoch": 2.3254177660510114, "grad_norm": 0.32186820375791175, "learning_rate": 0.00019760851882157234, "loss": 3.092996120452881, "step": 3967, "token_acc": 0.28774220235783143 }, { "epoch": 2.3260041043682205, "grad_norm": 0.3754082831054445, "learning_rate": 0.0001976064114163735, "loss": 3.0739240646362305, "step": 3968, "token_acc": 0.2905131372501527 }, { "epoch": 2.3265904426854296, "grad_norm": 0.31148615337814417, "learning_rate": 0.000197604303094294, "loss": 3.09426212310791, "step": 3969, "token_acc": 0.2887943685920468 }, { "epoch": 2.3271767810026387, "grad_norm": 0.3138526101525699, "learning_rate": 0.00019760219385535357, "loss": 3.0846781730651855, "step": 3970, "token_acc": 0.28889640622538754 }, { "epoch": 2.3277631193198474, "grad_norm": 0.3615140072700527, "learning_rate": 0.00019760008369957205, "loss": 3.08321213722229, "step": 3971, "token_acc": 0.2888261803031269 }, { "epoch": 2.3283494576370565, "grad_norm": 0.38084727284845, "learning_rate": 0.00019759797262696927, "loss": 3.093390464782715, "step": 3972, "token_acc": 0.28630255911888564 }, { "epoch": 2.3289357959542656, "grad_norm": 0.33983768672838577, "learning_rate": 0.00019759586063756505, "loss": 3.101447582244873, "step": 3973, "token_acc": 0.2863724941026106 }, { "epoch": 2.3295221342714747, "grad_norm": 0.33771826777627234, "learning_rate": 0.00019759374773137923, "loss": 3.1065969467163086, "step": 3974, "token_acc": 0.2843523520202387 }, { "epoch": 2.330108472588684, "grad_norm": 0.32914220404682254, "learning_rate": 0.00019759163390843166, "loss": 3.0686240196228027, "step": 3975, "token_acc": 0.29178785059151935 }, { "epoch": 2.330694810905893, "grad_norm": 0.2927059725789342, "learning_rate": 0.0001975895191687422, "loss": 3.0464584827423096, "step": 3976, "token_acc": 0.2947440742013054 }, { "epoch": 2.3312811492231016, "grad_norm": 0.37207018854899204, "learning_rate": 0.00019758740351233072, "loss": 3.040398597717285, "step": 3977, "token_acc": 0.29562048583144657 }, { "epoch": 2.3318674875403107, "grad_norm": 0.36751652393791073, "learning_rate": 0.00019758528693921706, "loss": 3.056946277618408, "step": 3978, "token_acc": 0.29244770901459444 }, { "epoch": 2.33245382585752, "grad_norm": 0.37590298052767956, "learning_rate": 0.00019758316944942114, "loss": 3.0540237426757812, "step": 3979, "token_acc": 0.293398796547552 }, { "epoch": 2.333040164174729, "grad_norm": 0.38418453359137894, "learning_rate": 0.00019758105104296283, "loss": 3.027588129043579, "step": 3980, "token_acc": 0.29923165394076867 }, { "epoch": 2.333626502491938, "grad_norm": 0.4380591346155051, "learning_rate": 0.00019757893171986203, "loss": 3.1071159839630127, "step": 3981, "token_acc": 0.2857442573423933 }, { "epoch": 2.3342128408091467, "grad_norm": 0.35478252859661724, "learning_rate": 0.00019757681148013868, "loss": 3.085390567779541, "step": 3982, "token_acc": 0.289410124817842 }, { "epoch": 2.334799179126356, "grad_norm": 0.40210671407512893, "learning_rate": 0.00019757469032381266, "loss": 3.0347161293029785, "step": 3983, "token_acc": 0.2951946159571457 }, { "epoch": 2.335385517443565, "grad_norm": 0.3634283300651795, "learning_rate": 0.0001975725682509039, "loss": 3.0693979263305664, "step": 3984, "token_acc": 0.2900009425622919 }, { "epoch": 2.335971855760774, "grad_norm": 0.3304167996947664, "learning_rate": 0.00019757044526143235, "loss": 3.048264980316162, "step": 3985, "token_acc": 0.29586135920902706 }, { "epoch": 2.336558194077983, "grad_norm": 0.3192896611692682, "learning_rate": 0.00019756832135541796, "loss": 3.062061309814453, "step": 3986, "token_acc": 0.29373342903987093 }, { "epoch": 2.3371445323951923, "grad_norm": 0.4365166073229798, "learning_rate": 0.00019756619653288064, "loss": 3.024679660797119, "step": 3987, "token_acc": 0.29773537429230446 }, { "epoch": 2.337730870712401, "grad_norm": 0.460682966241692, "learning_rate": 0.0001975640707938404, "loss": 3.0796947479248047, "step": 3988, "token_acc": 0.2877266222939887 }, { "epoch": 2.33831720902961, "grad_norm": 0.31069688326522954, "learning_rate": 0.00019756194413831716, "loss": 3.0663650035858154, "step": 3989, "token_acc": 0.292067858780376 }, { "epoch": 2.338903547346819, "grad_norm": 0.4254019623003263, "learning_rate": 0.00019755981656633095, "loss": 3.072375774383545, "step": 3990, "token_acc": 0.289774355717019 }, { "epoch": 2.3394898856640283, "grad_norm": 0.4180347623050922, "learning_rate": 0.0001975576880779017, "loss": 3.052222967147827, "step": 3991, "token_acc": 0.2926384296710621 }, { "epoch": 2.340076223981237, "grad_norm": 0.3397134867987974, "learning_rate": 0.00019755555867304945, "loss": 3.0734167098999023, "step": 3992, "token_acc": 0.2887498779227855 }, { "epoch": 2.340662562298446, "grad_norm": 0.37739143006517256, "learning_rate": 0.0001975534283517942, "loss": 3.044403314590454, "step": 3993, "token_acc": 0.2937327133523069 }, { "epoch": 2.341248900615655, "grad_norm": 0.4565405374669542, "learning_rate": 0.0001975512971141559, "loss": 3.0844674110412598, "step": 3994, "token_acc": 0.288412790560224 }, { "epoch": 2.3418352389328643, "grad_norm": 0.360839361569355, "learning_rate": 0.00019754916496015463, "loss": 3.0569329261779785, "step": 3995, "token_acc": 0.29257328372102565 }, { "epoch": 2.3424215772500734, "grad_norm": 0.37213110531571003, "learning_rate": 0.0001975470318898104, "loss": 3.079150438308716, "step": 3996, "token_acc": 0.29086718463719935 }, { "epoch": 2.3430079155672825, "grad_norm": 0.3539666838712452, "learning_rate": 0.00019754489790314327, "loss": 3.0526463985443115, "step": 3997, "token_acc": 0.29498118110341687 }, { "epoch": 2.343594253884491, "grad_norm": 0.3564909839532416, "learning_rate": 0.00019754276300017326, "loss": 3.0762414932250977, "step": 3998, "token_acc": 0.2892633996073788 }, { "epoch": 2.3441805922017003, "grad_norm": 0.34385310366567623, "learning_rate": 0.00019754062718092043, "loss": 3.0470519065856934, "step": 3999, "token_acc": 0.29430920307524217 }, { "epoch": 2.3447669305189094, "grad_norm": 0.37361099402393605, "learning_rate": 0.00019753849044540483, "loss": 3.015875816345215, "step": 4000, "token_acc": 0.29831418284732614 }, { "epoch": 2.3453532688361185, "grad_norm": 0.38353953015617503, "learning_rate": 0.0001975363527936466, "loss": 3.0424609184265137, "step": 4001, "token_acc": 0.29575657568703506 }, { "epoch": 2.3459396071533276, "grad_norm": 0.4132729845447788, "learning_rate": 0.0001975342142256657, "loss": 3.100404739379883, "step": 4002, "token_acc": 0.2857146605857517 }, { "epoch": 2.3465259454705363, "grad_norm": 0.4200571316947381, "learning_rate": 0.00019753207474148234, "loss": 3.0756542682647705, "step": 4003, "token_acc": 0.29013264662884425 }, { "epoch": 2.3471122837877454, "grad_norm": 0.30704287318361, "learning_rate": 0.00019752993434111652, "loss": 3.1128041744232178, "step": 4004, "token_acc": 0.28493046473214984 }, { "epoch": 2.3476986221049545, "grad_norm": 0.3463450225968444, "learning_rate": 0.0001975277930245884, "loss": 3.088301181793213, "step": 4005, "token_acc": 0.289170695369655 }, { "epoch": 2.3482849604221636, "grad_norm": 0.33521686160970243, "learning_rate": 0.00019752565079191815, "loss": 3.11116361618042, "step": 4006, "token_acc": 0.2855750590020229 }, { "epoch": 2.3488712987393727, "grad_norm": 0.42626517629359884, "learning_rate": 0.00019752350764312574, "loss": 3.0494749546051025, "step": 4007, "token_acc": 0.2940345746014735 }, { "epoch": 2.349457637056582, "grad_norm": 0.47921665366143085, "learning_rate": 0.00019752136357823144, "loss": 3.05031681060791, "step": 4008, "token_acc": 0.29465879620867624 }, { "epoch": 2.3500439753737905, "grad_norm": 0.36943211416243066, "learning_rate": 0.00019751921859725532, "loss": 3.1038665771484375, "step": 4009, "token_acc": 0.2869698219748311 }, { "epoch": 2.3506303136909996, "grad_norm": 0.30127973139343633, "learning_rate": 0.00019751707270021756, "loss": 3.085176467895508, "step": 4010, "token_acc": 0.2884002169197397 }, { "epoch": 2.3512166520082087, "grad_norm": 0.3731755540011698, "learning_rate": 0.00019751492588713831, "loss": 3.0522384643554688, "step": 4011, "token_acc": 0.29326455401386253 }, { "epoch": 2.351802990325418, "grad_norm": 0.3491736705298053, "learning_rate": 0.0001975127781580377, "loss": 3.070321559906006, "step": 4012, "token_acc": 0.29081333506291346 }, { "epoch": 2.352389328642627, "grad_norm": 0.3930004697026496, "learning_rate": 0.000197510629512936, "loss": 3.0925259590148926, "step": 4013, "token_acc": 0.28640986053870593 }, { "epoch": 2.3529756669598356, "grad_norm": 0.3579399373517333, "learning_rate": 0.0001975084799518533, "loss": 3.072746753692627, "step": 4014, "token_acc": 0.29037428781724506 }, { "epoch": 2.3535620052770447, "grad_norm": 0.36180353325130354, "learning_rate": 0.0001975063294748098, "loss": 3.065969944000244, "step": 4015, "token_acc": 0.2904752809510781 }, { "epoch": 2.354148343594254, "grad_norm": 0.3831685656311953, "learning_rate": 0.00019750417808182577, "loss": 3.0430374145507812, "step": 4016, "token_acc": 0.29508434628604907 }, { "epoch": 2.354734681911463, "grad_norm": 0.48109924320896097, "learning_rate": 0.00019750202577292135, "loss": 3.078310966491699, "step": 4017, "token_acc": 0.2875778507530291 }, { "epoch": 2.355321020228672, "grad_norm": 0.3661878565261176, "learning_rate": 0.00019749987254811678, "loss": 3.0924625396728516, "step": 4018, "token_acc": 0.28921013881085456 }, { "epoch": 2.355907358545881, "grad_norm": 0.3323767354235495, "learning_rate": 0.0001974977184074323, "loss": 3.0396461486816406, "step": 4019, "token_acc": 0.2962768305758194 }, { "epoch": 2.35649369686309, "grad_norm": 0.35590729016155426, "learning_rate": 0.00019749556335088813, "loss": 3.0730013847351074, "step": 4020, "token_acc": 0.29127455868890173 }, { "epoch": 2.357080035180299, "grad_norm": 0.39505741672431927, "learning_rate": 0.00019749340737850455, "loss": 3.095806121826172, "step": 4021, "token_acc": 0.28693255454071853 }, { "epoch": 2.357666373497508, "grad_norm": 0.3931286101181088, "learning_rate": 0.00019749125049030176, "loss": 3.0822367668151855, "step": 4022, "token_acc": 0.2881409772380501 }, { "epoch": 2.358252711814717, "grad_norm": 0.33811271720159, "learning_rate": 0.00019748909268630006, "loss": 3.047116279602051, "step": 4023, "token_acc": 0.29533874718010317 }, { "epoch": 2.3588390501319263, "grad_norm": 0.3779504439344047, "learning_rate": 0.00019748693396651966, "loss": 3.0572638511657715, "step": 4024, "token_acc": 0.29200676675829984 }, { "epoch": 2.359425388449135, "grad_norm": 0.327363311130571, "learning_rate": 0.0001974847743309809, "loss": 3.0716371536254883, "step": 4025, "token_acc": 0.2926272303864231 }, { "epoch": 2.360011726766344, "grad_norm": 0.35657928337489797, "learning_rate": 0.00019748261377970405, "loss": 3.079113721847534, "step": 4026, "token_acc": 0.2899916916980891 }, { "epoch": 2.360598065083553, "grad_norm": 0.41645378136936156, "learning_rate": 0.0001974804523127094, "loss": 3.0654830932617188, "step": 4027, "token_acc": 0.2919762740261795 }, { "epoch": 2.3611844034007623, "grad_norm": 0.39155550318973775, "learning_rate": 0.00019747828993001726, "loss": 3.078742504119873, "step": 4028, "token_acc": 0.29030880777531093 }, { "epoch": 2.3617707417179714, "grad_norm": 0.3349853077166782, "learning_rate": 0.00019747612663164793, "loss": 3.089872360229492, "step": 4029, "token_acc": 0.28676440818640053 }, { "epoch": 2.3623570800351805, "grad_norm": 0.3516355739479714, "learning_rate": 0.00019747396241762174, "loss": 3.0489275455474854, "step": 4030, "token_acc": 0.2926922681472342 }, { "epoch": 2.362943418352389, "grad_norm": 0.34540238508652715, "learning_rate": 0.00019747179728795905, "loss": 3.0936219692230225, "step": 4031, "token_acc": 0.28934985470578584 }, { "epoch": 2.3635297566695983, "grad_norm": 0.3396951313757897, "learning_rate": 0.00019746963124268017, "loss": 3.0573508739471436, "step": 4032, "token_acc": 0.29258172380836445 }, { "epoch": 2.3641160949868074, "grad_norm": 0.352775218091441, "learning_rate": 0.00019746746428180544, "loss": 3.0272040367126465, "step": 4033, "token_acc": 0.2970396413317087 }, { "epoch": 2.3647024333040165, "grad_norm": 0.32972058822562744, "learning_rate": 0.0001974652964053552, "loss": 3.0845980644226074, "step": 4034, "token_acc": 0.2878482466905141 }, { "epoch": 2.3652887716212256, "grad_norm": 0.41054318855458716, "learning_rate": 0.00019746312761334984, "loss": 3.0185203552246094, "step": 4035, "token_acc": 0.2995907223382485 }, { "epoch": 2.3658751099384343, "grad_norm": 0.3054874564322032, "learning_rate": 0.0001974609579058097, "loss": 3.086775064468384, "step": 4036, "token_acc": 0.2880820919117935 }, { "epoch": 2.3664614482556434, "grad_norm": 0.373699783046111, "learning_rate": 0.00019745878728275526, "loss": 3.0533313751220703, "step": 4037, "token_acc": 0.2936753281542995 }, { "epoch": 2.3670477865728525, "grad_norm": 0.3822475943474728, "learning_rate": 0.00019745661574420683, "loss": 3.0833749771118164, "step": 4038, "token_acc": 0.29057075000716925 }, { "epoch": 2.3676341248900616, "grad_norm": 0.43347225058887195, "learning_rate": 0.00019745444329018476, "loss": 3.0364768505096436, "step": 4039, "token_acc": 0.29621901787494187 }, { "epoch": 2.3682204632072708, "grad_norm": 0.2987437226451078, "learning_rate": 0.00019745226992070957, "loss": 3.0547916889190674, "step": 4040, "token_acc": 0.2934936550010884 }, { "epoch": 2.36880680152448, "grad_norm": 0.3701991464576738, "learning_rate": 0.00019745009563580158, "loss": 3.0611977577209473, "step": 4041, "token_acc": 0.29276847343357054 }, { "epoch": 2.3693931398416885, "grad_norm": 0.4346137899255046, "learning_rate": 0.00019744792043548131, "loss": 3.0678791999816895, "step": 4042, "token_acc": 0.28965079858145015 }, { "epoch": 2.3699794781588976, "grad_norm": 0.34573849844965404, "learning_rate": 0.00019744574431976913, "loss": 3.0146141052246094, "step": 4043, "token_acc": 0.299043251781686 }, { "epoch": 2.3705658164761068, "grad_norm": 0.34549166340211396, "learning_rate": 0.00019744356728868546, "loss": 3.0047109127044678, "step": 4044, "token_acc": 0.2996667387560158 }, { "epoch": 2.371152154793316, "grad_norm": 0.3189120652155531, "learning_rate": 0.0001974413893422508, "loss": 3.05513596534729, "step": 4045, "token_acc": 0.2935813953488372 }, { "epoch": 2.3717384931105245, "grad_norm": 0.35265155484296584, "learning_rate": 0.0001974392104804856, "loss": 3.060643196105957, "step": 4046, "token_acc": 0.2915285889525623 }, { "epoch": 2.3723248314277336, "grad_norm": 0.3312010661027504, "learning_rate": 0.00019743703070341031, "loss": 3.0882105827331543, "step": 4047, "token_acc": 0.2879098647818853 }, { "epoch": 2.3729111697449428, "grad_norm": 0.3645549961071063, "learning_rate": 0.0001974348500110454, "loss": 3.045698881149292, "step": 4048, "token_acc": 0.29487339858856887 }, { "epoch": 2.373497508062152, "grad_norm": 0.34492936738773283, "learning_rate": 0.00019743266840341138, "loss": 3.0732100009918213, "step": 4049, "token_acc": 0.2907530565113851 }, { "epoch": 2.374083846379361, "grad_norm": 0.27640734855562965, "learning_rate": 0.00019743048588052872, "loss": 3.1030168533325195, "step": 4050, "token_acc": 0.2869292225315003 }, { "epoch": 2.37467018469657, "grad_norm": 0.3983753935949502, "learning_rate": 0.00019742830244241793, "loss": 3.065342426300049, "step": 4051, "token_acc": 0.29336490720460784 }, { "epoch": 2.3752565230137788, "grad_norm": 0.31185930933014083, "learning_rate": 0.0001974261180890996, "loss": 3.053422451019287, "step": 4052, "token_acc": 0.29468053665900745 }, { "epoch": 2.375842861330988, "grad_norm": 0.3197135069383617, "learning_rate": 0.0001974239328205941, "loss": 3.0621118545532227, "step": 4053, "token_acc": 0.29194192082637827 }, { "epoch": 2.376429199648197, "grad_norm": 0.39122664859307044, "learning_rate": 0.00019742174663692203, "loss": 3.084141731262207, "step": 4054, "token_acc": 0.28973876932573867 }, { "epoch": 2.377015537965406, "grad_norm": 0.41626362090386515, "learning_rate": 0.00019741955953810395, "loss": 3.0489418506622314, "step": 4055, "token_acc": 0.29564469750298067 }, { "epoch": 2.377601876282615, "grad_norm": 0.37015406747247503, "learning_rate": 0.00019741737152416036, "loss": 3.0619077682495117, "step": 4056, "token_acc": 0.29191145033332805 }, { "epoch": 2.378188214599824, "grad_norm": 0.3181942191240523, "learning_rate": 0.00019741518259511187, "loss": 3.0216617584228516, "step": 4057, "token_acc": 0.29683271157925534 }, { "epoch": 2.378774552917033, "grad_norm": 0.3514363996627795, "learning_rate": 0.000197412992750979, "loss": 3.075828790664673, "step": 4058, "token_acc": 0.2909657038749614 }, { "epoch": 2.379360891234242, "grad_norm": 0.349406071338303, "learning_rate": 0.00019741080199178233, "loss": 3.0800254344940186, "step": 4059, "token_acc": 0.29020382647351645 }, { "epoch": 2.379947229551451, "grad_norm": 0.32957404637611476, "learning_rate": 0.0001974086103175424, "loss": 3.052034616470337, "step": 4060, "token_acc": 0.2935027760770708 }, { "epoch": 2.3805335678686603, "grad_norm": 0.34928983401039476, "learning_rate": 0.0001974064177282799, "loss": 3.086732864379883, "step": 4061, "token_acc": 0.28873960232970164 }, { "epoch": 2.3811199061858694, "grad_norm": 0.3808710384732253, "learning_rate": 0.00019740422422401531, "loss": 3.0616822242736816, "step": 4062, "token_acc": 0.2920802584738794 }, { "epoch": 2.381706244503078, "grad_norm": 0.34980090771970274, "learning_rate": 0.0001974020298047693, "loss": 3.0339975357055664, "step": 4063, "token_acc": 0.29591441921031497 }, { "epoch": 2.382292582820287, "grad_norm": 0.3736204739614879, "learning_rate": 0.00019739983447056249, "loss": 3.016005516052246, "step": 4064, "token_acc": 0.2977012510617546 }, { "epoch": 2.3828789211374963, "grad_norm": 0.30737906401497556, "learning_rate": 0.00019739763822141545, "loss": 3.06400990486145, "step": 4065, "token_acc": 0.29100658614385483 }, { "epoch": 2.3834652594547054, "grad_norm": 0.33083162449094927, "learning_rate": 0.00019739544105734888, "loss": 3.0641822814941406, "step": 4066, "token_acc": 0.2911175580221998 }, { "epoch": 2.3840515977719146, "grad_norm": 0.4035405708881859, "learning_rate": 0.00019739324297838337, "loss": 3.1035690307617188, "step": 4067, "token_acc": 0.2875145778095913 }, { "epoch": 2.3846379360891232, "grad_norm": 0.3810975307925663, "learning_rate": 0.00019739104398453958, "loss": 3.060784339904785, "step": 4068, "token_acc": 0.2923531228385404 }, { "epoch": 2.3852242744063323, "grad_norm": 0.35022919064215496, "learning_rate": 0.00019738884407583814, "loss": 3.045821189880371, "step": 4069, "token_acc": 0.29527681052095317 }, { "epoch": 2.3858106127235414, "grad_norm": 0.2754435580939001, "learning_rate": 0.0001973866432522998, "loss": 3.030118703842163, "step": 4070, "token_acc": 0.2973155948249999 }, { "epoch": 2.3863969510407506, "grad_norm": 0.3811984634491082, "learning_rate": 0.00019738444151394516, "loss": 3.0654428005218506, "step": 4071, "token_acc": 0.2905192696424429 }, { "epoch": 2.3869832893579597, "grad_norm": 0.48580361423973595, "learning_rate": 0.00019738223886079487, "loss": 3.0588324069976807, "step": 4072, "token_acc": 0.29053195631237066 }, { "epoch": 2.387569627675169, "grad_norm": 0.5161100308390308, "learning_rate": 0.0001973800352928697, "loss": 3.027744770050049, "step": 4073, "token_acc": 0.2952268830664751 }, { "epoch": 2.3881559659923774, "grad_norm": 0.369702203119271, "learning_rate": 0.00019737783081019036, "loss": 3.0774636268615723, "step": 4074, "token_acc": 0.29210474376823653 }, { "epoch": 2.3887423043095866, "grad_norm": 0.34796003103025214, "learning_rate": 0.0001973756254127775, "loss": 3.083624839782715, "step": 4075, "token_acc": 0.2905960863554185 }, { "epoch": 2.3893286426267957, "grad_norm": 0.4789495072196346, "learning_rate": 0.00019737341910065182, "loss": 3.072279453277588, "step": 4076, "token_acc": 0.2904567491707068 }, { "epoch": 2.389914980944005, "grad_norm": 0.4530878018597216, "learning_rate": 0.00019737121187383407, "loss": 3.0857090950012207, "step": 4077, "token_acc": 0.28888513672898286 }, { "epoch": 2.390501319261214, "grad_norm": 0.40261593006764923, "learning_rate": 0.00019736900373234503, "loss": 3.063866138458252, "step": 4078, "token_acc": 0.292847459288176 }, { "epoch": 2.3910876575784226, "grad_norm": 0.429483274924287, "learning_rate": 0.0001973667946762054, "loss": 3.058635711669922, "step": 4079, "token_acc": 0.29353800494805043 }, { "epoch": 2.3916739958956317, "grad_norm": 0.3807741827597395, "learning_rate": 0.00019736458470543593, "loss": 3.026242256164551, "step": 4080, "token_acc": 0.29548366972954004 }, { "epoch": 2.392260334212841, "grad_norm": 0.5044305111070406, "learning_rate": 0.0001973623738200574, "loss": 3.0663278102874756, "step": 4081, "token_acc": 0.29209834220890524 }, { "epoch": 2.39284667253005, "grad_norm": 0.4321753551820758, "learning_rate": 0.00019736016202009053, "loss": 3.0572004318237305, "step": 4082, "token_acc": 0.29191245426567514 }, { "epoch": 2.393433010847259, "grad_norm": 0.44689884797087115, "learning_rate": 0.00019735794930555618, "loss": 3.0680956840515137, "step": 4083, "token_acc": 0.2908540093398363 }, { "epoch": 2.394019349164468, "grad_norm": 0.3841559342714359, "learning_rate": 0.00019735573567647508, "loss": 3.053382396697998, "step": 4084, "token_acc": 0.29260940251998374 }, { "epoch": 2.394605687481677, "grad_norm": 0.4815775001158555, "learning_rate": 0.000197353521132868, "loss": 3.0845413208007812, "step": 4085, "token_acc": 0.28775227350731897 }, { "epoch": 2.395192025798886, "grad_norm": 0.36144887283273275, "learning_rate": 0.00019735130567475579, "loss": 3.0509257316589355, "step": 4086, "token_acc": 0.29350153469680507 }, { "epoch": 2.395778364116095, "grad_norm": 0.3859121847675607, "learning_rate": 0.00019734908930215924, "loss": 3.1050310134887695, "step": 4087, "token_acc": 0.28709304160311366 }, { "epoch": 2.396364702433304, "grad_norm": 0.32370890561199395, "learning_rate": 0.00019734687201509917, "loss": 3.02191424369812, "step": 4088, "token_acc": 0.29807665012477386 }, { "epoch": 2.3969510407505132, "grad_norm": 0.367394004580195, "learning_rate": 0.00019734465381359646, "loss": 3.0496182441711426, "step": 4089, "token_acc": 0.29433242389508635 }, { "epoch": 2.397537379067722, "grad_norm": 0.31435121089871226, "learning_rate": 0.00019734243469767186, "loss": 3.0896310806274414, "step": 4090, "token_acc": 0.28738281209264493 }, { "epoch": 2.398123717384931, "grad_norm": 0.3524199001755509, "learning_rate": 0.00019734021466734627, "loss": 3.10182523727417, "step": 4091, "token_acc": 0.28656505421294304 }, { "epoch": 2.39871005570214, "grad_norm": 0.3044099532406422, "learning_rate": 0.00019733799372264054, "loss": 3.067777156829834, "step": 4092, "token_acc": 0.2913250129841515 }, { "epoch": 2.3992963940193492, "grad_norm": 0.3019869864125417, "learning_rate": 0.0001973357718635755, "loss": 3.0055742263793945, "step": 4093, "token_acc": 0.29962626690836647 }, { "epoch": 2.3998827323365584, "grad_norm": 0.31222147639654224, "learning_rate": 0.00019733354909017204, "loss": 3.0861077308654785, "step": 4094, "token_acc": 0.28815394734731203 }, { "epoch": 2.4004690706537675, "grad_norm": 0.36231464577666816, "learning_rate": 0.00019733132540245108, "loss": 3.080594301223755, "step": 4095, "token_acc": 0.2894749670940379 }, { "epoch": 2.401055408970976, "grad_norm": 0.3065063954729968, "learning_rate": 0.00019732910080043342, "loss": 3.1249656677246094, "step": 4096, "token_acc": 0.28354588336414144 }, { "epoch": 2.4016417472881852, "grad_norm": 0.3621373442185483, "learning_rate": 0.00019732687528414006, "loss": 3.073049306869507, "step": 4097, "token_acc": 0.2896589747999474 }, { "epoch": 2.4022280856053944, "grad_norm": 0.32959188632789627, "learning_rate": 0.00019732464885359186, "loss": 3.0778708457946777, "step": 4098, "token_acc": 0.2895755477653437 }, { "epoch": 2.4028144239226035, "grad_norm": 0.30329079378524876, "learning_rate": 0.00019732242150880967, "loss": 3.087095022201538, "step": 4099, "token_acc": 0.28905990776298224 }, { "epoch": 2.403400762239812, "grad_norm": 0.3396481053098095, "learning_rate": 0.00019732019324981455, "loss": 3.0798850059509277, "step": 4100, "token_acc": 0.2901997614031525 }, { "epoch": 2.4039871005570213, "grad_norm": 0.3175410560600566, "learning_rate": 0.0001973179640766273, "loss": 3.0958402156829834, "step": 4101, "token_acc": 0.2885734687482341 }, { "epoch": 2.4045734388742304, "grad_norm": 0.3036458834613522, "learning_rate": 0.00019731573398926896, "loss": 3.0448741912841797, "step": 4102, "token_acc": 0.29506101699256015 }, { "epoch": 2.4051597771914395, "grad_norm": 0.35794590630812734, "learning_rate": 0.0001973135029877604, "loss": 3.0634560585021973, "step": 4103, "token_acc": 0.290748252396199 }, { "epoch": 2.4057461155086486, "grad_norm": 0.40810975726245907, "learning_rate": 0.00019731127107212263, "loss": 3.1196486949920654, "step": 4104, "token_acc": 0.28546552154985333 }, { "epoch": 2.4063324538258577, "grad_norm": 0.37404703842103976, "learning_rate": 0.00019730903824237655, "loss": 3.0985782146453857, "step": 4105, "token_acc": 0.2873534947663295 }, { "epoch": 2.4069187921430664, "grad_norm": 0.3294698250429917, "learning_rate": 0.00019730680449854323, "loss": 3.0477705001831055, "step": 4106, "token_acc": 0.2959189304437254 }, { "epoch": 2.4075051304602755, "grad_norm": 0.3352970478197089, "learning_rate": 0.00019730456984064362, "loss": 3.042135715484619, "step": 4107, "token_acc": 0.2948903040198235 }, { "epoch": 2.4080914687774846, "grad_norm": 0.38868138584621537, "learning_rate": 0.00019730233426869863, "loss": 3.0357906818389893, "step": 4108, "token_acc": 0.2951099211945912 }, { "epoch": 2.4086778070946937, "grad_norm": 0.399042247796688, "learning_rate": 0.00019730009778272937, "loss": 3.0694785118103027, "step": 4109, "token_acc": 0.2914336310402214 }, { "epoch": 2.409264145411903, "grad_norm": 0.302635024218202, "learning_rate": 0.0001972978603827568, "loss": 3.0530624389648438, "step": 4110, "token_acc": 0.2929539497544827 }, { "epoch": 2.4098504837291115, "grad_norm": 0.29388734814551826, "learning_rate": 0.00019729562206880193, "loss": 3.051225423812866, "step": 4111, "token_acc": 0.2926953020134228 }, { "epoch": 2.4104368220463206, "grad_norm": 0.39619104895108326, "learning_rate": 0.00019729338284088583, "loss": 3.0711686611175537, "step": 4112, "token_acc": 0.28973295608697336 }, { "epoch": 2.4110231603635297, "grad_norm": 0.34309009998653844, "learning_rate": 0.00019729114269902948, "loss": 3.0238823890686035, "step": 4113, "token_acc": 0.2985431417135006 }, { "epoch": 2.411609498680739, "grad_norm": 0.3224358113733176, "learning_rate": 0.00019728890164325393, "loss": 3.065692901611328, "step": 4114, "token_acc": 0.29268561386021746 }, { "epoch": 2.412195836997948, "grad_norm": 0.3107994413040964, "learning_rate": 0.0001972866596735803, "loss": 3.0782065391540527, "step": 4115, "token_acc": 0.2911000199221986 }, { "epoch": 2.412782175315157, "grad_norm": 0.31777169765460433, "learning_rate": 0.00019728441679002954, "loss": 3.097886562347412, "step": 4116, "token_acc": 0.2865172175434872 }, { "epoch": 2.4133685136323657, "grad_norm": 0.3743217794705939, "learning_rate": 0.00019728217299262282, "loss": 3.057987689971924, "step": 4117, "token_acc": 0.2942075350773582 }, { "epoch": 2.413954851949575, "grad_norm": 0.3986550537729199, "learning_rate": 0.00019727992828138114, "loss": 3.070807456970215, "step": 4118, "token_acc": 0.28964028288661303 }, { "epoch": 2.414541190266784, "grad_norm": 0.39077653382619687, "learning_rate": 0.00019727768265632563, "loss": 3.0946884155273438, "step": 4119, "token_acc": 0.28825668210241856 }, { "epoch": 2.415127528583993, "grad_norm": 0.4492547497691847, "learning_rate": 0.00019727543611747737, "loss": 3.0969133377075195, "step": 4120, "token_acc": 0.2855234579734677 }, { "epoch": 2.415713866901202, "grad_norm": 0.3518914393279753, "learning_rate": 0.00019727318866485748, "loss": 3.068103075027466, "step": 4121, "token_acc": 0.29150716395056325 }, { "epoch": 2.416300205218411, "grad_norm": 0.31176922519806244, "learning_rate": 0.00019727094029848706, "loss": 3.055452346801758, "step": 4122, "token_acc": 0.2929140573793147 }, { "epoch": 2.41688654353562, "grad_norm": 0.3098967678578531, "learning_rate": 0.00019726869101838724, "loss": 3.0346579551696777, "step": 4123, "token_acc": 0.29467759755336015 }, { "epoch": 2.417472881852829, "grad_norm": 0.38102398840965657, "learning_rate": 0.0001972664408245791, "loss": 3.0992612838745117, "step": 4124, "token_acc": 0.2863387774347281 }, { "epoch": 2.418059220170038, "grad_norm": 0.3855656998064484, "learning_rate": 0.00019726418971708384, "loss": 3.096004009246826, "step": 4125, "token_acc": 0.2860356162000273 }, { "epoch": 2.4186455584872473, "grad_norm": 0.3505684632959905, "learning_rate": 0.0001972619376959226, "loss": 3.0726895332336426, "step": 4126, "token_acc": 0.29149255515560457 }, { "epoch": 2.4192318968044564, "grad_norm": 0.39080868730996693, "learning_rate": 0.00019725968476111652, "loss": 3.068895101547241, "step": 4127, "token_acc": 0.28996430580516047 }, { "epoch": 2.419818235121665, "grad_norm": 0.3309174377376033, "learning_rate": 0.00019725743091268672, "loss": 3.0411195755004883, "step": 4128, "token_acc": 0.29428724965380404 }, { "epoch": 2.420404573438874, "grad_norm": 0.33577124031501776, "learning_rate": 0.00019725517615065444, "loss": 3.0655517578125, "step": 4129, "token_acc": 0.2920482066961917 }, { "epoch": 2.4209909117560833, "grad_norm": 0.32180739995738616, "learning_rate": 0.00019725292047504084, "loss": 3.0562782287597656, "step": 4130, "token_acc": 0.2916351999042912 }, { "epoch": 2.4215772500732924, "grad_norm": 0.38895569592641843, "learning_rate": 0.0001972506638858671, "loss": 3.048586130142212, "step": 4131, "token_acc": 0.29324345460466095 }, { "epoch": 2.4221635883905015, "grad_norm": 0.36371703847741926, "learning_rate": 0.0001972484063831544, "loss": 3.040928363800049, "step": 4132, "token_acc": 0.2956311746497446 }, { "epoch": 2.42274992670771, "grad_norm": 0.28469923903113414, "learning_rate": 0.00019724614796692397, "loss": 3.079281806945801, "step": 4133, "token_acc": 0.29095520762785015 }, { "epoch": 2.4233362650249193, "grad_norm": 0.36273128920223086, "learning_rate": 0.00019724388863719703, "loss": 3.025628089904785, "step": 4134, "token_acc": 0.2965192001995218 }, { "epoch": 2.4239226033421284, "grad_norm": 0.3570521791072007, "learning_rate": 0.0001972416283939948, "loss": 3.0567104816436768, "step": 4135, "token_acc": 0.2926176922381856 }, { "epoch": 2.4245089416593375, "grad_norm": 0.34870473101868044, "learning_rate": 0.00019723936723733848, "loss": 3.0123395919799805, "step": 4136, "token_acc": 0.299218596233823 }, { "epoch": 2.4250952799765466, "grad_norm": 0.3971293349911346, "learning_rate": 0.00019723710516724935, "loss": 3.111453056335449, "step": 4137, "token_acc": 0.2853437941544416 }, { "epoch": 2.4256816182937557, "grad_norm": 0.36110919750980897, "learning_rate": 0.00019723484218374865, "loss": 3.0785531997680664, "step": 4138, "token_acc": 0.2899353374074712 }, { "epoch": 2.4262679566109644, "grad_norm": 0.33600988158608386, "learning_rate": 0.00019723257828685765, "loss": 3.0615620613098145, "step": 4139, "token_acc": 0.29270153296868673 }, { "epoch": 2.4268542949281735, "grad_norm": 0.3933557955203856, "learning_rate": 0.00019723031347659758, "loss": 3.048780679702759, "step": 4140, "token_acc": 0.2940633466177132 }, { "epoch": 2.4274406332453826, "grad_norm": 0.3837126450145685, "learning_rate": 0.00019722804775298974, "loss": 3.0851893424987793, "step": 4141, "token_acc": 0.28838084363768474 }, { "epoch": 2.4280269715625917, "grad_norm": 0.3324783939656905, "learning_rate": 0.0001972257811160554, "loss": 3.0466110706329346, "step": 4142, "token_acc": 0.2926182655251791 }, { "epoch": 2.4286133098798004, "grad_norm": 0.3478762704169274, "learning_rate": 0.00019722351356581586, "loss": 3.0483133792877197, "step": 4143, "token_acc": 0.2948984068220969 }, { "epoch": 2.4291996481970095, "grad_norm": 0.3069792766496504, "learning_rate": 0.00019722124510229244, "loss": 3.065051317214966, "step": 4144, "token_acc": 0.2908824360335046 }, { "epoch": 2.4297859865142186, "grad_norm": 0.36254686565607297, "learning_rate": 0.0001972189757255064, "loss": 3.0799670219421387, "step": 4145, "token_acc": 0.29067325198636323 }, { "epoch": 2.4303723248314277, "grad_norm": 0.36935567914896816, "learning_rate": 0.00019721670543547908, "loss": 3.106159210205078, "step": 4146, "token_acc": 0.2846457553137973 }, { "epoch": 2.430958663148637, "grad_norm": 0.2964785609075707, "learning_rate": 0.00019721443423223185, "loss": 3.071908473968506, "step": 4147, "token_acc": 0.2901360978474581 }, { "epoch": 2.431545001465846, "grad_norm": 0.33990902720305727, "learning_rate": 0.000197212162115786, "loss": 3.049102783203125, "step": 4148, "token_acc": 0.294573433173412 }, { "epoch": 2.432131339783055, "grad_norm": 0.34388051718932294, "learning_rate": 0.00019720988908616288, "loss": 3.123523473739624, "step": 4149, "token_acc": 0.2840472310199563 }, { "epoch": 2.4327176781002637, "grad_norm": 0.3391233561606437, "learning_rate": 0.00019720761514338385, "loss": 3.0774483680725098, "step": 4150, "token_acc": 0.2912793178095323 }, { "epoch": 2.433304016417473, "grad_norm": 0.3827550094511301, "learning_rate": 0.00019720534028747023, "loss": 3.109848976135254, "step": 4151, "token_acc": 0.2850491157108364 }, { "epoch": 2.433890354734682, "grad_norm": 0.395406057378136, "learning_rate": 0.00019720306451844345, "loss": 3.0666651725769043, "step": 4152, "token_acc": 0.29243923704860775 }, { "epoch": 2.434476693051891, "grad_norm": 0.33043570258587207, "learning_rate": 0.00019720078783632485, "loss": 3.0184414386749268, "step": 4153, "token_acc": 0.2987525882262023 }, { "epoch": 2.4350630313690997, "grad_norm": 0.3005273790691233, "learning_rate": 0.00019719851024113585, "loss": 3.0411694049835205, "step": 4154, "token_acc": 0.2944877148633673 }, { "epoch": 2.435649369686309, "grad_norm": 0.32225554289521124, "learning_rate": 0.0001971962317328978, "loss": 3.0806777477264404, "step": 4155, "token_acc": 0.2903598560683219 }, { "epoch": 2.436235708003518, "grad_norm": 0.40039868801302597, "learning_rate": 0.00019719395231163213, "loss": 3.0519824028015137, "step": 4156, "token_acc": 0.293357453376935 }, { "epoch": 2.436822046320727, "grad_norm": 0.3390099732280655, "learning_rate": 0.00019719167197736025, "loss": 3.105412721633911, "step": 4157, "token_acc": 0.28408085232659214 }, { "epoch": 2.437408384637936, "grad_norm": 0.34405013110154337, "learning_rate": 0.00019718939073010358, "loss": 3.063797950744629, "step": 4158, "token_acc": 0.29279636802336656 }, { "epoch": 2.4379947229551453, "grad_norm": 0.36500065444887214, "learning_rate": 0.0001971871085698836, "loss": 3.10847806930542, "step": 4159, "token_acc": 0.2860872122950217 }, { "epoch": 2.438581061272354, "grad_norm": 0.3187053065425624, "learning_rate": 0.0001971848254967216, "loss": 3.0630266666412354, "step": 4160, "token_acc": 0.29082313786027497 }, { "epoch": 2.439167399589563, "grad_norm": 0.33857458848324584, "learning_rate": 0.00019718254151063918, "loss": 3.052246570587158, "step": 4161, "token_acc": 0.2940540258792562 }, { "epoch": 2.439753737906772, "grad_norm": 0.3179457269775509, "learning_rate": 0.00019718025661165776, "loss": 3.072429656982422, "step": 4162, "token_acc": 0.29034941763727123 }, { "epoch": 2.4403400762239813, "grad_norm": 0.30982036407592634, "learning_rate": 0.00019717797079979872, "loss": 3.072239398956299, "step": 4163, "token_acc": 0.2904579133023296 }, { "epoch": 2.4409264145411904, "grad_norm": 0.3382823840619922, "learning_rate": 0.00019717568407508362, "loss": 3.066800832748413, "step": 4164, "token_acc": 0.29034281361097003 }, { "epoch": 2.441512752858399, "grad_norm": 0.3502895905393527, "learning_rate": 0.00019717339643753393, "loss": 3.0801172256469727, "step": 4165, "token_acc": 0.2905032450031264 }, { "epoch": 2.442099091175608, "grad_norm": 0.3267901712832557, "learning_rate": 0.0001971711078871711, "loss": 3.0572760105133057, "step": 4166, "token_acc": 0.2924925370841633 }, { "epoch": 2.4426854294928173, "grad_norm": 0.2941538290492267, "learning_rate": 0.00019716881842401666, "loss": 3.1201086044311523, "step": 4167, "token_acc": 0.2841321382786749 }, { "epoch": 2.4432717678100264, "grad_norm": 0.37166343604598084, "learning_rate": 0.00019716652804809213, "loss": 3.0689282417297363, "step": 4168, "token_acc": 0.2922231580636105 }, { "epoch": 2.4438581061272355, "grad_norm": 0.47423055990950175, "learning_rate": 0.00019716423675941898, "loss": 3.1105926036834717, "step": 4169, "token_acc": 0.28560351107131016 }, { "epoch": 2.4444444444444446, "grad_norm": 0.4801363226790007, "learning_rate": 0.00019716194455801875, "loss": 3.09112548828125, "step": 4170, "token_acc": 0.2895343922349478 }, { "epoch": 2.4450307827616533, "grad_norm": 0.3155882264602728, "learning_rate": 0.000197159651443913, "loss": 3.0731444358825684, "step": 4171, "token_acc": 0.29129294559448804 }, { "epoch": 2.4456171210788624, "grad_norm": 0.39674761151744875, "learning_rate": 0.0001971573574171232, "loss": 3.089888572692871, "step": 4172, "token_acc": 0.2871859881094699 }, { "epoch": 2.4462034593960715, "grad_norm": 0.3415928025320668, "learning_rate": 0.000197155062477671, "loss": 3.0826191902160645, "step": 4173, "token_acc": 0.28909895802120433 }, { "epoch": 2.4467897977132806, "grad_norm": 0.392140189451025, "learning_rate": 0.0001971527666255779, "loss": 3.0924389362335205, "step": 4174, "token_acc": 0.2880713360995019 }, { "epoch": 2.4473761360304898, "grad_norm": 0.34498438867737363, "learning_rate": 0.00019715046986086546, "loss": 3.0283966064453125, "step": 4175, "token_acc": 0.2980381199901945 }, { "epoch": 2.4479624743476984, "grad_norm": 0.402308367945163, "learning_rate": 0.00019714817218355525, "loss": 3.1077630519866943, "step": 4176, "token_acc": 0.28314003356330414 }, { "epoch": 2.4485488126649075, "grad_norm": 0.40895309108295946, "learning_rate": 0.00019714587359366892, "loss": 3.064359188079834, "step": 4177, "token_acc": 0.29165812127618174 }, { "epoch": 2.4491351509821166, "grad_norm": 0.3906467171937156, "learning_rate": 0.00019714357409122797, "loss": 3.0735812187194824, "step": 4178, "token_acc": 0.2896861058941919 }, { "epoch": 2.4497214892993258, "grad_norm": 0.37923065082947793, "learning_rate": 0.0001971412736762541, "loss": 3.0578784942626953, "step": 4179, "token_acc": 0.29231884439992617 }, { "epoch": 2.450307827616535, "grad_norm": 0.3535407469192918, "learning_rate": 0.0001971389723487688, "loss": 3.045675277709961, "step": 4180, "token_acc": 0.2945538274346544 }, { "epoch": 2.450894165933744, "grad_norm": 0.3384029661148089, "learning_rate": 0.00019713667010879375, "loss": 3.067142963409424, "step": 4181, "token_acc": 0.29043250063591924 }, { "epoch": 2.4514805042509527, "grad_norm": 0.3657740464408111, "learning_rate": 0.0001971343669563506, "loss": 3.0889322757720947, "step": 4182, "token_acc": 0.28872409223152506 }, { "epoch": 2.4520668425681618, "grad_norm": 0.40084416027978254, "learning_rate": 0.00019713206289146098, "loss": 3.1017560958862305, "step": 4183, "token_acc": 0.2857810464427161 }, { "epoch": 2.452653180885371, "grad_norm": 0.3185543569110051, "learning_rate": 0.0001971297579141465, "loss": 3.083775043487549, "step": 4184, "token_acc": 0.29060125416958976 }, { "epoch": 2.45323951920258, "grad_norm": 0.37930038598613913, "learning_rate": 0.0001971274520244288, "loss": 3.050126314163208, "step": 4185, "token_acc": 0.2910102499210684 }, { "epoch": 2.453825857519789, "grad_norm": 0.31118492689585786, "learning_rate": 0.0001971251452223296, "loss": 3.004601001739502, "step": 4186, "token_acc": 0.30126722682741003 }, { "epoch": 2.4544121958369978, "grad_norm": 0.3790168078683845, "learning_rate": 0.00019712283750787055, "loss": 3.053802967071533, "step": 4187, "token_acc": 0.29303486688547564 }, { "epoch": 2.454998534154207, "grad_norm": 0.41641676076270695, "learning_rate": 0.0001971205288810733, "loss": 3.118191957473755, "step": 4188, "token_acc": 0.2840567209043978 }, { "epoch": 2.455584872471416, "grad_norm": 0.31704039251858346, "learning_rate": 0.00019711821934195956, "loss": 3.0292046070098877, "step": 4189, "token_acc": 0.29699596927523003 }, { "epoch": 2.456171210788625, "grad_norm": 0.3862951830052431, "learning_rate": 0.000197115908890551, "loss": 3.09126353263855, "step": 4190, "token_acc": 0.28726345927363345 }, { "epoch": 2.456757549105834, "grad_norm": 0.39928113181654984, "learning_rate": 0.00019711359752686938, "loss": 2.996582508087158, "step": 4191, "token_acc": 0.29963132098285616 }, { "epoch": 2.4573438874230433, "grad_norm": 0.3638933434570716, "learning_rate": 0.00019711128525093635, "loss": 3.0232417583465576, "step": 4192, "token_acc": 0.2977536056808513 }, { "epoch": 2.457930225740252, "grad_norm": 0.34467744356244073, "learning_rate": 0.00019710897206277363, "loss": 3.0724964141845703, "step": 4193, "token_acc": 0.29059954949349764 }, { "epoch": 2.458516564057461, "grad_norm": 0.4193261392216931, "learning_rate": 0.000197106657962403, "loss": 3.0611634254455566, "step": 4194, "token_acc": 0.2911900609454875 }, { "epoch": 2.45910290237467, "grad_norm": 0.3358736270166866, "learning_rate": 0.00019710434294984618, "loss": 3.071831226348877, "step": 4195, "token_acc": 0.29266754270696455 }, { "epoch": 2.4596892406918793, "grad_norm": 0.4862797502874084, "learning_rate": 0.0001971020270251249, "loss": 3.093724012374878, "step": 4196, "token_acc": 0.28651427707351496 }, { "epoch": 2.460275579009088, "grad_norm": 0.42867232936043154, "learning_rate": 0.00019709971018826088, "loss": 3.072751760482788, "step": 4197, "token_acc": 0.29046735795523176 }, { "epoch": 2.460861917326297, "grad_norm": 0.36915236645100996, "learning_rate": 0.00019709739243927595, "loss": 3.047736883163452, "step": 4198, "token_acc": 0.29470716282661996 }, { "epoch": 2.4614482556435062, "grad_norm": 0.3746889346217329, "learning_rate": 0.00019709507377819189, "loss": 3.0751123428344727, "step": 4199, "token_acc": 0.2896070464584206 }, { "epoch": 2.4620345939607153, "grad_norm": 0.3841745057035483, "learning_rate": 0.00019709275420503044, "loss": 3.0227293968200684, "step": 4200, "token_acc": 0.2971805455803467 }, { "epoch": 2.4626209322779244, "grad_norm": 0.34086264104313463, "learning_rate": 0.00019709043371981337, "loss": 3.060875654220581, "step": 4201, "token_acc": 0.29305365597758154 }, { "epoch": 2.4632072705951336, "grad_norm": 0.3438539940142994, "learning_rate": 0.00019708811232256251, "loss": 3.1130895614624023, "step": 4202, "token_acc": 0.2862487403207026 }, { "epoch": 2.4637936089123427, "grad_norm": 0.4128262369767911, "learning_rate": 0.0001970857900132997, "loss": 3.1298396587371826, "step": 4203, "token_acc": 0.2837900470024483 }, { "epoch": 2.4643799472295513, "grad_norm": 0.3043408648852795, "learning_rate": 0.0001970834667920467, "loss": 3.0559239387512207, "step": 4204, "token_acc": 0.29351395949218856 }, { "epoch": 2.4649662855467604, "grad_norm": 0.35043657334144934, "learning_rate": 0.00019708114265882534, "loss": 3.078447103500366, "step": 4205, "token_acc": 0.2899473147295848 }, { "epoch": 2.4655526238639696, "grad_norm": 0.39087018238146287, "learning_rate": 0.00019707881761365744, "loss": 3.044036388397217, "step": 4206, "token_acc": 0.2941168844638042 }, { "epoch": 2.4661389621811787, "grad_norm": 0.4085056434103663, "learning_rate": 0.00019707649165656493, "loss": 3.0631327629089355, "step": 4207, "token_acc": 0.29160898828903375 }, { "epoch": 2.4667253004983873, "grad_norm": 0.39348139726028375, "learning_rate": 0.00019707416478756954, "loss": 3.1001412868499756, "step": 4208, "token_acc": 0.2875622587837119 }, { "epoch": 2.4673116388155965, "grad_norm": 0.32406822916694694, "learning_rate": 0.0001970718370066932, "loss": 3.0393834114074707, "step": 4209, "token_acc": 0.294212247841233 }, { "epoch": 2.4678979771328056, "grad_norm": 0.34330933735561014, "learning_rate": 0.00019706950831395776, "loss": 3.063748836517334, "step": 4210, "token_acc": 0.291339089425369 }, { "epoch": 2.4684843154500147, "grad_norm": 0.3742329277997824, "learning_rate": 0.0001970671787093851, "loss": 3.083432197570801, "step": 4211, "token_acc": 0.28902603814770544 }, { "epoch": 2.469070653767224, "grad_norm": 0.3129515303554952, "learning_rate": 0.00019706484819299706, "loss": 3.12119197845459, "step": 4212, "token_acc": 0.28283226923745997 }, { "epoch": 2.469656992084433, "grad_norm": 0.3931215599906331, "learning_rate": 0.0001970625167648156, "loss": 3.0883567333221436, "step": 4213, "token_acc": 0.286826200462866 }, { "epoch": 2.4702433304016416, "grad_norm": 0.31525792328379915, "learning_rate": 0.00019706018442486255, "loss": 3.094846248626709, "step": 4214, "token_acc": 0.28654941448654464 }, { "epoch": 2.4708296687188507, "grad_norm": 0.3961034145120364, "learning_rate": 0.00019705785117315992, "loss": 3.084347724914551, "step": 4215, "token_acc": 0.289566771899129 }, { "epoch": 2.47141600703606, "grad_norm": 0.3596066696582057, "learning_rate": 0.0001970555170097295, "loss": 3.038320779800415, "step": 4216, "token_acc": 0.29354741857581096 }, { "epoch": 2.472002345353269, "grad_norm": 0.32150312180483404, "learning_rate": 0.00019705318193459333, "loss": 3.0578691959381104, "step": 4217, "token_acc": 0.2936278439578622 }, { "epoch": 2.472588683670478, "grad_norm": 0.3587195332214216, "learning_rate": 0.00019705084594777328, "loss": 3.031719446182251, "step": 4218, "token_acc": 0.2964798569891725 }, { "epoch": 2.4731750219876867, "grad_norm": 0.3200242024476419, "learning_rate": 0.00019704850904929131, "loss": 3.0855536460876465, "step": 4219, "token_acc": 0.28723426741562347 }, { "epoch": 2.473761360304896, "grad_norm": 0.36876044848082185, "learning_rate": 0.00019704617123916937, "loss": 3.0524094104766846, "step": 4220, "token_acc": 0.29610902156062935 }, { "epoch": 2.474347698622105, "grad_norm": 0.37855788483316366, "learning_rate": 0.00019704383251742944, "loss": 3.0626096725463867, "step": 4221, "token_acc": 0.29265976814262995 }, { "epoch": 2.474934036939314, "grad_norm": 0.38244532946259363, "learning_rate": 0.00019704149288409344, "loss": 3.053119421005249, "step": 4222, "token_acc": 0.2910223390555656 }, { "epoch": 2.475520375256523, "grad_norm": 0.35240126209972156, "learning_rate": 0.0001970391523391834, "loss": 3.0764575004577637, "step": 4223, "token_acc": 0.2894879376786543 }, { "epoch": 2.4761067135737322, "grad_norm": 0.37133657307962803, "learning_rate": 0.00019703681088272128, "loss": 3.0852982997894287, "step": 4224, "token_acc": 0.28919917156553326 }, { "epoch": 2.476693051890941, "grad_norm": 0.345985293597402, "learning_rate": 0.00019703446851472909, "loss": 3.076150894165039, "step": 4225, "token_acc": 0.29017247300832283 }, { "epoch": 2.47727939020815, "grad_norm": 0.3560659436054095, "learning_rate": 0.00019703212523522877, "loss": 3.042570114135742, "step": 4226, "token_acc": 0.29543749787647766 }, { "epoch": 2.477865728525359, "grad_norm": 0.3464179235906566, "learning_rate": 0.00019702978104424245, "loss": 3.071335554122925, "step": 4227, "token_acc": 0.29064548444858135 }, { "epoch": 2.4784520668425682, "grad_norm": 0.33198616086563704, "learning_rate": 0.00019702743594179206, "loss": 3.064554452896118, "step": 4228, "token_acc": 0.29197052038335886 }, { "epoch": 2.4790384051597774, "grad_norm": 0.3584311210096221, "learning_rate": 0.00019702508992789969, "loss": 3.090202808380127, "step": 4229, "token_acc": 0.2888358959257935 }, { "epoch": 2.479624743476986, "grad_norm": 0.38222222078627915, "learning_rate": 0.0001970227430025873, "loss": 2.983476161956787, "step": 4230, "token_acc": 0.3023422304561807 }, { "epoch": 2.480211081794195, "grad_norm": 0.3935060002683657, "learning_rate": 0.000197020395165877, "loss": 3.04555606842041, "step": 4231, "token_acc": 0.2929944550005954 }, { "epoch": 2.4807974201114043, "grad_norm": 0.2911962759615048, "learning_rate": 0.00019701804641779084, "loss": 3.080777168273926, "step": 4232, "token_acc": 0.29016147739369447 }, { "epoch": 2.4813837584286134, "grad_norm": 0.3708685431130037, "learning_rate": 0.00019701569675835084, "loss": 3.052978754043579, "step": 4233, "token_acc": 0.2930309012402967 }, { "epoch": 2.4819700967458225, "grad_norm": 0.4310917365080607, "learning_rate": 0.00019701334618757907, "loss": 3.0925943851470947, "step": 4234, "token_acc": 0.2857361497695195 }, { "epoch": 2.4825564350630316, "grad_norm": 0.3066174501027044, "learning_rate": 0.0001970109947054977, "loss": 3.057028293609619, "step": 4235, "token_acc": 0.294473452656315 }, { "epoch": 2.4831427733802403, "grad_norm": 0.35815747343620646, "learning_rate": 0.00019700864231212873, "loss": 3.091637134552002, "step": 4236, "token_acc": 0.28774892706579397 }, { "epoch": 2.4837291116974494, "grad_norm": 0.42772081640843995, "learning_rate": 0.00019700628900749426, "loss": 3.0547335147857666, "step": 4237, "token_acc": 0.2929629600377008 }, { "epoch": 2.4843154500146585, "grad_norm": 0.35350017000636264, "learning_rate": 0.00019700393479161647, "loss": 3.049715518951416, "step": 4238, "token_acc": 0.2955851520319816 }, { "epoch": 2.4849017883318676, "grad_norm": 0.3242280283403194, "learning_rate": 0.00019700157966451743, "loss": 3.0756049156188965, "step": 4239, "token_acc": 0.2884507483660296 }, { "epoch": 2.4854881266490767, "grad_norm": 0.32674039912560665, "learning_rate": 0.00019699922362621924, "loss": 3.114276885986328, "step": 4240, "token_acc": 0.284856702747494 }, { "epoch": 2.4860744649662854, "grad_norm": 0.2950398621703499, "learning_rate": 0.00019699686667674405, "loss": 3.0686473846435547, "step": 4241, "token_acc": 0.28999309200380324 }, { "epoch": 2.4866608032834945, "grad_norm": 0.3252651916377247, "learning_rate": 0.00019699450881611398, "loss": 3.041412353515625, "step": 4242, "token_acc": 0.2955505935145705 }, { "epoch": 2.4872471416007036, "grad_norm": 0.3156980311698982, "learning_rate": 0.00019699215004435124, "loss": 3.0570967197418213, "step": 4243, "token_acc": 0.2916413015602455 }, { "epoch": 2.4878334799179127, "grad_norm": 0.35878732717885875, "learning_rate": 0.00019698979036147793, "loss": 3.0648746490478516, "step": 4244, "token_acc": 0.29183389127067433 }, { "epoch": 2.488419818235122, "grad_norm": 0.3529111266813683, "learning_rate": 0.00019698742976751623, "loss": 3.0728635787963867, "step": 4245, "token_acc": 0.29015767661550174 }, { "epoch": 2.489006156552331, "grad_norm": 0.32780756168528424, "learning_rate": 0.00019698506826248835, "loss": 3.0516066551208496, "step": 4246, "token_acc": 0.2932093497162575 }, { "epoch": 2.4895924948695396, "grad_norm": 0.49324166135780145, "learning_rate": 0.00019698270584641642, "loss": 3.0820698738098145, "step": 4247, "token_acc": 0.2889302644889001 }, { "epoch": 2.4901788331867487, "grad_norm": 0.6081554803831578, "learning_rate": 0.00019698034251932264, "loss": 3.0680360794067383, "step": 4248, "token_acc": 0.29175285041889987 }, { "epoch": 2.490765171503958, "grad_norm": 0.38874890448722127, "learning_rate": 0.00019697797828122923, "loss": 3.059863328933716, "step": 4249, "token_acc": 0.29178568568301344 }, { "epoch": 2.491351509821167, "grad_norm": 0.36229122970867705, "learning_rate": 0.0001969756131321584, "loss": 3.071688652038574, "step": 4250, "token_acc": 0.29014610731667206 }, { "epoch": 2.4919378481383756, "grad_norm": 0.34572328325591845, "learning_rate": 0.0001969732470721324, "loss": 3.057602882385254, "step": 4251, "token_acc": 0.2933786700329845 }, { "epoch": 2.4925241864555847, "grad_norm": 0.3962962759369881, "learning_rate": 0.00019697088010117337, "loss": 3.062619209289551, "step": 4252, "token_acc": 0.29112977420963093 }, { "epoch": 2.493110524772794, "grad_norm": 0.3245900387261144, "learning_rate": 0.0001969685122193036, "loss": 3.086968421936035, "step": 4253, "token_acc": 0.28669597332984703 }, { "epoch": 2.493696863090003, "grad_norm": 0.35953877848511784, "learning_rate": 0.00019696614342654532, "loss": 3.055788278579712, "step": 4254, "token_acc": 0.29377657207499425 }, { "epoch": 2.494283201407212, "grad_norm": 0.3106456372865276, "learning_rate": 0.0001969637737229208, "loss": 3.0775251388549805, "step": 4255, "token_acc": 0.29064671269905135 }, { "epoch": 2.494869539724421, "grad_norm": 0.37130104272404985, "learning_rate": 0.0001969614031084523, "loss": 3.0594663619995117, "step": 4256, "token_acc": 0.2911690455384018 }, { "epoch": 2.49545587804163, "grad_norm": 0.310730363746335, "learning_rate": 0.00019695903158316205, "loss": 3.0579566955566406, "step": 4257, "token_acc": 0.292399312845963 }, { "epoch": 2.496042216358839, "grad_norm": 0.3647755546587646, "learning_rate": 0.00019695665914707235, "loss": 3.0624704360961914, "step": 4258, "token_acc": 0.29135647986821683 }, { "epoch": 2.496628554676048, "grad_norm": 0.34203754206018394, "learning_rate": 0.0001969542858002055, "loss": 3.0100512504577637, "step": 4259, "token_acc": 0.29760081941083943 }, { "epoch": 2.497214892993257, "grad_norm": 0.30909799388302905, "learning_rate": 0.0001969519115425838, "loss": 3.025747776031494, "step": 4260, "token_acc": 0.29853345937504894 }, { "epoch": 2.4978012313104663, "grad_norm": 0.35429943796887864, "learning_rate": 0.00019694953637422948, "loss": 3.1122536659240723, "step": 4261, "token_acc": 0.28549065142004076 }, { "epoch": 2.498387569627675, "grad_norm": 0.2958711474779633, "learning_rate": 0.00019694716029516497, "loss": 3.0388975143432617, "step": 4262, "token_acc": 0.29537905071233483 }, { "epoch": 2.498973907944884, "grad_norm": 0.33326262913838706, "learning_rate": 0.00019694478330541245, "loss": 3.065325975418091, "step": 4263, "token_acc": 0.291666772528983 }, { "epoch": 2.499560246262093, "grad_norm": 0.29915169741530456, "learning_rate": 0.0001969424054049944, "loss": 3.1092684268951416, "step": 4264, "token_acc": 0.28678721240790683 }, { "epoch": 2.5001465845793023, "grad_norm": 0.3586325927376122, "learning_rate": 0.00019694002659393305, "loss": 3.059685707092285, "step": 4265, "token_acc": 0.29272692922772353 }, { "epoch": 2.5007329228965114, "grad_norm": 0.2951741782608592, "learning_rate": 0.00019693764687225078, "loss": 3.0540084838867188, "step": 4266, "token_acc": 0.29301378916298704 }, { "epoch": 2.5013192612137205, "grad_norm": 0.36848177083445705, "learning_rate": 0.00019693526623996993, "loss": 3.0449187755584717, "step": 4267, "token_acc": 0.2955846940369666 }, { "epoch": 2.5019055995309296, "grad_norm": 0.38196588176099133, "learning_rate": 0.00019693288469711294, "loss": 3.069197177886963, "step": 4268, "token_acc": 0.2907751230289706 }, { "epoch": 2.5024919378481383, "grad_norm": 0.38074972265356, "learning_rate": 0.00019693050224370203, "loss": 3.067047357559204, "step": 4269, "token_acc": 0.290188198268282 }, { "epoch": 2.5030782761653474, "grad_norm": 0.29349626900861914, "learning_rate": 0.00019692811887975974, "loss": 3.0979738235473633, "step": 4270, "token_acc": 0.2871397754015111 }, { "epoch": 2.5036646144825565, "grad_norm": 0.3362206857377582, "learning_rate": 0.00019692573460530834, "loss": 3.0864076614379883, "step": 4271, "token_acc": 0.2898633302262075 }, { "epoch": 2.5042509527997656, "grad_norm": 0.30654983838389, "learning_rate": 0.00019692334942037027, "loss": 3.039487361907959, "step": 4272, "token_acc": 0.29460107762716564 }, { "epoch": 2.5048372911169743, "grad_norm": 0.3096373344717664, "learning_rate": 0.00019692096332496798, "loss": 3.075105667114258, "step": 4273, "token_acc": 0.2879035124163442 }, { "epoch": 2.5054236294341834, "grad_norm": 0.33270065585083247, "learning_rate": 0.00019691857631912377, "loss": 3.098588466644287, "step": 4274, "token_acc": 0.28900199982980174 }, { "epoch": 2.5060099677513925, "grad_norm": 0.2912978189495693, "learning_rate": 0.0001969161884028602, "loss": 3.0480523109436035, "step": 4275, "token_acc": 0.29444588769835167 }, { "epoch": 2.5065963060686016, "grad_norm": 0.3875537821570306, "learning_rate": 0.00019691379957619963, "loss": 3.0992660522460938, "step": 4276, "token_acc": 0.28795357292537016 }, { "epoch": 2.5071826443858107, "grad_norm": 0.3573293807075769, "learning_rate": 0.00019691140983916448, "loss": 3.066530227661133, "step": 4277, "token_acc": 0.29093963155705216 }, { "epoch": 2.50776898270302, "grad_norm": 0.3204927335745619, "learning_rate": 0.00019690901919177723, "loss": 3.0874500274658203, "step": 4278, "token_acc": 0.2891207428220814 }, { "epoch": 2.5083553210202285, "grad_norm": 0.39469853255710097, "learning_rate": 0.00019690662763406034, "loss": 3.0678510665893555, "step": 4279, "token_acc": 0.292323678262773 }, { "epoch": 2.5089416593374376, "grad_norm": 0.33547454289248085, "learning_rate": 0.00019690423516603627, "loss": 3.06976318359375, "step": 4280, "token_acc": 0.29084254057102277 }, { "epoch": 2.5095279976546467, "grad_norm": 0.2846069944961093, "learning_rate": 0.00019690184178772747, "loss": 3.0748887062072754, "step": 4281, "token_acc": 0.2905438016791925 }, { "epoch": 2.510114335971856, "grad_norm": 0.2884888568963429, "learning_rate": 0.00019689944749915646, "loss": 3.0691356658935547, "step": 4282, "token_acc": 0.29161151110106653 }, { "epoch": 2.5107006742890645, "grad_norm": 0.2948983658280612, "learning_rate": 0.00019689705230034572, "loss": 3.0616114139556885, "step": 4283, "token_acc": 0.2921807189089474 }, { "epoch": 2.5112870126062736, "grad_norm": 0.35217252631866836, "learning_rate": 0.00019689465619131773, "loss": 3.0825552940368652, "step": 4284, "token_acc": 0.28761349902383093 }, { "epoch": 2.5118733509234827, "grad_norm": 0.3087504648428041, "learning_rate": 0.00019689225917209502, "loss": 3.0944318771362305, "step": 4285, "token_acc": 0.2890250681631681 }, { "epoch": 2.512459689240692, "grad_norm": 0.3145846193285609, "learning_rate": 0.0001968898612427001, "loss": 3.076207160949707, "step": 4286, "token_acc": 0.2914058492948969 }, { "epoch": 2.513046027557901, "grad_norm": 0.2654875367016934, "learning_rate": 0.0001968874624031555, "loss": 3.073927164077759, "step": 4287, "token_acc": 0.2920855189750035 }, { "epoch": 2.51363236587511, "grad_norm": 0.2996510314217945, "learning_rate": 0.00019688506265348372, "loss": 3.0828936100006104, "step": 4288, "token_acc": 0.28901591721442055 }, { "epoch": 2.514218704192319, "grad_norm": 0.3479351354338647, "learning_rate": 0.00019688266199370736, "loss": 3.0713021755218506, "step": 4289, "token_acc": 0.290365323228964 }, { "epoch": 2.514805042509528, "grad_norm": 0.2952253270233297, "learning_rate": 0.00019688026042384893, "loss": 3.0414981842041016, "step": 4290, "token_acc": 0.2945317903560479 }, { "epoch": 2.515391380826737, "grad_norm": 0.29482419781438785, "learning_rate": 0.000196877857943931, "loss": 3.053361415863037, "step": 4291, "token_acc": 0.29271100805663475 }, { "epoch": 2.515977719143946, "grad_norm": 0.29923135721811017, "learning_rate": 0.00019687545455397617, "loss": 3.074878454208374, "step": 4292, "token_acc": 0.2909384444501847 }, { "epoch": 2.516564057461155, "grad_norm": 0.3153730248985017, "learning_rate": 0.00019687305025400693, "loss": 3.07342529296875, "step": 4293, "token_acc": 0.29024290470741143 }, { "epoch": 2.517150395778364, "grad_norm": 0.3277224471189852, "learning_rate": 0.00019687064504404596, "loss": 3.0775301456451416, "step": 4294, "token_acc": 0.2902218583664022 }, { "epoch": 2.517736734095573, "grad_norm": 0.38624938110520857, "learning_rate": 0.0001968682389241158, "loss": 3.0417943000793457, "step": 4295, "token_acc": 0.29495728555743017 }, { "epoch": 2.518323072412782, "grad_norm": 0.30754087268882213, "learning_rate": 0.00019686583189423905, "loss": 3.060079574584961, "step": 4296, "token_acc": 0.292336483329794 }, { "epoch": 2.518909410729991, "grad_norm": 0.36677655515284674, "learning_rate": 0.00019686342395443837, "loss": 3.0611917972564697, "step": 4297, "token_acc": 0.29271803772429106 }, { "epoch": 2.5194957490472003, "grad_norm": 0.4774930882813341, "learning_rate": 0.00019686101510473633, "loss": 3.1029200553894043, "step": 4298, "token_acc": 0.28623283342838207 }, { "epoch": 2.5200820873644094, "grad_norm": 0.5453976021391657, "learning_rate": 0.0001968586053451556, "loss": 3.0844669342041016, "step": 4299, "token_acc": 0.28902091615977105 }, { "epoch": 2.5206684256816185, "grad_norm": 0.36815015962285796, "learning_rate": 0.00019685619467571877, "loss": 3.0689823627471924, "step": 4300, "token_acc": 0.29033517273195053 }, { "epoch": 2.521254763998827, "grad_norm": 0.3548288985881831, "learning_rate": 0.00019685378309644848, "loss": 3.0501694679260254, "step": 4301, "token_acc": 0.293307648017908 }, { "epoch": 2.5218411023160363, "grad_norm": 0.31235179900632937, "learning_rate": 0.00019685137060736744, "loss": 3.057091236114502, "step": 4302, "token_acc": 0.29191728232090075 }, { "epoch": 2.5224274406332454, "grad_norm": 0.3231893918214211, "learning_rate": 0.0001968489572084983, "loss": 3.0587756633758545, "step": 4303, "token_acc": 0.2907863910068024 }, { "epoch": 2.5230137789504545, "grad_norm": 0.31483750730230553, "learning_rate": 0.0001968465428998637, "loss": 3.045316696166992, "step": 4304, "token_acc": 0.2941504803232982 }, { "epoch": 2.523600117267663, "grad_norm": 0.33571919566464925, "learning_rate": 0.0001968441276814863, "loss": 3.056135892868042, "step": 4305, "token_acc": 0.29355658305517124 }, { "epoch": 2.5241864555848723, "grad_norm": 0.3018960509991811, "learning_rate": 0.00019684171155338884, "loss": 3.066023349761963, "step": 4306, "token_acc": 0.2917258974794195 }, { "epoch": 2.5247727939020814, "grad_norm": 0.31703761013763104, "learning_rate": 0.000196839294515594, "loss": 3.057170867919922, "step": 4307, "token_acc": 0.29054298999133105 }, { "epoch": 2.5253591322192905, "grad_norm": 0.23553694752904497, "learning_rate": 0.0001968368765681245, "loss": 3.0816755294799805, "step": 4308, "token_acc": 0.28833104949115657 }, { "epoch": 2.5259454705364996, "grad_norm": 0.35316999371866, "learning_rate": 0.00019683445771100303, "loss": 3.082458257675171, "step": 4309, "token_acc": 0.28933210420507693 }, { "epoch": 2.5265318088537088, "grad_norm": 0.3305611543492661, "learning_rate": 0.00019683203794425226, "loss": 3.0755796432495117, "step": 4310, "token_acc": 0.2882406994284743 }, { "epoch": 2.527118147170918, "grad_norm": 0.3154034592729068, "learning_rate": 0.00019682961726789504, "loss": 3.040978193283081, "step": 4311, "token_acc": 0.2952644091414348 }, { "epoch": 2.5277044854881265, "grad_norm": 0.3168003929354628, "learning_rate": 0.00019682719568195402, "loss": 3.0796010494232178, "step": 4312, "token_acc": 0.28949092533018855 }, { "epoch": 2.5282908238053357, "grad_norm": 0.2833203244253017, "learning_rate": 0.00019682477318645197, "loss": 3.055309295654297, "step": 4313, "token_acc": 0.29305594756679665 }, { "epoch": 2.5288771621225448, "grad_norm": 0.3111957412581656, "learning_rate": 0.00019682234978141166, "loss": 3.133759021759033, "step": 4314, "token_acc": 0.28245084441543605 }, { "epoch": 2.529463500439754, "grad_norm": 0.319591606272368, "learning_rate": 0.0001968199254668558, "loss": 3.0856704711914062, "step": 4315, "token_acc": 0.28993842996408414 }, { "epoch": 2.5300498387569625, "grad_norm": 0.31273918054458233, "learning_rate": 0.00019681750024280728, "loss": 3.067540168762207, "step": 4316, "token_acc": 0.29115876696979964 }, { "epoch": 2.5306361770741717, "grad_norm": 0.36443284001692516, "learning_rate": 0.00019681507410928878, "loss": 3.118192195892334, "step": 4317, "token_acc": 0.2864790826908778 }, { "epoch": 2.5312225153913808, "grad_norm": 0.314060342364928, "learning_rate": 0.0001968126470663231, "loss": 3.0855259895324707, "step": 4318, "token_acc": 0.2882240621246701 }, { "epoch": 2.53180885370859, "grad_norm": 0.3338798956370352, "learning_rate": 0.00019681021911393306, "loss": 3.0896735191345215, "step": 4319, "token_acc": 0.28827129614505065 }, { "epoch": 2.532395192025799, "grad_norm": 0.340146771123498, "learning_rate": 0.00019680779025214146, "loss": 3.0404653549194336, "step": 4320, "token_acc": 0.29519595605660925 }, { "epoch": 2.532981530343008, "grad_norm": 0.30899001914948426, "learning_rate": 0.00019680536048097115, "loss": 3.045503616333008, "step": 4321, "token_acc": 0.2943045172832407 }, { "epoch": 2.533567868660217, "grad_norm": 0.34070681977036404, "learning_rate": 0.00019680292980044493, "loss": 3.047095775604248, "step": 4322, "token_acc": 0.2937312549594502 }, { "epoch": 2.534154206977426, "grad_norm": 0.40976098473253214, "learning_rate": 0.0001968004982105856, "loss": 3.033961296081543, "step": 4323, "token_acc": 0.2969539254248568 }, { "epoch": 2.534740545294635, "grad_norm": 0.39619804035293127, "learning_rate": 0.00019679806571141603, "loss": 3.080564498901367, "step": 4324, "token_acc": 0.28873699789246887 }, { "epoch": 2.535326883611844, "grad_norm": 0.2826258319258942, "learning_rate": 0.00019679563230295908, "loss": 3.0360283851623535, "step": 4325, "token_acc": 0.29602856100484704 }, { "epoch": 2.535913221929053, "grad_norm": 0.3383981438704743, "learning_rate": 0.0001967931979852376, "loss": 3.0303406715393066, "step": 4326, "token_acc": 0.29549217797643235 }, { "epoch": 2.536499560246262, "grad_norm": 0.36010800439295604, "learning_rate": 0.00019679076275827445, "loss": 3.069512367248535, "step": 4327, "token_acc": 0.2918079059469398 }, { "epoch": 2.537085898563471, "grad_norm": 0.31853265407731124, "learning_rate": 0.0001967883266220925, "loss": 3.0459797382354736, "step": 4328, "token_acc": 0.2953695654994356 }, { "epoch": 2.53767223688068, "grad_norm": 0.32174772129878987, "learning_rate": 0.00019678588957671464, "loss": 3.0687904357910156, "step": 4329, "token_acc": 0.2903558469989662 }, { "epoch": 2.538258575197889, "grad_norm": 0.37946690235253305, "learning_rate": 0.00019678345162216378, "loss": 3.1021206378936768, "step": 4330, "token_acc": 0.28620961018520685 }, { "epoch": 2.5388449135150983, "grad_norm": 0.3124169856712645, "learning_rate": 0.00019678101275846284, "loss": 3.0927093029022217, "step": 4331, "token_acc": 0.2868429764035296 }, { "epoch": 2.5394312518323074, "grad_norm": 0.34274850699130816, "learning_rate": 0.00019677857298563468, "loss": 3.0585250854492188, "step": 4332, "token_acc": 0.29139852060397603 }, { "epoch": 2.540017590149516, "grad_norm": 0.2923296091069028, "learning_rate": 0.0001967761323037022, "loss": 3.0273396968841553, "step": 4333, "token_acc": 0.2966150872878518 }, { "epoch": 2.5406039284667252, "grad_norm": 0.3248173566948958, "learning_rate": 0.0001967736907126884, "loss": 3.077577590942383, "step": 4334, "token_acc": 0.28987661348555044 }, { "epoch": 2.5411902667839343, "grad_norm": 0.3325764933676299, "learning_rate": 0.0001967712482126162, "loss": 3.0596702098846436, "step": 4335, "token_acc": 0.29220380109457844 }, { "epoch": 2.5417766051011434, "grad_norm": 0.29495416159286175, "learning_rate": 0.00019676880480350847, "loss": 3.0884342193603516, "step": 4336, "token_acc": 0.28674425630921546 }, { "epoch": 2.542362943418352, "grad_norm": 0.31906955088729705, "learning_rate": 0.00019676636048538825, "loss": 3.0372214317321777, "step": 4337, "token_acc": 0.29461517277956834 }, { "epoch": 2.5429492817355612, "grad_norm": 0.38596150792753015, "learning_rate": 0.00019676391525827848, "loss": 3.1169283390045166, "step": 4338, "token_acc": 0.2853148630615746 }, { "epoch": 2.5435356200527703, "grad_norm": 0.4171515406592132, "learning_rate": 0.00019676146912220207, "loss": 3.0613787174224854, "step": 4339, "token_acc": 0.29045808446690446 }, { "epoch": 2.5441219583699795, "grad_norm": 0.40773296826936284, "learning_rate": 0.0001967590220771821, "loss": 3.049337863922119, "step": 4340, "token_acc": 0.29383906251648084 }, { "epoch": 2.5447082966871886, "grad_norm": 0.3744202307544062, "learning_rate": 0.00019675657412324146, "loss": 3.0532612800598145, "step": 4341, "token_acc": 0.29447773950157036 }, { "epoch": 2.5452946350043977, "grad_norm": 0.42487171907621696, "learning_rate": 0.00019675412526040323, "loss": 3.073619842529297, "step": 4342, "token_acc": 0.29084579131144217 }, { "epoch": 2.545880973321607, "grad_norm": 0.4430334500454967, "learning_rate": 0.00019675167548869035, "loss": 3.0856785774230957, "step": 4343, "token_acc": 0.28847597597597596 }, { "epoch": 2.5464673116388155, "grad_norm": 0.42276574376526593, "learning_rate": 0.00019674922480812583, "loss": 3.0611069202423096, "step": 4344, "token_acc": 0.2915815626619517 }, { "epoch": 2.5470536499560246, "grad_norm": 0.2951208183344554, "learning_rate": 0.00019674677321873275, "loss": 3.053767681121826, "step": 4345, "token_acc": 0.2926701529957454 }, { "epoch": 2.5476399882732337, "grad_norm": 0.4834778382856313, "learning_rate": 0.0001967443207205341, "loss": 3.1018574237823486, "step": 4346, "token_acc": 0.2857452583020529 }, { "epoch": 2.548226326590443, "grad_norm": 0.28849019103372164, "learning_rate": 0.00019674186731355288, "loss": 3.126852035522461, "step": 4347, "token_acc": 0.2839649898927895 }, { "epoch": 2.5488126649076515, "grad_norm": 0.4404181145353068, "learning_rate": 0.0001967394129978122, "loss": 3.0917136669158936, "step": 4348, "token_acc": 0.2871915418153199 }, { "epoch": 2.5493990032248606, "grad_norm": 0.30384673166089216, "learning_rate": 0.00019673695777333512, "loss": 3.045119285583496, "step": 4349, "token_acc": 0.293204212944232 }, { "epoch": 2.5499853415420697, "grad_norm": 0.4201201654133906, "learning_rate": 0.00019673450164014463, "loss": 3.0878076553344727, "step": 4350, "token_acc": 0.28939099488486236 }, { "epoch": 2.550571679859279, "grad_norm": 0.2729500462034345, "learning_rate": 0.00019673204459826388, "loss": 3.0816612243652344, "step": 4351, "token_acc": 0.28802842012123453 }, { "epoch": 2.551158018176488, "grad_norm": 0.3952804575887593, "learning_rate": 0.0001967295866477159, "loss": 3.074885129928589, "step": 4352, "token_acc": 0.2901008722152999 }, { "epoch": 2.551744356493697, "grad_norm": 0.3109425019840235, "learning_rate": 0.00019672712778852382, "loss": 3.0515828132629395, "step": 4353, "token_acc": 0.2931193708231135 }, { "epoch": 2.552330694810906, "grad_norm": 0.345899174736339, "learning_rate": 0.0001967246680207107, "loss": 3.0541086196899414, "step": 4354, "token_acc": 0.29222225513933364 }, { "epoch": 2.552917033128115, "grad_norm": 0.38654748318407606, "learning_rate": 0.00019672220734429967, "loss": 3.111905336380005, "step": 4355, "token_acc": 0.28456465966487615 }, { "epoch": 2.553503371445324, "grad_norm": 0.33738221730942225, "learning_rate": 0.00019671974575931385, "loss": 3.0592422485351562, "step": 4356, "token_acc": 0.29193751380289334 }, { "epoch": 2.554089709762533, "grad_norm": 0.3578770150362226, "learning_rate": 0.00019671728326577635, "loss": 3.064779043197632, "step": 4357, "token_acc": 0.2917376779190185 }, { "epoch": 2.554676048079742, "grad_norm": 0.3863932125854447, "learning_rate": 0.00019671481986371027, "loss": 3.03214168548584, "step": 4358, "token_acc": 0.2957747948163584 }, { "epoch": 2.555262386396951, "grad_norm": 0.30995878812369565, "learning_rate": 0.0001967123555531388, "loss": 3.0910720825195312, "step": 4359, "token_acc": 0.28800116840405915 }, { "epoch": 2.55584872471416, "grad_norm": 0.3819498869674784, "learning_rate": 0.00019670989033408507, "loss": 3.0519962310791016, "step": 4360, "token_acc": 0.29198846682358426 }, { "epoch": 2.556435063031369, "grad_norm": 0.3721631762602892, "learning_rate": 0.00019670742420657225, "loss": 3.0353198051452637, "step": 4361, "token_acc": 0.2957572216153045 }, { "epoch": 2.557021401348578, "grad_norm": 0.3541723204248817, "learning_rate": 0.00019670495717062346, "loss": 3.1144042015075684, "step": 4362, "token_acc": 0.28357453725551884 }, { "epoch": 2.5576077396657872, "grad_norm": 0.3543586093298492, "learning_rate": 0.00019670248922626192, "loss": 3.1077284812927246, "step": 4363, "token_acc": 0.2841821775873277 }, { "epoch": 2.5581940779829964, "grad_norm": 0.314163116849466, "learning_rate": 0.00019670002037351086, "loss": 3.0905518531799316, "step": 4364, "token_acc": 0.28717304574159414 }, { "epoch": 2.5587804163002055, "grad_norm": 0.2956714527013071, "learning_rate": 0.00019669755061239337, "loss": 3.066251039505005, "step": 4365, "token_acc": 0.29134437605714486 }, { "epoch": 2.559366754617414, "grad_norm": 0.3306390850186592, "learning_rate": 0.00019669507994293266, "loss": 3.097242593765259, "step": 4366, "token_acc": 0.2856347173333266 }, { "epoch": 2.5599530929346233, "grad_norm": 0.31475363380465343, "learning_rate": 0.00019669260836515203, "loss": 3.056725025177002, "step": 4367, "token_acc": 0.2918700121304986 }, { "epoch": 2.5605394312518324, "grad_norm": 0.35817930149525296, "learning_rate": 0.0001966901358790746, "loss": 3.0887980461120605, "step": 4368, "token_acc": 0.2872980851786035 }, { "epoch": 2.5611257695690415, "grad_norm": 0.3842368806563799, "learning_rate": 0.00019668766248472362, "loss": 3.100215435028076, "step": 4369, "token_acc": 0.28725450901803606 }, { "epoch": 2.56171210788625, "grad_norm": 0.3162344267452049, "learning_rate": 0.00019668518818212238, "loss": 3.1013846397399902, "step": 4370, "token_acc": 0.2875088417212239 }, { "epoch": 2.5622984462034593, "grad_norm": 0.37850795328190984, "learning_rate": 0.0001966827129712941, "loss": 3.0717766284942627, "step": 4371, "token_acc": 0.2891443259967143 }, { "epoch": 2.5628847845206684, "grad_norm": 0.3423010292330842, "learning_rate": 0.00019668023685226195, "loss": 3.0707814693450928, "step": 4372, "token_acc": 0.2896238791785463 }, { "epoch": 2.5634711228378775, "grad_norm": 0.2829031212605263, "learning_rate": 0.0001966777598250493, "loss": 3.092684030532837, "step": 4373, "token_acc": 0.28880511813048354 }, { "epoch": 2.5640574611550866, "grad_norm": 0.37486135791478675, "learning_rate": 0.00019667528188967937, "loss": 3.078953742980957, "step": 4374, "token_acc": 0.28989758338430305 }, { "epoch": 2.5646437994722957, "grad_norm": 0.38901017750281114, "learning_rate": 0.0001966728030461754, "loss": 3.0494942665100098, "step": 4375, "token_acc": 0.29251891431568344 }, { "epoch": 2.565230137789505, "grad_norm": 0.45183970380483385, "learning_rate": 0.00019667032329456077, "loss": 3.1119227409362793, "step": 4376, "token_acc": 0.2854487589526336 }, { "epoch": 2.5658164761067135, "grad_norm": 0.35230564987409213, "learning_rate": 0.00019666784263485868, "loss": 3.070038318634033, "step": 4377, "token_acc": 0.2899215103392527 }, { "epoch": 2.5664028144239226, "grad_norm": 0.35518878657327324, "learning_rate": 0.00019666536106709246, "loss": 3.0616188049316406, "step": 4378, "token_acc": 0.29071801626330485 }, { "epoch": 2.5669891527411317, "grad_norm": 0.34082797562021155, "learning_rate": 0.00019666287859128545, "loss": 3.0412020683288574, "step": 4379, "token_acc": 0.2939832629227996 }, { "epoch": 2.567575491058341, "grad_norm": 0.35255619286108814, "learning_rate": 0.00019666039520746095, "loss": 3.0532445907592773, "step": 4380, "token_acc": 0.2925111018191634 }, { "epoch": 2.5681618293755495, "grad_norm": 0.29233296397113484, "learning_rate": 0.0001966579109156423, "loss": 3.1050941944122314, "step": 4381, "token_acc": 0.28545357304897306 }, { "epoch": 2.5687481676927586, "grad_norm": 0.3440338865386845, "learning_rate": 0.0001966554257158528, "loss": 3.0384020805358887, "step": 4382, "token_acc": 0.29407245136145393 }, { "epoch": 2.5693345060099677, "grad_norm": 0.3638994869978608, "learning_rate": 0.00019665293960811583, "loss": 3.064739942550659, "step": 4383, "token_acc": 0.2919915288708573 }, { "epoch": 2.569920844327177, "grad_norm": 0.3951104643217597, "learning_rate": 0.00019665045259245473, "loss": 3.0822486877441406, "step": 4384, "token_acc": 0.28899868859907274 }, { "epoch": 2.570507182644386, "grad_norm": 0.33588356678262443, "learning_rate": 0.00019664796466889288, "loss": 3.0625858306884766, "step": 4385, "token_acc": 0.2923373364170113 }, { "epoch": 2.571093520961595, "grad_norm": 0.33576426720680497, "learning_rate": 0.00019664547583745363, "loss": 3.082542896270752, "step": 4386, "token_acc": 0.29134112995365674 }, { "epoch": 2.5716798592788037, "grad_norm": 0.26428207277732607, "learning_rate": 0.00019664298609816037, "loss": 3.0884761810302734, "step": 4387, "token_acc": 0.28682554638594665 }, { "epoch": 2.572266197596013, "grad_norm": 0.33908583219150185, "learning_rate": 0.0001966404954510365, "loss": 3.0769479274749756, "step": 4388, "token_acc": 0.29077000628809885 }, { "epoch": 2.572852535913222, "grad_norm": 0.33852640032145337, "learning_rate": 0.00019663800389610537, "loss": 3.0566697120666504, "step": 4389, "token_acc": 0.29196055870015014 }, { "epoch": 2.573438874230431, "grad_norm": 0.29405558012155864, "learning_rate": 0.00019663551143339042, "loss": 3.0920510292053223, "step": 4390, "token_acc": 0.28844251173752045 }, { "epoch": 2.5740252125476397, "grad_norm": 0.4084063439903854, "learning_rate": 0.00019663301806291505, "loss": 3.0672731399536133, "step": 4391, "token_acc": 0.291824211720072 }, { "epoch": 2.574611550864849, "grad_norm": 0.34410287516604726, "learning_rate": 0.00019663052378470267, "loss": 3.069742441177368, "step": 4392, "token_acc": 0.2895558121774426 }, { "epoch": 2.575197889182058, "grad_norm": 0.2637874115580758, "learning_rate": 0.0001966280285987768, "loss": 3.0326242446899414, "step": 4393, "token_acc": 0.29645017255027756 }, { "epoch": 2.575784227499267, "grad_norm": 0.334124981646552, "learning_rate": 0.00019662553250516076, "loss": 3.081714391708374, "step": 4394, "token_acc": 0.2903737337163789 }, { "epoch": 2.576370565816476, "grad_norm": 0.31844492322124635, "learning_rate": 0.00019662303550387807, "loss": 3.063903331756592, "step": 4395, "token_acc": 0.2918590018316768 }, { "epoch": 2.5769569041336853, "grad_norm": 0.2949480035136617, "learning_rate": 0.00019662053759495214, "loss": 3.0998377799987793, "step": 4396, "token_acc": 0.28477315389428554 }, { "epoch": 2.5775432424508944, "grad_norm": 0.3084170075036543, "learning_rate": 0.00019661803877840645, "loss": 3.1447291374206543, "step": 4397, "token_acc": 0.27942980988350785 }, { "epoch": 2.578129580768103, "grad_norm": 0.3748756166993346, "learning_rate": 0.0001966155390542645, "loss": 3.0738232135772705, "step": 4398, "token_acc": 0.28976208036987133 }, { "epoch": 2.578715919085312, "grad_norm": 0.389510707359839, "learning_rate": 0.00019661303842254975, "loss": 3.082246780395508, "step": 4399, "token_acc": 0.2892691525423729 }, { "epoch": 2.5793022574025213, "grad_norm": 0.3414748871019285, "learning_rate": 0.0001966105368832857, "loss": 3.088263750076294, "step": 4400, "token_acc": 0.28817628772594395 }, { "epoch": 2.5798885957197304, "grad_norm": 0.40813253384065784, "learning_rate": 0.00019660803443649584, "loss": 3.0422964096069336, "step": 4401, "token_acc": 0.29398536676880743 }, { "epoch": 2.580474934036939, "grad_norm": 0.3490543368972251, "learning_rate": 0.00019660553108220366, "loss": 3.07165789604187, "step": 4402, "token_acc": 0.29213844637019354 }, { "epoch": 2.581061272354148, "grad_norm": 0.3645319784631988, "learning_rate": 0.00019660302682043268, "loss": 3.07938289642334, "step": 4403, "token_acc": 0.28949733728848787 }, { "epoch": 2.5816476106713573, "grad_norm": 0.3767381711633503, "learning_rate": 0.00019660052165120648, "loss": 3.075606107711792, "step": 4404, "token_acc": 0.28972351170739175 }, { "epoch": 2.5822339489885664, "grad_norm": 0.36943117028402317, "learning_rate": 0.00019659801557454852, "loss": 3.0139575004577637, "step": 4405, "token_acc": 0.29775966682224536 }, { "epoch": 2.5828202873057755, "grad_norm": 0.34320773816502176, "learning_rate": 0.0001965955085904824, "loss": 3.0313358306884766, "step": 4406, "token_acc": 0.2956824775277748 }, { "epoch": 2.5834066256229846, "grad_norm": 0.40876089144779704, "learning_rate": 0.0001965930006990316, "loss": 3.0329337120056152, "step": 4407, "token_acc": 0.29685239554080506 }, { "epoch": 2.5839929639401937, "grad_norm": 0.40094509642801773, "learning_rate": 0.00019659049190021973, "loss": 3.067540168762207, "step": 4408, "token_acc": 0.29222723022892205 }, { "epoch": 2.5845793022574024, "grad_norm": 0.32291604738213864, "learning_rate": 0.00019658798219407037, "loss": 3.044240951538086, "step": 4409, "token_acc": 0.2941207851824378 }, { "epoch": 2.5851656405746115, "grad_norm": 0.34848239300088957, "learning_rate": 0.00019658547158060705, "loss": 3.109755754470825, "step": 4410, "token_acc": 0.2851129963492076 }, { "epoch": 2.5857519788918206, "grad_norm": 0.36670955079564616, "learning_rate": 0.0001965829600598534, "loss": 3.013775110244751, "step": 4411, "token_acc": 0.2976083510576951 }, { "epoch": 2.5863383172090297, "grad_norm": 0.30723042181299637, "learning_rate": 0.00019658044763183296, "loss": 3.1021902561187744, "step": 4412, "token_acc": 0.28611202490512144 }, { "epoch": 2.5869246555262384, "grad_norm": 0.3748923942741152, "learning_rate": 0.00019657793429656936, "loss": 3.0385971069335938, "step": 4413, "token_acc": 0.29457802162336616 }, { "epoch": 2.5875109938434475, "grad_norm": 0.3372145824329986, "learning_rate": 0.00019657542005408623, "loss": 3.08402681350708, "step": 4414, "token_acc": 0.288815297623435 }, { "epoch": 2.5880973321606566, "grad_norm": 0.3480607292581736, "learning_rate": 0.00019657290490440713, "loss": 3.050720453262329, "step": 4415, "token_acc": 0.29246168031965364 }, { "epoch": 2.5886836704778657, "grad_norm": 0.38232386761433246, "learning_rate": 0.00019657038884755574, "loss": 3.045194149017334, "step": 4416, "token_acc": 0.2950650778724957 }, { "epoch": 2.589270008795075, "grad_norm": 0.44161308260079746, "learning_rate": 0.0001965678718835557, "loss": 3.0886642932891846, "step": 4417, "token_acc": 0.2887354879196737 }, { "epoch": 2.589856347112284, "grad_norm": 0.35795736262184297, "learning_rate": 0.0001965653540124306, "loss": 3.1190571784973145, "step": 4418, "token_acc": 0.284661424715409 }, { "epoch": 2.590442685429493, "grad_norm": 0.3766037021079068, "learning_rate": 0.00019656283523420413, "loss": 3.047330379486084, "step": 4419, "token_acc": 0.2939204320394759 }, { "epoch": 2.5910290237467017, "grad_norm": 0.35550401658543784, "learning_rate": 0.00019656031554889992, "loss": 3.069953441619873, "step": 4420, "token_acc": 0.29171317072665454 }, { "epoch": 2.591615362063911, "grad_norm": 0.5485291351804926, "learning_rate": 0.0001965577949565417, "loss": 3.06333589553833, "step": 4421, "token_acc": 0.2910507048835579 }, { "epoch": 2.59220170038112, "grad_norm": 0.3680642390723448, "learning_rate": 0.0001965552734571531, "loss": 3.068913459777832, "step": 4422, "token_acc": 0.2919476925942185 }, { "epoch": 2.592788038698329, "grad_norm": 0.4229449852100515, "learning_rate": 0.00019655275105075784, "loss": 3.0234322547912598, "step": 4423, "token_acc": 0.2989145026031933 }, { "epoch": 2.5933743770155377, "grad_norm": 0.45032929612938555, "learning_rate": 0.00019655022773737955, "loss": 3.097522020339966, "step": 4424, "token_acc": 0.2873199940532033 }, { "epoch": 2.593960715332747, "grad_norm": 2.2950824445847053, "learning_rate": 0.000196547703517042, "loss": 3.2044525146484375, "step": 4425, "token_acc": 0.2770094356215149 }, { "epoch": 2.594547053649956, "grad_norm": 2.8396545605446724, "learning_rate": 0.00019654517838976884, "loss": 3.1636412143707275, "step": 4426, "token_acc": 0.2820985229821497 }, { "epoch": 2.595133391967165, "grad_norm": 0.8294171997733754, "learning_rate": 0.00019654265235558385, "loss": 3.0723445415496826, "step": 4427, "token_acc": 0.2902975595535566 }, { "epoch": 2.595719730284374, "grad_norm": 1.4731651143309001, "learning_rate": 0.0001965401254145107, "loss": 3.100215435028076, "step": 4428, "token_acc": 0.2879225438134057 }, { "epoch": 2.5963060686015833, "grad_norm": 0.9117299372915686, "learning_rate": 0.00019653759756657323, "loss": 3.0904886722564697, "step": 4429, "token_acc": 0.2904010224530058 }, { "epoch": 2.5968924069187924, "grad_norm": 0.8977769326077327, "learning_rate": 0.00019653506881179506, "loss": 3.105140209197998, "step": 4430, "token_acc": 0.28762170611305243 }, { "epoch": 2.597478745236001, "grad_norm": 0.5054889864204122, "learning_rate": 0.0001965325391502, "loss": 3.0985279083251953, "step": 4431, "token_acc": 0.2868811145105115 }, { "epoch": 2.59806508355321, "grad_norm": 0.6355593035266394, "learning_rate": 0.00019653000858181185, "loss": 3.1147515773773193, "step": 4432, "token_acc": 0.28274495500179786 }, { "epoch": 2.5986514218704193, "grad_norm": 0.5392172003877407, "learning_rate": 0.00019652747710665437, "loss": 3.0859718322753906, "step": 4433, "token_acc": 0.28845969392105036 }, { "epoch": 2.5992377601876284, "grad_norm": 0.5208474751743597, "learning_rate": 0.00019652494472475126, "loss": 3.077242136001587, "step": 4434, "token_acc": 0.2893993013771469 }, { "epoch": 2.599824098504837, "grad_norm": 0.4556370885993351, "learning_rate": 0.00019652241143612638, "loss": 3.0691652297973633, "step": 4435, "token_acc": 0.2916762576379007 }, { "epoch": 2.600410436822046, "grad_norm": 0.32485519985349476, "learning_rate": 0.0001965198772408035, "loss": 3.0671253204345703, "step": 4436, "token_acc": 0.29105503005352873 }, { "epoch": 2.6009967751392553, "grad_norm": 0.4185910103056575, "learning_rate": 0.00019651734213880644, "loss": 3.0384130477905273, "step": 4437, "token_acc": 0.29465028325722264 }, { "epoch": 2.6015831134564644, "grad_norm": 0.3830457101204748, "learning_rate": 0.00019651480613015903, "loss": 3.0776829719543457, "step": 4438, "token_acc": 0.2898653732501256 }, { "epoch": 2.6021694517736735, "grad_norm": 2.029732030279408, "learning_rate": 0.00019651226921488504, "loss": 3.1110501289367676, "step": 4439, "token_acc": 0.28992283589045115 }, { "epoch": 2.6027557900908826, "grad_norm": 0.4157322918489538, "learning_rate": 0.00019650973139300834, "loss": 3.072413921356201, "step": 4440, "token_acc": 0.29016718312809137 }, { "epoch": 2.6033421284080913, "grad_norm": 0.3738115483296638, "learning_rate": 0.00019650719266455278, "loss": 3.0831398963928223, "step": 4441, "token_acc": 0.2906055274055062 }, { "epoch": 2.6039284667253004, "grad_norm": 0.7281054454723008, "learning_rate": 0.00019650465302954219, "loss": 3.198808431625366, "step": 4442, "token_acc": 0.28240875428093115 }, { "epoch": 2.6045148050425095, "grad_norm": 0.36734657523112113, "learning_rate": 0.0001965021124880004, "loss": 3.0845210552215576, "step": 4443, "token_acc": 0.28914049719202034 }, { "epoch": 2.6051011433597187, "grad_norm": 0.35737744432983204, "learning_rate": 0.00019649957103995132, "loss": 3.0540714263916016, "step": 4444, "token_acc": 0.29415140074649276 }, { "epoch": 2.6056874816769273, "grad_norm": 0.35772084734332854, "learning_rate": 0.0001964970286854188, "loss": 3.103898286819458, "step": 4445, "token_acc": 0.2847180639383614 }, { "epoch": 2.6062738199941364, "grad_norm": 0.3821353993324753, "learning_rate": 0.00019649448542442672, "loss": 3.075505256652832, "step": 4446, "token_acc": 0.28804238012589695 }, { "epoch": 2.6068601583113455, "grad_norm": 0.3256203646199461, "learning_rate": 0.000196491941256999, "loss": 3.071077346801758, "step": 4447, "token_acc": 0.2910581249298213 }, { "epoch": 2.6074464966285547, "grad_norm": 0.35843790969846373, "learning_rate": 0.0001964893961831595, "loss": 3.101072072982788, "step": 4448, "token_acc": 0.28657332686686227 }, { "epoch": 2.6080328349457638, "grad_norm": 0.3441457314408639, "learning_rate": 0.00019648685020293215, "loss": 3.0406718254089355, "step": 4449, "token_acc": 0.29567943872284613 }, { "epoch": 2.608619173262973, "grad_norm": 0.33621993909040554, "learning_rate": 0.00019648430331634085, "loss": 3.0481178760528564, "step": 4450, "token_acc": 0.2921173366605581 }, { "epoch": 2.609205511580182, "grad_norm": 0.2906904104418168, "learning_rate": 0.00019648175552340952, "loss": 3.066950798034668, "step": 4451, "token_acc": 0.2919402158720509 }, { "epoch": 2.6097918498973907, "grad_norm": 0.34704656141721263, "learning_rate": 0.00019647920682416215, "loss": 3.0664334297180176, "step": 4452, "token_acc": 0.2924489869586069 }, { "epoch": 2.6103781882145998, "grad_norm": 0.2909445400642656, "learning_rate": 0.0001964766572186226, "loss": 3.0556480884552, "step": 4453, "token_acc": 0.29229036660174623 }, { "epoch": 2.610964526531809, "grad_norm": 0.36327754891629727, "learning_rate": 0.0001964741067068149, "loss": 3.0715274810791016, "step": 4454, "token_acc": 0.2909199277605251 }, { "epoch": 2.611550864849018, "grad_norm": 0.5738670581320662, "learning_rate": 0.00019647155528876293, "loss": 3.050835132598877, "step": 4455, "token_acc": 0.29428908545311366 }, { "epoch": 2.6121372031662267, "grad_norm": 0.293080853181563, "learning_rate": 0.00019646900296449072, "loss": 3.0443215370178223, "step": 4456, "token_acc": 0.29371182844488214 }, { "epoch": 2.6127235414834358, "grad_norm": 0.3922183584826324, "learning_rate": 0.0001964664497340222, "loss": 3.0840888023376465, "step": 4457, "token_acc": 0.28788662241592183 }, { "epoch": 2.613309879800645, "grad_norm": 0.29859083695848426, "learning_rate": 0.00019646389559738138, "loss": 3.068246364593506, "step": 4458, "token_acc": 0.29035752090393624 }, { "epoch": 2.613896218117854, "grad_norm": 0.33772709071421625, "learning_rate": 0.00019646134055459227, "loss": 3.081051826477051, "step": 4459, "token_acc": 0.2889718948132104 }, { "epoch": 2.614482556435063, "grad_norm": 0.3536834559960459, "learning_rate": 0.00019645878460567882, "loss": 3.117042064666748, "step": 4460, "token_acc": 0.28482590949792796 }, { "epoch": 2.615068894752272, "grad_norm": 0.33771190453939687, "learning_rate": 0.0001964562277506651, "loss": 3.0764381885528564, "step": 4461, "token_acc": 0.2889233603693272 }, { "epoch": 2.6156552330694813, "grad_norm": 0.35424024724714576, "learning_rate": 0.00019645366998957507, "loss": 3.0462307929992676, "step": 4462, "token_acc": 0.2931284693248195 }, { "epoch": 2.61624157138669, "grad_norm": 0.30341854096965054, "learning_rate": 0.0001964511113224328, "loss": 3.0258350372314453, "step": 4463, "token_acc": 0.2968459137804808 }, { "epoch": 2.616827909703899, "grad_norm": 0.2982402917000955, "learning_rate": 0.0001964485517492623, "loss": 3.005655288696289, "step": 4464, "token_acc": 0.3001810802372776 }, { "epoch": 2.6174142480211082, "grad_norm": 0.26640662699611395, "learning_rate": 0.00019644599127008761, "loss": 3.0821139812469482, "step": 4465, "token_acc": 0.28849303529925596 }, { "epoch": 2.6180005863383173, "grad_norm": 0.2617223132507397, "learning_rate": 0.0001964434298849328, "loss": 3.0709388256073, "step": 4466, "token_acc": 0.29271523526922644 }, { "epoch": 2.618586924655526, "grad_norm": 0.31109899583608125, "learning_rate": 0.00019644086759382194, "loss": 3.0869507789611816, "step": 4467, "token_acc": 0.2893768888958306 }, { "epoch": 2.619173262972735, "grad_norm": 0.2812213263130152, "learning_rate": 0.00019643830439677906, "loss": 3.0369787216186523, "step": 4468, "token_acc": 0.29494939600014436 }, { "epoch": 2.6197596012899442, "grad_norm": 0.2808366453872767, "learning_rate": 0.00019643574029382829, "loss": 3.0415759086608887, "step": 4469, "token_acc": 0.295452570897067 }, { "epoch": 2.6203459396071533, "grad_norm": 0.3527981814005396, "learning_rate": 0.00019643317528499367, "loss": 3.057199001312256, "step": 4470, "token_acc": 0.2912875843086168 }, { "epoch": 2.6209322779243625, "grad_norm": 0.3252137599709574, "learning_rate": 0.00019643060937029933, "loss": 3.0361056327819824, "step": 4471, "token_acc": 0.2960444136016655 }, { "epoch": 2.6215186162415716, "grad_norm": 0.30557122713452234, "learning_rate": 0.00019642804254976936, "loss": 3.0279526710510254, "step": 4472, "token_acc": 0.2976592940260301 }, { "epoch": 2.6221049545587807, "grad_norm": 0.3343228268098006, "learning_rate": 0.00019642547482342785, "loss": 3.0579023361206055, "step": 4473, "token_acc": 0.2931988095332745 }, { "epoch": 2.6226912928759893, "grad_norm": 0.29480812293668984, "learning_rate": 0.00019642290619129894, "loss": 3.106870174407959, "step": 4474, "token_acc": 0.28628196889859914 }, { "epoch": 2.6232776311931985, "grad_norm": 0.3143597338444688, "learning_rate": 0.00019642033665340675, "loss": 3.0419559478759766, "step": 4475, "token_acc": 0.29422649790289596 }, { "epoch": 2.6238639695104076, "grad_norm": 0.34809137420782466, "learning_rate": 0.00019641776620977545, "loss": 3.068683624267578, "step": 4476, "token_acc": 0.28931212093829234 }, { "epoch": 2.6244503078276167, "grad_norm": 0.29454093956690963, "learning_rate": 0.00019641519486042914, "loss": 3.077669382095337, "step": 4477, "token_acc": 0.2887501024592873 }, { "epoch": 2.6250366461448253, "grad_norm": 0.354110602969907, "learning_rate": 0.00019641262260539202, "loss": 3.0290963649749756, "step": 4478, "token_acc": 0.29660718747928255 }, { "epoch": 2.6256229844620345, "grad_norm": 0.29489555346241825, "learning_rate": 0.00019641004944468822, "loss": 3.0545310974121094, "step": 4479, "token_acc": 0.2925549708751268 }, { "epoch": 2.6262093227792436, "grad_norm": 0.2844289747362478, "learning_rate": 0.0001964074753783419, "loss": 3.045168876647949, "step": 4480, "token_acc": 0.29424680127304825 }, { "epoch": 2.6267956610964527, "grad_norm": 0.3417634942417672, "learning_rate": 0.00019640490040637726, "loss": 3.086907386779785, "step": 4481, "token_acc": 0.2886968656075569 }, { "epoch": 2.627381999413662, "grad_norm": 0.32940580828988714, "learning_rate": 0.0001964023245288185, "loss": 3.050446033477783, "step": 4482, "token_acc": 0.2932167797247183 }, { "epoch": 2.627968337730871, "grad_norm": 0.30048130896487857, "learning_rate": 0.00019639974774568982, "loss": 3.0426900386810303, "step": 4483, "token_acc": 0.29266286334945707 }, { "epoch": 2.62855467604808, "grad_norm": 0.36789060580501176, "learning_rate": 0.00019639717005701538, "loss": 3.067312717437744, "step": 4484, "token_acc": 0.291606935095155 }, { "epoch": 2.6291410143652887, "grad_norm": 0.4460827920534385, "learning_rate": 0.00019639459146281944, "loss": 3.0965256690979004, "step": 4485, "token_acc": 0.2864489908877523 }, { "epoch": 2.629727352682498, "grad_norm": 0.33776738565909503, "learning_rate": 0.00019639201196312622, "loss": 3.0572757720947266, "step": 4486, "token_acc": 0.29244934969108377 }, { "epoch": 2.630313690999707, "grad_norm": 0.32062546011264975, "learning_rate": 0.00019638943155795993, "loss": 3.0528969764709473, "step": 4487, "token_acc": 0.29478522931780926 }, { "epoch": 2.630900029316916, "grad_norm": 0.3505622839783323, "learning_rate": 0.0001963868502473448, "loss": 3.061879873275757, "step": 4488, "token_acc": 0.2926566427518798 }, { "epoch": 2.6314863676341247, "grad_norm": 0.32965761663284, "learning_rate": 0.0001963842680313051, "loss": 3.090459108352661, "step": 4489, "token_acc": 0.28715031600629626 }, { "epoch": 2.632072705951334, "grad_norm": 0.2686659302108614, "learning_rate": 0.0001963816849098651, "loss": 3.0227527618408203, "step": 4490, "token_acc": 0.2952963223859966 }, { "epoch": 2.632659044268543, "grad_norm": 0.32480965974043335, "learning_rate": 0.00019637910088304904, "loss": 3.0999596118927, "step": 4491, "token_acc": 0.2867034512604133 }, { "epoch": 2.633245382585752, "grad_norm": 0.41912251319870225, "learning_rate": 0.0001963765159508812, "loss": 3.140795946121216, "step": 4492, "token_acc": 0.281638156319302 }, { "epoch": 2.633831720902961, "grad_norm": 0.33534683800568893, "learning_rate": 0.00019637393011338582, "loss": 3.045501708984375, "step": 4493, "token_acc": 0.2945371648471159 }, { "epoch": 2.6344180592201702, "grad_norm": 0.30462158647759885, "learning_rate": 0.0001963713433705873, "loss": 3.054668426513672, "step": 4494, "token_acc": 0.2924317401617169 }, { "epoch": 2.635004397537379, "grad_norm": 0.2705180193136794, "learning_rate": 0.00019636875572250984, "loss": 3.0756068229675293, "step": 4495, "token_acc": 0.2903723125612401 }, { "epoch": 2.635590735854588, "grad_norm": 0.303878079355125, "learning_rate": 0.00019636616716917776, "loss": 3.0634098052978516, "step": 4496, "token_acc": 0.29232823821152876 }, { "epoch": 2.636177074171797, "grad_norm": 0.3084281445601369, "learning_rate": 0.00019636357771061542, "loss": 3.0523176193237305, "step": 4497, "token_acc": 0.29193260975214647 }, { "epoch": 2.6367634124890063, "grad_norm": 0.2645435797616772, "learning_rate": 0.0001963609873468471, "loss": 3.055009365081787, "step": 4498, "token_acc": 0.2945348949158364 }, { "epoch": 2.637349750806215, "grad_norm": 0.2563803415794547, "learning_rate": 0.00019635839607789714, "loss": 3.065328598022461, "step": 4499, "token_acc": 0.2902637278634221 }, { "epoch": 2.637936089123424, "grad_norm": 0.2495922398337573, "learning_rate": 0.00019635580390378994, "loss": 3.051077365875244, "step": 4500, "token_acc": 0.29343870746644274 }, { "epoch": 2.638522427440633, "grad_norm": 0.3005039331543738, "learning_rate": 0.0001963532108245498, "loss": 3.104482650756836, "step": 4501, "token_acc": 0.2871441269157217 }, { "epoch": 2.6391087657578423, "grad_norm": 0.244025820666221, "learning_rate": 0.00019635061684020104, "loss": 3.038386821746826, "step": 4502, "token_acc": 0.29478540825540767 }, { "epoch": 2.6396951040750514, "grad_norm": 0.33492953010619697, "learning_rate": 0.00019634802195076808, "loss": 3.0687317848205566, "step": 4503, "token_acc": 0.29183692986061033 }, { "epoch": 2.6402814423922605, "grad_norm": 0.45877832460916906, "learning_rate": 0.00019634542615627529, "loss": 3.0338287353515625, "step": 4504, "token_acc": 0.29636347456473716 }, { "epoch": 2.6408677807094696, "grad_norm": 0.5157500661351426, "learning_rate": 0.00019634282945674706, "loss": 3.0727882385253906, "step": 4505, "token_acc": 0.28951413223660744 }, { "epoch": 2.6414541190266783, "grad_norm": 0.4824121877087122, "learning_rate": 0.00019634023185220777, "loss": 3.0695605278015137, "step": 4506, "token_acc": 0.28954831495106276 }, { "epoch": 2.6420404573438874, "grad_norm": 0.286016254417242, "learning_rate": 0.0001963376333426818, "loss": 3.0731043815612793, "step": 4507, "token_acc": 0.2885248050661376 }, { "epoch": 2.6426267956610965, "grad_norm": 0.33144016997840975, "learning_rate": 0.00019633503392819362, "loss": 3.0521934032440186, "step": 4508, "token_acc": 0.2935126706881073 }, { "epoch": 2.6432131339783056, "grad_norm": 0.32347629295441577, "learning_rate": 0.00019633243360876756, "loss": 3.0892527103424072, "step": 4509, "token_acc": 0.2884905232550385 }, { "epoch": 2.6437994722955143, "grad_norm": 0.3478656689219456, "learning_rate": 0.00019632983238442812, "loss": 3.067091941833496, "step": 4510, "token_acc": 0.2895294096144256 }, { "epoch": 2.6443858106127234, "grad_norm": 0.3501867607845672, "learning_rate": 0.00019632723025519972, "loss": 3.088876247406006, "step": 4511, "token_acc": 0.2876259274018534 }, { "epoch": 2.6449721489299325, "grad_norm": 0.38770102565818937, "learning_rate": 0.00019632462722110678, "loss": 3.086182117462158, "step": 4512, "token_acc": 0.28858747808729257 }, { "epoch": 2.6455584872471416, "grad_norm": 0.3904764530469908, "learning_rate": 0.00019632202328217376, "loss": 3.0932693481445312, "step": 4513, "token_acc": 0.2866015301459907 }, { "epoch": 2.6461448255643507, "grad_norm": 0.376584849692192, "learning_rate": 0.00019631941843842516, "loss": 3.069999933242798, "step": 4514, "token_acc": 0.289378201775143 }, { "epoch": 2.64673116388156, "grad_norm": 0.3075806878304348, "learning_rate": 0.00019631681268988537, "loss": 3.0741350650787354, "step": 4515, "token_acc": 0.28925101848950174 }, { "epoch": 2.647317502198769, "grad_norm": 0.34225947912693605, "learning_rate": 0.00019631420603657894, "loss": 3.085578441619873, "step": 4516, "token_acc": 0.28889610147757566 }, { "epoch": 2.6479038405159776, "grad_norm": 0.4404033423864138, "learning_rate": 0.00019631159847853034, "loss": 3.06289005279541, "step": 4517, "token_acc": 0.2918366963160765 }, { "epoch": 2.6484901788331867, "grad_norm": 0.3335503426914167, "learning_rate": 0.00019630899001576405, "loss": 3.0790886878967285, "step": 4518, "token_acc": 0.290332262503402 }, { "epoch": 2.649076517150396, "grad_norm": 0.3353423529590062, "learning_rate": 0.00019630638064830456, "loss": 3.0785131454467773, "step": 4519, "token_acc": 0.28918866965628204 }, { "epoch": 2.649662855467605, "grad_norm": 0.3248139451470929, "learning_rate": 0.0001963037703761764, "loss": 3.055438995361328, "step": 4520, "token_acc": 0.29214887369308773 }, { "epoch": 2.6502491937848136, "grad_norm": 0.34058343256543366, "learning_rate": 0.0001963011591994041, "loss": 3.054819345474243, "step": 4521, "token_acc": 0.29368104000684897 }, { "epoch": 2.6508355321020227, "grad_norm": 0.40593421952064956, "learning_rate": 0.00019629854711801216, "loss": 3.0736405849456787, "step": 4522, "token_acc": 0.28880714580688577 }, { "epoch": 2.651421870419232, "grad_norm": 0.36446617155678246, "learning_rate": 0.00019629593413202515, "loss": 3.0906848907470703, "step": 4523, "token_acc": 0.28699349838276356 }, { "epoch": 2.652008208736441, "grad_norm": 0.34285056848024126, "learning_rate": 0.0001962933202414676, "loss": 3.0449209213256836, "step": 4524, "token_acc": 0.2946073166952517 }, { "epoch": 2.65259454705365, "grad_norm": 0.313530514146754, "learning_rate": 0.00019629070544636406, "loss": 3.0669126510620117, "step": 4525, "token_acc": 0.28972421667661036 }, { "epoch": 2.653180885370859, "grad_norm": 0.30858193941186796, "learning_rate": 0.0001962880897467391, "loss": 3.036386489868164, "step": 4526, "token_acc": 0.29459631243857626 }, { "epoch": 2.6537672236880683, "grad_norm": 0.34712111926513606, "learning_rate": 0.00019628547314261727, "loss": 3.090385675430298, "step": 4527, "token_acc": 0.28859511718697883 }, { "epoch": 2.654353562005277, "grad_norm": 0.32449936222745696, "learning_rate": 0.00019628285563402318, "loss": 3.1185836791992188, "step": 4528, "token_acc": 0.28416327438078653 }, { "epoch": 2.654939900322486, "grad_norm": 0.2884555769027673, "learning_rate": 0.00019628023722098142, "loss": 3.055527687072754, "step": 4529, "token_acc": 0.29156829978927723 }, { "epoch": 2.655526238639695, "grad_norm": 0.34335689643317113, "learning_rate": 0.00019627761790351654, "loss": 3.065922975540161, "step": 4530, "token_acc": 0.2940832334013665 }, { "epoch": 2.6561125769569043, "grad_norm": 0.35574508501819285, "learning_rate": 0.0001962749976816532, "loss": 3.0582306385040283, "step": 4531, "token_acc": 0.2911504247575853 }, { "epoch": 2.656698915274113, "grad_norm": 0.2513292997031737, "learning_rate": 0.00019627237655541594, "loss": 3.1087393760681152, "step": 4532, "token_acc": 0.2871675141598827 }, { "epoch": 2.657285253591322, "grad_norm": 0.3782240492427561, "learning_rate": 0.00019626975452482947, "loss": 3.0797410011291504, "step": 4533, "token_acc": 0.2892079484024141 }, { "epoch": 2.657871591908531, "grad_norm": 0.3181669356709658, "learning_rate": 0.00019626713158991837, "loss": 3.070539951324463, "step": 4534, "token_acc": 0.29027567323284686 }, { "epoch": 2.6584579302257403, "grad_norm": 0.329932175131568, "learning_rate": 0.00019626450775070731, "loss": 3.1051158905029297, "step": 4535, "token_acc": 0.28634751773049644 }, { "epoch": 2.6590442685429494, "grad_norm": 0.41972730105232214, "learning_rate": 0.0001962618830072209, "loss": 3.0316390991210938, "step": 4536, "token_acc": 0.2967513651332702 }, { "epoch": 2.6596306068601585, "grad_norm": 0.3315416172347069, "learning_rate": 0.00019625925735948383, "loss": 3.0803680419921875, "step": 4537, "token_acc": 0.2887639933230218 }, { "epoch": 2.660216945177367, "grad_norm": 0.2924363430608812, "learning_rate": 0.00019625663080752076, "loss": 3.0637307167053223, "step": 4538, "token_acc": 0.291914218980734 }, { "epoch": 2.6608032834945763, "grad_norm": 0.42741954444102404, "learning_rate": 0.00019625400335135628, "loss": 3.122969388961792, "step": 4539, "token_acc": 0.283416222150501 }, { "epoch": 2.6613896218117854, "grad_norm": 0.39760594169462443, "learning_rate": 0.00019625137499101522, "loss": 3.1098380088806152, "step": 4540, "token_acc": 0.28538005067882094 }, { "epoch": 2.6619759601289945, "grad_norm": 0.3327046832783997, "learning_rate": 0.00019624874572652217, "loss": 3.072350263595581, "step": 4541, "token_acc": 0.29083413914194106 }, { "epoch": 2.6625622984462036, "grad_norm": 0.302333026609362, "learning_rate": 0.00019624611555790183, "loss": 3.0577166080474854, "step": 4542, "token_acc": 0.29270956292709566 }, { "epoch": 2.6631486367634123, "grad_norm": 0.3189625248580394, "learning_rate": 0.00019624348448517894, "loss": 3.022976875305176, "step": 4543, "token_acc": 0.2968805130653162 }, { "epoch": 2.6637349750806214, "grad_norm": 0.29349139824092646, "learning_rate": 0.0001962408525083782, "loss": 3.0747759342193604, "step": 4544, "token_acc": 0.2901611125558817 }, { "epoch": 2.6643213133978305, "grad_norm": 0.2951850680590209, "learning_rate": 0.00019623821962752437, "loss": 3.0890145301818848, "step": 4545, "token_acc": 0.28779748047836323 }, { "epoch": 2.6649076517150396, "grad_norm": 0.32825599759014656, "learning_rate": 0.00019623558584264206, "loss": 3.0595803260803223, "step": 4546, "token_acc": 0.2944165409010937 }, { "epoch": 2.6654939900322487, "grad_norm": 0.3872003481927382, "learning_rate": 0.0001962329511537562, "loss": 3.0966062545776367, "step": 4547, "token_acc": 0.28734901962843273 }, { "epoch": 2.666080328349458, "grad_norm": 0.30175631248902285, "learning_rate": 0.00019623031556089137, "loss": 3.0942349433898926, "step": 4548, "token_acc": 0.28668121252325696 }, { "epoch": 2.6666666666666665, "grad_norm": 0.30181930527781375, "learning_rate": 0.00019622767906407243, "loss": 3.0267717838287354, "step": 4549, "token_acc": 0.2973882897170164 }, { "epoch": 2.6672530049838756, "grad_norm": 0.2750983562882502, "learning_rate": 0.00019622504166332414, "loss": 3.0798733234405518, "step": 4550, "token_acc": 0.28871550017468267 }, { "epoch": 2.6678393433010847, "grad_norm": 0.3122238136987398, "learning_rate": 0.00019622240335867122, "loss": 3.0714807510375977, "step": 4551, "token_acc": 0.28987423802574247 }, { "epoch": 2.668425681618294, "grad_norm": 0.32047764793144706, "learning_rate": 0.00019621976415013848, "loss": 3.074183702468872, "step": 4552, "token_acc": 0.2890211835217236 }, { "epoch": 2.6690120199355025, "grad_norm": 0.4659150980283412, "learning_rate": 0.0001962171240377507, "loss": 3.0646438598632812, "step": 4553, "token_acc": 0.2908177721229358 }, { "epoch": 2.6695983582527116, "grad_norm": 0.38432784871959863, "learning_rate": 0.0001962144830215327, "loss": 3.0881080627441406, "step": 4554, "token_acc": 0.28787901967383656 }, { "epoch": 2.6701846965699207, "grad_norm": 0.3117405621375754, "learning_rate": 0.0001962118411015093, "loss": 3.0630271434783936, "step": 4555, "token_acc": 0.29029482200402246 }, { "epoch": 2.67077103488713, "grad_norm": 0.33309754223634663, "learning_rate": 0.0001962091982777053, "loss": 3.056474208831787, "step": 4556, "token_acc": 0.29094307652999474 }, { "epoch": 2.671357373204339, "grad_norm": 0.3005710929492111, "learning_rate": 0.0001962065545501455, "loss": 3.1018667221069336, "step": 4557, "token_acc": 0.28600581127345615 }, { "epoch": 2.671943711521548, "grad_norm": 0.36358990948741654, "learning_rate": 0.0001962039099188548, "loss": 3.0533246994018555, "step": 4558, "token_acc": 0.29397992535756506 }, { "epoch": 2.672530049838757, "grad_norm": 0.3618927866280974, "learning_rate": 0.00019620126438385795, "loss": 3.094520092010498, "step": 4559, "token_acc": 0.2867888427311946 }, { "epoch": 2.673116388155966, "grad_norm": 0.3114031401666575, "learning_rate": 0.00019619861794517988, "loss": 3.0540623664855957, "step": 4560, "token_acc": 0.29173182855036806 }, { "epoch": 2.673702726473175, "grad_norm": 0.4022924847718842, "learning_rate": 0.00019619597060284544, "loss": 3.0916781425476074, "step": 4561, "token_acc": 0.2888479878611593 }, { "epoch": 2.674289064790384, "grad_norm": 0.36090436212272575, "learning_rate": 0.0001961933223568795, "loss": 3.104236602783203, "step": 4562, "token_acc": 0.28566051435923995 }, { "epoch": 2.674875403107593, "grad_norm": 0.3819390744440609, "learning_rate": 0.00019619067320730685, "loss": 3.111236333847046, "step": 4563, "token_acc": 0.28461039344068895 }, { "epoch": 2.675461741424802, "grad_norm": 0.3581275614592982, "learning_rate": 0.0001961880231541525, "loss": 3.097654104232788, "step": 4564, "token_acc": 0.28869158949851514 }, { "epoch": 2.676048079742011, "grad_norm": 0.36163033831251296, "learning_rate": 0.00019618537219744128, "loss": 3.108797550201416, "step": 4565, "token_acc": 0.28488825843466886 }, { "epoch": 2.67663441805922, "grad_norm": 0.41489009308025604, "learning_rate": 0.00019618272033719808, "loss": 3.0790529251098633, "step": 4566, "token_acc": 0.28864593699306135 }, { "epoch": 2.677220756376429, "grad_norm": 0.3750461873095386, "learning_rate": 0.00019618006757344782, "loss": 3.070324182510376, "step": 4567, "token_acc": 0.29061931464174456 }, { "epoch": 2.6778070946936383, "grad_norm": 0.3162441869869595, "learning_rate": 0.00019617741390621544, "loss": 3.1262261867523193, "step": 4568, "token_acc": 0.28117845837744254 }, { "epoch": 2.6783934330108474, "grad_norm": 0.3499900932547495, "learning_rate": 0.00019617475933552588, "loss": 3.0733556747436523, "step": 4569, "token_acc": 0.2916897372905501 }, { "epoch": 2.6789797713280565, "grad_norm": 0.35222157983834007, "learning_rate": 0.00019617210386140403, "loss": 3.06453013420105, "step": 4570, "token_acc": 0.2899688670236305 }, { "epoch": 2.679566109645265, "grad_norm": 0.382571075804764, "learning_rate": 0.00019616944748387488, "loss": 3.0631256103515625, "step": 4571, "token_acc": 0.29064297730057526 }, { "epoch": 2.6801524479624743, "grad_norm": 0.3118164015162934, "learning_rate": 0.00019616679020296334, "loss": 3.0829179286956787, "step": 4572, "token_acc": 0.2883335547376619 }, { "epoch": 2.6807387862796834, "grad_norm": 0.3379179181178882, "learning_rate": 0.0001961641320186944, "loss": 3.039517879486084, "step": 4573, "token_acc": 0.295798018577866 }, { "epoch": 2.6813251245968925, "grad_norm": 0.32282983948174376, "learning_rate": 0.000196161472931093, "loss": 3.024003505706787, "step": 4574, "token_acc": 0.2962658415405439 }, { "epoch": 2.681911462914101, "grad_norm": 0.30593756624321233, "learning_rate": 0.00019615881294018418, "loss": 3.0250720977783203, "step": 4575, "token_acc": 0.2970103663338939 }, { "epoch": 2.6824978012313103, "grad_norm": 0.3173413310128304, "learning_rate": 0.00019615615204599286, "loss": 3.075173854827881, "step": 4576, "token_acc": 0.29134404883857584 }, { "epoch": 2.6830841395485194, "grad_norm": 0.290468111537404, "learning_rate": 0.00019615349024854412, "loss": 3.0718934535980225, "step": 4577, "token_acc": 0.29100289622168934 }, { "epoch": 2.6836704778657285, "grad_norm": 0.32407332951306195, "learning_rate": 0.00019615082754786283, "loss": 3.0403764247894287, "step": 4578, "token_acc": 0.29489088496944577 }, { "epoch": 2.6842568161829377, "grad_norm": 0.28160077769186653, "learning_rate": 0.0001961481639439741, "loss": 3.0857834815979004, "step": 4579, "token_acc": 0.28730971094341595 }, { "epoch": 2.6848431545001468, "grad_norm": 0.37236130523301947, "learning_rate": 0.00019614549943690297, "loss": 3.096175193786621, "step": 4580, "token_acc": 0.2859703155549015 }, { "epoch": 2.685429492817356, "grad_norm": 0.3292317410680187, "learning_rate": 0.0001961428340266744, "loss": 3.0943541526794434, "step": 4581, "token_acc": 0.2862232503741792 }, { "epoch": 2.6860158311345645, "grad_norm": 0.3449888147804372, "learning_rate": 0.00019614016771331345, "loss": 3.1412627696990967, "step": 4582, "token_acc": 0.2807060528765929 }, { "epoch": 2.6866021694517737, "grad_norm": 0.34187128117194526, "learning_rate": 0.00019613750049684518, "loss": 3.0711050033569336, "step": 4583, "token_acc": 0.2899561084206417 }, { "epoch": 2.6871885077689828, "grad_norm": 0.31729738211976827, "learning_rate": 0.00019613483237729463, "loss": 3.053450584411621, "step": 4584, "token_acc": 0.29359155835799766 }, { "epoch": 2.687774846086192, "grad_norm": 0.32844287331706523, "learning_rate": 0.0001961321633546869, "loss": 3.0576930046081543, "step": 4585, "token_acc": 0.2919579721145779 }, { "epoch": 2.6883611844034006, "grad_norm": 0.349752905400812, "learning_rate": 0.00019612949342904704, "loss": 3.098722457885742, "step": 4586, "token_acc": 0.288147978259826 }, { "epoch": 2.6889475227206097, "grad_norm": 0.34465407614689486, "learning_rate": 0.0001961268226004001, "loss": 3.0849053859710693, "step": 4587, "token_acc": 0.28720458562802054 }, { "epoch": 2.6895338610378188, "grad_norm": 0.25791209653377434, "learning_rate": 0.00019612415086877122, "loss": 3.0729269981384277, "step": 4588, "token_acc": 0.2909098928927267 }, { "epoch": 2.690120199355028, "grad_norm": 0.30326907633575656, "learning_rate": 0.00019612147823418546, "loss": 3.034104824066162, "step": 4589, "token_acc": 0.29637118992882827 }, { "epoch": 2.690706537672237, "grad_norm": 0.26346440519259406, "learning_rate": 0.0001961188046966679, "loss": 3.087848663330078, "step": 4590, "token_acc": 0.28711999201128663 }, { "epoch": 2.691292875989446, "grad_norm": 0.3094679598124755, "learning_rate": 0.00019611613025624373, "loss": 3.0497069358825684, "step": 4591, "token_acc": 0.2938933372682722 }, { "epoch": 2.6918792143066548, "grad_norm": 0.3358553416485472, "learning_rate": 0.00019611345491293803, "loss": 3.058612823486328, "step": 4592, "token_acc": 0.29250981653671565 }, { "epoch": 2.692465552623864, "grad_norm": 0.2937235097564311, "learning_rate": 0.00019611077866677593, "loss": 3.081667423248291, "step": 4593, "token_acc": 0.2882895002354207 }, { "epoch": 2.693051890941073, "grad_norm": 0.3379997161411641, "learning_rate": 0.00019610810151778258, "loss": 3.0104331970214844, "step": 4594, "token_acc": 0.29800038029261977 }, { "epoch": 2.693638229258282, "grad_norm": 0.35560811230546824, "learning_rate": 0.00019610542346598314, "loss": 3.044588088989258, "step": 4595, "token_acc": 0.293914189873611 }, { "epoch": 2.6942245675754912, "grad_norm": 0.34088460525265607, "learning_rate": 0.00019610274451140272, "loss": 3.115767240524292, "step": 4596, "token_acc": 0.28402360832203544 }, { "epoch": 2.6948109058927, "grad_norm": 0.33497354791512735, "learning_rate": 0.00019610006465406655, "loss": 3.086986780166626, "step": 4597, "token_acc": 0.2868543587682675 }, { "epoch": 2.695397244209909, "grad_norm": 0.27525214080721966, "learning_rate": 0.0001960973838939997, "loss": 3.080409526824951, "step": 4598, "token_acc": 0.28872607012260704 }, { "epoch": 2.695983582527118, "grad_norm": 0.2995414140533698, "learning_rate": 0.0001960947022312275, "loss": 3.0814218521118164, "step": 4599, "token_acc": 0.28911611674451765 }, { "epoch": 2.6965699208443272, "grad_norm": 0.2849168993826068, "learning_rate": 0.00019609201966577504, "loss": 3.0284667015075684, "step": 4600, "token_acc": 0.2964905885646967 }, { "epoch": 2.6971562591615363, "grad_norm": 0.2587181971182644, "learning_rate": 0.00019608933619766754, "loss": 3.072713851928711, "step": 4601, "token_acc": 0.28916574474504675 }, { "epoch": 2.6977425974787455, "grad_norm": 0.2786439890523753, "learning_rate": 0.0001960866518269302, "loss": 3.0935587882995605, "step": 4602, "token_acc": 0.2861736170299637 }, { "epoch": 2.698328935795954, "grad_norm": 0.338772670331652, "learning_rate": 0.00019608396655358822, "loss": 3.113826036453247, "step": 4603, "token_acc": 0.28389691079576224 }, { "epoch": 2.6989152741131632, "grad_norm": 0.37271601858279096, "learning_rate": 0.00019608128037766688, "loss": 3.0474891662597656, "step": 4604, "token_acc": 0.2954168820304049 }, { "epoch": 2.6995016124303723, "grad_norm": 0.385045455634142, "learning_rate": 0.00019607859329919138, "loss": 3.005220413208008, "step": 4605, "token_acc": 0.3008332465368191 }, { "epoch": 2.7000879507475815, "grad_norm": 0.2849201771932255, "learning_rate": 0.00019607590531818696, "loss": 3.039815902709961, "step": 4606, "token_acc": 0.2928555611582 }, { "epoch": 2.70067428906479, "grad_norm": 0.353030282025018, "learning_rate": 0.00019607321643467889, "loss": 3.046417236328125, "step": 4607, "token_acc": 0.294735450085296 }, { "epoch": 2.7012606273819992, "grad_norm": 0.3707303185644731, "learning_rate": 0.00019607052664869236, "loss": 3.0585556030273438, "step": 4608, "token_acc": 0.2932592550005488 }, { "epoch": 2.7018469656992083, "grad_norm": 0.38869429678549705, "learning_rate": 0.00019606783596025273, "loss": 3.0804758071899414, "step": 4609, "token_acc": 0.28922501882327634 }, { "epoch": 2.7024333040164175, "grad_norm": 0.3603531737301099, "learning_rate": 0.00019606514436938522, "loss": 3.029695510864258, "step": 4610, "token_acc": 0.295240389763358 }, { "epoch": 2.7030196423336266, "grad_norm": 0.37622594772044854, "learning_rate": 0.00019606245187611516, "loss": 3.0557150840759277, "step": 4611, "token_acc": 0.2921033866725961 }, { "epoch": 2.7036059806508357, "grad_norm": 0.3142220577318543, "learning_rate": 0.00019605975848046775, "loss": 3.0820374488830566, "step": 4612, "token_acc": 0.2884864366176436 }, { "epoch": 2.704192318968045, "grad_norm": 0.3402226734775937, "learning_rate": 0.00019605706418246842, "loss": 3.0157523155212402, "step": 4613, "token_acc": 0.29827982850569906 }, { "epoch": 2.7047786572852535, "grad_norm": 0.41825789334529473, "learning_rate": 0.00019605436898214234, "loss": 3.0657761096954346, "step": 4614, "token_acc": 0.2901373858316064 }, { "epoch": 2.7053649956024626, "grad_norm": 0.42276411250977985, "learning_rate": 0.00019605167287951498, "loss": 3.069960355758667, "step": 4615, "token_acc": 0.289633954590243 }, { "epoch": 2.7059513339196717, "grad_norm": 0.3333705382069517, "learning_rate": 0.00019604897587461152, "loss": 3.0744457244873047, "step": 4616, "token_acc": 0.290705683454739 }, { "epoch": 2.706537672236881, "grad_norm": 0.36885850245526164, "learning_rate": 0.0001960462779674574, "loss": 3.115445137023926, "step": 4617, "token_acc": 0.28453691504479295 }, { "epoch": 2.7071240105540895, "grad_norm": 0.3507145322574794, "learning_rate": 0.00019604357915807788, "loss": 3.1081814765930176, "step": 4618, "token_acc": 0.28615999035806633 }, { "epoch": 2.7077103488712986, "grad_norm": 0.3375902388952492, "learning_rate": 0.00019604087944649837, "loss": 3.101623058319092, "step": 4619, "token_acc": 0.28516436244733306 }, { "epoch": 2.7082966871885077, "grad_norm": 0.3043907305211772, "learning_rate": 0.00019603817883274424, "loss": 3.050922393798828, "step": 4620, "token_acc": 0.2914007916338575 }, { "epoch": 2.708883025505717, "grad_norm": 0.35341317383193205, "learning_rate": 0.0001960354773168408, "loss": 3.052631139755249, "step": 4621, "token_acc": 0.2927062964920549 }, { "epoch": 2.709469363822926, "grad_norm": 0.297846945583081, "learning_rate": 0.00019603277489881347, "loss": 3.0719237327575684, "step": 4622, "token_acc": 0.2900396840988566 }, { "epoch": 2.710055702140135, "grad_norm": 0.3690801624691033, "learning_rate": 0.00019603007157868762, "loss": 3.0560717582702637, "step": 4623, "token_acc": 0.29208165988733914 }, { "epoch": 2.710642040457344, "grad_norm": 0.3498312818189329, "learning_rate": 0.00019602736735648867, "loss": 3.0670833587646484, "step": 4624, "token_acc": 0.2920055654939376 }, { "epoch": 2.711228378774553, "grad_norm": 0.34390655350503474, "learning_rate": 0.00019602466223224202, "loss": 3.060535430908203, "step": 4625, "token_acc": 0.29069353292723166 }, { "epoch": 2.711814717091762, "grad_norm": 0.32532908919171283, "learning_rate": 0.00019602195620597302, "loss": 3.1041665077209473, "step": 4626, "token_acc": 0.2853834748893112 }, { "epoch": 2.712401055408971, "grad_norm": 0.43594270548360947, "learning_rate": 0.00019601924927770714, "loss": 3.1092512607574463, "step": 4627, "token_acc": 0.2844591191448878 }, { "epoch": 2.71298739372618, "grad_norm": 0.4630780654760894, "learning_rate": 0.00019601654144746981, "loss": 3.0721325874328613, "step": 4628, "token_acc": 0.2916355781949886 }, { "epoch": 2.713573732043389, "grad_norm": 0.3553615385212874, "learning_rate": 0.00019601383271528646, "loss": 3.0570480823516846, "step": 4629, "token_acc": 0.2922392415838757 }, { "epoch": 2.714160070360598, "grad_norm": 0.35840586420755255, "learning_rate": 0.0001960111230811825, "loss": 3.0486514568328857, "step": 4630, "token_acc": 0.2923069325459536 }, { "epoch": 2.714746408677807, "grad_norm": 0.3552950301349356, "learning_rate": 0.00019600841254518346, "loss": 3.0606350898742676, "step": 4631, "token_acc": 0.2923529301793922 }, { "epoch": 2.715332746995016, "grad_norm": 0.3539224366752878, "learning_rate": 0.00019600570110731472, "loss": 3.090754508972168, "step": 4632, "token_acc": 0.28820463086434717 }, { "epoch": 2.7159190853122253, "grad_norm": 0.36553392520341765, "learning_rate": 0.0001960029887676018, "loss": 3.0873289108276367, "step": 4633, "token_acc": 0.2893336101938699 }, { "epoch": 2.7165054236294344, "grad_norm": 0.3605116182937075, "learning_rate": 0.0001960002755260702, "loss": 3.0798168182373047, "step": 4634, "token_acc": 0.2882106541758875 }, { "epoch": 2.7170917619466435, "grad_norm": 0.32631907619650247, "learning_rate": 0.00019599756138274533, "loss": 3.1280314922332764, "step": 4635, "token_acc": 0.2823222726487539 }, { "epoch": 2.717678100263852, "grad_norm": 0.4181030200341294, "learning_rate": 0.00019599484633765277, "loss": 3.046919584274292, "step": 4636, "token_acc": 0.29325448630278916 }, { "epoch": 2.7182644385810613, "grad_norm": 0.37397930933740153, "learning_rate": 0.00019599213039081794, "loss": 3.0940630435943604, "step": 4637, "token_acc": 0.28769915706681287 }, { "epoch": 2.7188507768982704, "grad_norm": 0.33913666556650074, "learning_rate": 0.0001959894135422664, "loss": 3.045283317565918, "step": 4638, "token_acc": 0.292696962705854 }, { "epoch": 2.7194371152154795, "grad_norm": 0.3470344307622619, "learning_rate": 0.00019598669579202368, "loss": 3.094909429550171, "step": 4639, "token_acc": 0.28872493146443223 }, { "epoch": 2.720023453532688, "grad_norm": 0.37438002093932476, "learning_rate": 0.00019598397714011532, "loss": 3.0970559120178223, "step": 4640, "token_acc": 0.2849042199340155 }, { "epoch": 2.7206097918498973, "grad_norm": 0.3608262904244165, "learning_rate": 0.00019598125758656682, "loss": 3.0626635551452637, "step": 4641, "token_acc": 0.2916284567259685 }, { "epoch": 2.7211961301671064, "grad_norm": 0.3493893388056432, "learning_rate": 0.00019597853713140373, "loss": 3.0793392658233643, "step": 4642, "token_acc": 0.2890838029449191 }, { "epoch": 2.7217824684843155, "grad_norm": 0.3560141970482984, "learning_rate": 0.00019597581577465164, "loss": 3.0795159339904785, "step": 4643, "token_acc": 0.28829914011855473 }, { "epoch": 2.7223688068015246, "grad_norm": 0.3593867827080234, "learning_rate": 0.00019597309351633606, "loss": 3.0684590339660645, "step": 4644, "token_acc": 0.2917868430034574 }, { "epoch": 2.7229551451187337, "grad_norm": 0.32180550235222366, "learning_rate": 0.00019597037035648258, "loss": 3.0816004276275635, "step": 4645, "token_acc": 0.2886940889045958 }, { "epoch": 2.7235414834359424, "grad_norm": 0.35681733398488874, "learning_rate": 0.0001959676462951168, "loss": 3.078744888305664, "step": 4646, "token_acc": 0.28831233400060186 }, { "epoch": 2.7241278217531515, "grad_norm": 0.3028995387458006, "learning_rate": 0.00019596492133226433, "loss": 3.074610471725464, "step": 4647, "token_acc": 0.29086400567381804 }, { "epoch": 2.7247141600703606, "grad_norm": 0.33198462887991426, "learning_rate": 0.00019596219546795074, "loss": 3.0617027282714844, "step": 4648, "token_acc": 0.29290141340562464 }, { "epoch": 2.7253004983875697, "grad_norm": 0.3956395327224407, "learning_rate": 0.00019595946870220161, "loss": 3.0271615982055664, "step": 4649, "token_acc": 0.29771774592562267 }, { "epoch": 2.7258868367047784, "grad_norm": 0.36416418492854763, "learning_rate": 0.00019595674103504258, "loss": 3.021888256072998, "step": 4650, "token_acc": 0.29820106042201755 }, { "epoch": 2.7264731750219875, "grad_norm": 0.29589315395755755, "learning_rate": 0.00019595401246649927, "loss": 3.0241734981536865, "step": 4651, "token_acc": 0.29568094805468154 }, { "epoch": 2.7270595133391966, "grad_norm": 0.29836452049302997, "learning_rate": 0.00019595128299659733, "loss": 3.0576462745666504, "step": 4652, "token_acc": 0.29146687709566044 }, { "epoch": 2.7276458516564057, "grad_norm": 0.2730215587276771, "learning_rate": 0.00019594855262536237, "loss": 3.045133113861084, "step": 4653, "token_acc": 0.2934791028390864 }, { "epoch": 2.728232189973615, "grad_norm": 0.2905621118751534, "learning_rate": 0.00019594582135282006, "loss": 3.0950140953063965, "step": 4654, "token_acc": 0.2875177080850631 }, { "epoch": 2.728818528290824, "grad_norm": 0.2628072451628686, "learning_rate": 0.00019594308917899605, "loss": 3.0977776050567627, "step": 4655, "token_acc": 0.28793800059027674 }, { "epoch": 2.729404866608033, "grad_norm": 0.2558353075979535, "learning_rate": 0.00019594035610391597, "loss": 3.014267921447754, "step": 4656, "token_acc": 0.29720147806054054 }, { "epoch": 2.7299912049252417, "grad_norm": 0.33100191650068966, "learning_rate": 0.00019593762212760557, "loss": 3.0784945487976074, "step": 4657, "token_acc": 0.28895315997573273 }, { "epoch": 2.730577543242451, "grad_norm": 0.32806953792579385, "learning_rate": 0.00019593488725009044, "loss": 3.0503933429718018, "step": 4658, "token_acc": 0.2932190818854645 }, { "epoch": 2.73116388155966, "grad_norm": 0.31314419072789296, "learning_rate": 0.00019593215147139636, "loss": 3.0369620323181152, "step": 4659, "token_acc": 0.2955304721523686 }, { "epoch": 2.731750219876869, "grad_norm": 0.28879276370844303, "learning_rate": 0.00019592941479154897, "loss": 3.063556671142578, "step": 4660, "token_acc": 0.2921654838326351 }, { "epoch": 2.7323365581940777, "grad_norm": 0.2984387593755698, "learning_rate": 0.000195926677210574, "loss": 3.0611438751220703, "step": 4661, "token_acc": 0.29205801789978864 }, { "epoch": 2.732922896511287, "grad_norm": 0.2929929716247293, "learning_rate": 0.00019592393872849714, "loss": 3.013617515563965, "step": 4662, "token_acc": 0.29880121098368567 }, { "epoch": 2.733509234828496, "grad_norm": 0.37261649586524026, "learning_rate": 0.00019592119934534416, "loss": 3.077099323272705, "step": 4663, "token_acc": 0.28830960779811726 }, { "epoch": 2.734095573145705, "grad_norm": 0.35175936282198433, "learning_rate": 0.00019591845906114075, "loss": 3.044412851333618, "step": 4664, "token_acc": 0.2937069167404873 }, { "epoch": 2.734681911462914, "grad_norm": 0.3144549791936237, "learning_rate": 0.0001959157178759127, "loss": 3.0678317546844482, "step": 4665, "token_acc": 0.2916704256937016 }, { "epoch": 2.7352682497801233, "grad_norm": 0.29176557350581506, "learning_rate": 0.00019591297578968567, "loss": 3.073025703430176, "step": 4666, "token_acc": 0.2906773030120212 }, { "epoch": 2.7358545880973324, "grad_norm": 0.29566861411407064, "learning_rate": 0.00019591023280248553, "loss": 3.073455333709717, "step": 4667, "token_acc": 0.2911539056336219 }, { "epoch": 2.736440926414541, "grad_norm": 0.3009505906839044, "learning_rate": 0.00019590748891433796, "loss": 3.0105578899383545, "step": 4668, "token_acc": 0.2973537931574415 }, { "epoch": 2.73702726473175, "grad_norm": 0.33511579029209515, "learning_rate": 0.0001959047441252688, "loss": 3.106764316558838, "step": 4669, "token_acc": 0.2853260017353307 }, { "epoch": 2.7376136030489593, "grad_norm": 0.4605083147386973, "learning_rate": 0.0001959019984353038, "loss": 3.071453094482422, "step": 4670, "token_acc": 0.28911006520143356 }, { "epoch": 2.7381999413661684, "grad_norm": 0.4578590824992916, "learning_rate": 0.0001958992518444687, "loss": 3.030712842941284, "step": 4671, "token_acc": 0.2959021262043306 }, { "epoch": 2.738786279683377, "grad_norm": 0.26593354325576213, "learning_rate": 0.00019589650435278944, "loss": 3.0777077674865723, "step": 4672, "token_acc": 0.2885764418207303 }, { "epoch": 2.739372618000586, "grad_norm": 0.4433735877345629, "learning_rate": 0.00019589375596029167, "loss": 3.0942602157592773, "step": 4673, "token_acc": 0.287688164690784 }, { "epoch": 2.7399589563177953, "grad_norm": 0.3059161207637901, "learning_rate": 0.0001958910066670013, "loss": 3.0496675968170166, "step": 4674, "token_acc": 0.2961370736612304 }, { "epoch": 2.7405452946350044, "grad_norm": 0.3635916470076969, "learning_rate": 0.00019588825647294417, "loss": 3.0929830074310303, "step": 4675, "token_acc": 0.28623168349715716 }, { "epoch": 2.7411316329522135, "grad_norm": 0.26899822975510257, "learning_rate": 0.00019588550537814604, "loss": 3.0790436267852783, "step": 4676, "token_acc": 0.29015201586252476 }, { "epoch": 2.7417179712694226, "grad_norm": 0.3231045312810313, "learning_rate": 0.00019588275338263284, "loss": 3.0601134300231934, "step": 4677, "token_acc": 0.291321438902434 }, { "epoch": 2.7423043095866317, "grad_norm": 0.35540835698269974, "learning_rate": 0.00019588000048643034, "loss": 3.099900245666504, "step": 4678, "token_acc": 0.2856986295650722 }, { "epoch": 2.7428906479038404, "grad_norm": 0.3956783976685458, "learning_rate": 0.00019587724668956446, "loss": 3.09187912940979, "step": 4679, "token_acc": 0.28746296921526465 }, { "epoch": 2.7434769862210495, "grad_norm": 0.3443251424533783, "learning_rate": 0.00019587449199206104, "loss": 3.040811538696289, "step": 4680, "token_acc": 0.29417252970503904 }, { "epoch": 2.7440633245382586, "grad_norm": 0.3242456837977996, "learning_rate": 0.00019587173639394593, "loss": 3.1106135845184326, "step": 4681, "token_acc": 0.2858626656005841 }, { "epoch": 2.7446496628554677, "grad_norm": 0.27962492594436156, "learning_rate": 0.00019586897989524505, "loss": 3.0799636840820312, "step": 4682, "token_acc": 0.28810755073258026 }, { "epoch": 2.7452360011726764, "grad_norm": 0.3200563141330118, "learning_rate": 0.0001958662224959843, "loss": 3.022454023361206, "step": 4683, "token_acc": 0.2946517134149012 }, { "epoch": 2.7458223394898855, "grad_norm": 0.3470764414478528, "learning_rate": 0.00019586346419618958, "loss": 3.0411646366119385, "step": 4684, "token_acc": 0.2956099928516978 }, { "epoch": 2.7464086778070946, "grad_norm": 0.371575536355943, "learning_rate": 0.0001958607049958868, "loss": 3.0789613723754883, "step": 4685, "token_acc": 0.2882726959956508 }, { "epoch": 2.7469950161243037, "grad_norm": 0.36172382853085894, "learning_rate": 0.00019585794489510185, "loss": 3.064236640930176, "step": 4686, "token_acc": 0.2914867132110191 }, { "epoch": 2.747581354441513, "grad_norm": 0.2998610455800225, "learning_rate": 0.00019585518389386066, "loss": 3.029071807861328, "step": 4687, "token_acc": 0.2972378247066233 }, { "epoch": 2.748167692758722, "grad_norm": 0.3266438526671952, "learning_rate": 0.0001958524219921892, "loss": 3.0545945167541504, "step": 4688, "token_acc": 0.2934507244725807 }, { "epoch": 2.748754031075931, "grad_norm": 0.3390500727553841, "learning_rate": 0.0001958496591901134, "loss": 3.0861992835998535, "step": 4689, "token_acc": 0.2885668548319151 }, { "epoch": 2.7493403693931397, "grad_norm": 0.31174469725482884, "learning_rate": 0.00019584689548765924, "loss": 3.0544002056121826, "step": 4690, "token_acc": 0.2940541296376857 }, { "epoch": 2.749926707710349, "grad_norm": 0.3764127403414886, "learning_rate": 0.00019584413088485263, "loss": 3.046809673309326, "step": 4691, "token_acc": 0.2941862662349933 }, { "epoch": 2.750513046027558, "grad_norm": 0.35695470617247205, "learning_rate": 0.00019584136538171957, "loss": 3.065302848815918, "step": 4692, "token_acc": 0.29076216254163384 }, { "epoch": 2.751099384344767, "grad_norm": 0.3389711880486392, "learning_rate": 0.00019583859897828602, "loss": 3.098149299621582, "step": 4693, "token_acc": 0.28518565650365424 }, { "epoch": 2.7516857226619758, "grad_norm": 0.28084049582478204, "learning_rate": 0.00019583583167457797, "loss": 3.1127569675445557, "step": 4694, "token_acc": 0.2848683358794438 }, { "epoch": 2.752272060979185, "grad_norm": 0.3247790265293051, "learning_rate": 0.0001958330634706214, "loss": 3.0625367164611816, "step": 4695, "token_acc": 0.291680886594017 }, { "epoch": 2.752858399296394, "grad_norm": 0.3106951595498024, "learning_rate": 0.00019583029436644238, "loss": 3.033874988555908, "step": 4696, "token_acc": 0.29539137757927825 }, { "epoch": 2.753444737613603, "grad_norm": 0.29705095587870434, "learning_rate": 0.00019582752436206685, "loss": 3.070064067840576, "step": 4697, "token_acc": 0.2887197727197061 }, { "epoch": 2.754031075930812, "grad_norm": 0.2929945761325786, "learning_rate": 0.0001958247534575209, "loss": 3.112363338470459, "step": 4698, "token_acc": 0.28455431060983494 }, { "epoch": 2.7546174142480213, "grad_norm": 0.3359301507738248, "learning_rate": 0.00019582198165283047, "loss": 3.0615921020507812, "step": 4699, "token_acc": 0.29164272577066774 }, { "epoch": 2.75520375256523, "grad_norm": 0.30165111441009423, "learning_rate": 0.00019581920894802166, "loss": 3.0730247497558594, "step": 4700, "token_acc": 0.289022546472928 }, { "epoch": 2.755790090882439, "grad_norm": 0.2925426932606334, "learning_rate": 0.00019581643534312052, "loss": 3.063750743865967, "step": 4701, "token_acc": 0.2912387791741472 }, { "epoch": 2.756376429199648, "grad_norm": 0.3313376692642333, "learning_rate": 0.00019581366083815306, "loss": 3.0759363174438477, "step": 4702, "token_acc": 0.2897901469499947 }, { "epoch": 2.7569627675168573, "grad_norm": 0.2562860886490982, "learning_rate": 0.00019581088543314535, "loss": 3.0696358680725098, "step": 4703, "token_acc": 0.29079656358875594 }, { "epoch": 2.757549105834066, "grad_norm": 0.28867385766927933, "learning_rate": 0.0001958081091281235, "loss": 3.087944507598877, "step": 4704, "token_acc": 0.2875663123764376 }, { "epoch": 2.758135444151275, "grad_norm": 0.3305289556075944, "learning_rate": 0.0001958053319231136, "loss": 3.0573315620422363, "step": 4705, "token_acc": 0.2941450695322377 }, { "epoch": 2.758721782468484, "grad_norm": 0.26457058929173943, "learning_rate": 0.00019580255381814165, "loss": 3.0935044288635254, "step": 4706, "token_acc": 0.28809805744742284 }, { "epoch": 2.7593081207856933, "grad_norm": 0.33978038309946346, "learning_rate": 0.00019579977481323384, "loss": 3.050231456756592, "step": 4707, "token_acc": 0.2941964704463532 }, { "epoch": 2.7598944591029024, "grad_norm": 0.34472759426439287, "learning_rate": 0.00019579699490841624, "loss": 3.0440878868103027, "step": 4708, "token_acc": 0.2937197206604339 }, { "epoch": 2.7604807974201115, "grad_norm": 0.29485957217769715, "learning_rate": 0.00019579421410371493, "loss": 3.0662012100219727, "step": 4709, "token_acc": 0.2916552940435449 }, { "epoch": 2.7610671357373207, "grad_norm": 0.336811692543625, "learning_rate": 0.00019579143239915607, "loss": 3.0733067989349365, "step": 4710, "token_acc": 0.2893607446271376 }, { "epoch": 2.7616534740545293, "grad_norm": 0.3391421590046798, "learning_rate": 0.00019578864979476578, "loss": 3.1234397888183594, "step": 4711, "token_acc": 0.2837658190051358 }, { "epoch": 2.7622398123717384, "grad_norm": 0.3173781862702908, "learning_rate": 0.00019578586629057023, "loss": 3.077023506164551, "step": 4712, "token_acc": 0.28865633628787213 }, { "epoch": 2.7628261506889475, "grad_norm": 0.295856290220241, "learning_rate": 0.0001957830818865955, "loss": 3.073726177215576, "step": 4713, "token_acc": 0.2900482635746166 }, { "epoch": 2.7634124890061567, "grad_norm": 0.2988660181723311, "learning_rate": 0.0001957802965828678, "loss": 3.0881896018981934, "step": 4714, "token_acc": 0.287873678465978 }, { "epoch": 2.7639988273233653, "grad_norm": 0.33936644452665277, "learning_rate": 0.0001957775103794133, "loss": 3.0753698348999023, "step": 4715, "token_acc": 0.28874593145701705 }, { "epoch": 2.7645851656405744, "grad_norm": 0.32235436266320716, "learning_rate": 0.00019577472327625815, "loss": 3.050734519958496, "step": 4716, "token_acc": 0.292624627093809 }, { "epoch": 2.7651715039577835, "grad_norm": 0.3926682045040899, "learning_rate": 0.0001957719352734285, "loss": 3.082221269607544, "step": 4717, "token_acc": 0.2890802136508495 }, { "epoch": 2.7657578422749927, "grad_norm": 0.5463267742163548, "learning_rate": 0.0001957691463709506, "loss": 3.0633373260498047, "step": 4718, "token_acc": 0.2933403151084927 }, { "epoch": 2.7663441805922018, "grad_norm": 0.45305651236167416, "learning_rate": 0.0001957663565688506, "loss": 3.0627689361572266, "step": 4719, "token_acc": 0.29162365997302125 }, { "epoch": 2.766930518909411, "grad_norm": 0.36121123116610665, "learning_rate": 0.00019576356586715473, "loss": 3.0541634559631348, "step": 4720, "token_acc": 0.2935886254275925 }, { "epoch": 2.76751685722662, "grad_norm": 0.38688455449479037, "learning_rate": 0.00019576077426588923, "loss": 3.0042762756347656, "step": 4721, "token_acc": 0.30013287338699374 }, { "epoch": 2.7681031955438287, "grad_norm": 0.2947530000112747, "learning_rate": 0.0001957579817650803, "loss": 3.052546977996826, "step": 4722, "token_acc": 0.2927042164110335 }, { "epoch": 2.7686895338610378, "grad_norm": 0.32017641901143074, "learning_rate": 0.00019575518836475412, "loss": 3.0398685932159424, "step": 4723, "token_acc": 0.2960986305903769 }, { "epoch": 2.769275872178247, "grad_norm": 0.3504907476961808, "learning_rate": 0.000195752394064937, "loss": 3.0588197708129883, "step": 4724, "token_acc": 0.292848503970132 }, { "epoch": 2.769862210495456, "grad_norm": 0.2917694815622761, "learning_rate": 0.00019574959886565517, "loss": 3.063797950744629, "step": 4725, "token_acc": 0.29095620509643644 }, { "epoch": 2.7704485488126647, "grad_norm": 0.30190155906695665, "learning_rate": 0.00019574680276693486, "loss": 3.0904717445373535, "step": 4726, "token_acc": 0.2884245279006465 }, { "epoch": 2.771034887129874, "grad_norm": 0.312431659698821, "learning_rate": 0.00019574400576880238, "loss": 3.0925369262695312, "step": 4727, "token_acc": 0.28749324352417777 }, { "epoch": 2.771621225447083, "grad_norm": 0.3773457249388687, "learning_rate": 0.000195741207871284, "loss": 3.0582962036132812, "step": 4728, "token_acc": 0.29231622686425685 }, { "epoch": 2.772207563764292, "grad_norm": 0.3209095466808538, "learning_rate": 0.00019573840907440598, "loss": 3.128178358078003, "step": 4729, "token_acc": 0.2824768929654992 }, { "epoch": 2.772793902081501, "grad_norm": 0.29226061188838, "learning_rate": 0.00019573560937819457, "loss": 3.0923616886138916, "step": 4730, "token_acc": 0.2864712196533112 }, { "epoch": 2.7733802403987102, "grad_norm": 0.32371053912124587, "learning_rate": 0.00019573280878267614, "loss": 3.029703140258789, "step": 4731, "token_acc": 0.29724591218459495 }, { "epoch": 2.7739665787159193, "grad_norm": 0.2786054331978042, "learning_rate": 0.000195730007287877, "loss": 3.0493087768554688, "step": 4732, "token_acc": 0.294330019413535 }, { "epoch": 2.774552917033128, "grad_norm": 0.32943201248548615, "learning_rate": 0.0001957272048938234, "loss": 3.090531826019287, "step": 4733, "token_acc": 0.28691924350759895 }, { "epoch": 2.775139255350337, "grad_norm": 0.27186942100487144, "learning_rate": 0.00019572440160054173, "loss": 3.0443716049194336, "step": 4734, "token_acc": 0.29338803405018826 }, { "epoch": 2.7757255936675462, "grad_norm": 0.3302503932228583, "learning_rate": 0.00019572159740805832, "loss": 3.067739248275757, "step": 4735, "token_acc": 0.29002010226042696 }, { "epoch": 2.7763119319847553, "grad_norm": 0.2985261874916844, "learning_rate": 0.00019571879231639945, "loss": 3.0648317337036133, "step": 4736, "token_acc": 0.2914637576808632 }, { "epoch": 2.776898270301964, "grad_norm": 0.3310816357812853, "learning_rate": 0.00019571598632559152, "loss": 3.067718982696533, "step": 4737, "token_acc": 0.2905018476514384 }, { "epoch": 2.777484608619173, "grad_norm": 0.29818552734573334, "learning_rate": 0.00019571317943566086, "loss": 3.1026358604431152, "step": 4738, "token_acc": 0.2855509040896296 }, { "epoch": 2.7780709469363822, "grad_norm": 0.3330834669152819, "learning_rate": 0.00019571037164663389, "loss": 3.021570920944214, "step": 4739, "token_acc": 0.2981759041933874 }, { "epoch": 2.7786572852535913, "grad_norm": 0.3039648515280741, "learning_rate": 0.00019570756295853693, "loss": 3.03800106048584, "step": 4740, "token_acc": 0.2946683961484478 }, { "epoch": 2.7792436235708005, "grad_norm": 0.26935054939240355, "learning_rate": 0.00019570475337139635, "loss": 3.1004624366760254, "step": 4741, "token_acc": 0.286403310917846 }, { "epoch": 2.7798299618880096, "grad_norm": 0.29738923224375247, "learning_rate": 0.0001957019428852386, "loss": 3.0832347869873047, "step": 4742, "token_acc": 0.2880243846665558 }, { "epoch": 2.7804163002052187, "grad_norm": 0.27315316878689616, "learning_rate": 0.00019569913150009008, "loss": 3.057856559753418, "step": 4743, "token_acc": 0.29203503220936605 }, { "epoch": 2.7810026385224274, "grad_norm": 0.29283060585933907, "learning_rate": 0.00019569631921597717, "loss": 3.0598127841949463, "step": 4744, "token_acc": 0.29049602010457287 }, { "epoch": 2.7815889768396365, "grad_norm": 0.29953033051413813, "learning_rate": 0.00019569350603292627, "loss": 3.0425472259521484, "step": 4745, "token_acc": 0.29547998117877244 }, { "epoch": 2.7821753151568456, "grad_norm": 0.2697598650110707, "learning_rate": 0.00019569069195096386, "loss": 3.0947084426879883, "step": 4746, "token_acc": 0.2875841711050182 }, { "epoch": 2.7827616534740547, "grad_norm": 0.31858661201747285, "learning_rate": 0.00019568787697011632, "loss": 3.071944236755371, "step": 4747, "token_acc": 0.2898569201307825 }, { "epoch": 2.7833479917912634, "grad_norm": 0.3676013530382402, "learning_rate": 0.00019568506109041015, "loss": 3.077906608581543, "step": 4748, "token_acc": 0.28924578356454117 }, { "epoch": 2.7839343301084725, "grad_norm": 0.326779862425377, "learning_rate": 0.00019568224431187173, "loss": 3.077691078186035, "step": 4749, "token_acc": 0.28780931288470024 }, { "epoch": 2.7845206684256816, "grad_norm": 0.33694893861544417, "learning_rate": 0.00019567942663452757, "loss": 3.041159152984619, "step": 4750, "token_acc": 0.29606528199342097 }, { "epoch": 2.7851070067428907, "grad_norm": 0.3178259923850441, "learning_rate": 0.00019567660805840413, "loss": 3.1025707721710205, "step": 4751, "token_acc": 0.28807764538445485 }, { "epoch": 2.7856933450601, "grad_norm": 0.3658597927252303, "learning_rate": 0.00019567378858352785, "loss": 3.055793285369873, "step": 4752, "token_acc": 0.29271552055813477 }, { "epoch": 2.786279683377309, "grad_norm": 0.3376723299126635, "learning_rate": 0.0001956709682099253, "loss": 3.0499935150146484, "step": 4753, "token_acc": 0.29120883277893084 }, { "epoch": 2.7868660216945176, "grad_norm": 0.36056985141640185, "learning_rate": 0.0001956681469376229, "loss": 3.047048568725586, "step": 4754, "token_acc": 0.2925351547627261 }, { "epoch": 2.7874523600117267, "grad_norm": 0.33932736999124347, "learning_rate": 0.00019566532476664717, "loss": 3.090956211090088, "step": 4755, "token_acc": 0.28699214608487994 }, { "epoch": 2.788038698328936, "grad_norm": 0.3352132222736056, "learning_rate": 0.00019566250169702462, "loss": 3.1232051849365234, "step": 4756, "token_acc": 0.28255951601430795 }, { "epoch": 2.788625036646145, "grad_norm": 0.28133736988114944, "learning_rate": 0.0001956596777287818, "loss": 3.0788848400115967, "step": 4757, "token_acc": 0.28894088537464363 }, { "epoch": 2.7892113749633536, "grad_norm": 0.3331359371690295, "learning_rate": 0.00019565685286194518, "loss": 3.056391716003418, "step": 4758, "token_acc": 0.2908862787387981 }, { "epoch": 2.7897977132805627, "grad_norm": 0.28472302880336786, "learning_rate": 0.0001956540270965413, "loss": 3.076047897338867, "step": 4759, "token_acc": 0.2881463130435695 }, { "epoch": 2.790384051597772, "grad_norm": 0.3156466927795733, "learning_rate": 0.00019565120043259682, "loss": 3.0509047508239746, "step": 4760, "token_acc": 0.29341020199140505 }, { "epoch": 2.790970389914981, "grad_norm": 0.31371019360064517, "learning_rate": 0.00019564837287013815, "loss": 3.0290744304656982, "step": 4761, "token_acc": 0.29575254166229953 }, { "epoch": 2.79155672823219, "grad_norm": 0.281034923604501, "learning_rate": 0.0001956455444091919, "loss": 3.0546388626098633, "step": 4762, "token_acc": 0.2940789215878365 }, { "epoch": 2.792143066549399, "grad_norm": 0.311617018580613, "learning_rate": 0.00019564271504978465, "loss": 3.0565614700317383, "step": 4763, "token_acc": 0.2922941892768093 }, { "epoch": 2.7927294048666083, "grad_norm": 0.4114744474658279, "learning_rate": 0.000195639884791943, "loss": 3.0467729568481445, "step": 4764, "token_acc": 0.2934740312935201 }, { "epoch": 2.793315743183817, "grad_norm": 0.395083272653278, "learning_rate": 0.00019563705363569346, "loss": 3.1033642292022705, "step": 4765, "token_acc": 0.28690961750521465 }, { "epoch": 2.793902081501026, "grad_norm": 0.37157473582972894, "learning_rate": 0.0001956342215810627, "loss": 3.055727958679199, "step": 4766, "token_acc": 0.2921332124268661 }, { "epoch": 2.794488419818235, "grad_norm": 0.33450703465703785, "learning_rate": 0.0001956313886280773, "loss": 3.0712943077087402, "step": 4767, "token_acc": 0.29050169014157695 }, { "epoch": 2.7950747581354443, "grad_norm": 0.3466754361645007, "learning_rate": 0.00019562855477676388, "loss": 3.0822036266326904, "step": 4768, "token_acc": 0.2875308568883901 }, { "epoch": 2.795661096452653, "grad_norm": 0.3206547441146517, "learning_rate": 0.000195625720027149, "loss": 3.0682313442230225, "step": 4769, "token_acc": 0.2898575821500163 }, { "epoch": 2.796247434769862, "grad_norm": 0.3005907915558481, "learning_rate": 0.0001956228843792594, "loss": 3.093679428100586, "step": 4770, "token_acc": 0.2874054326286171 }, { "epoch": 2.796833773087071, "grad_norm": 0.2661451093039696, "learning_rate": 0.0001956200478331216, "loss": 3.107438087463379, "step": 4771, "token_acc": 0.2844080935730453 }, { "epoch": 2.7974201114042803, "grad_norm": 0.3434832824587024, "learning_rate": 0.00019561721038876235, "loss": 3.087347984313965, "step": 4772, "token_acc": 0.28813515411060936 }, { "epoch": 2.7980064497214894, "grad_norm": 0.3336395347608038, "learning_rate": 0.0001956143720462082, "loss": 3.065269947052002, "step": 4773, "token_acc": 0.29143902820112866 }, { "epoch": 2.7985927880386985, "grad_norm": 0.3183739760996455, "learning_rate": 0.0001956115328054859, "loss": 3.084470510482788, "step": 4774, "token_acc": 0.2878377561982711 }, { "epoch": 2.7991791263559076, "grad_norm": 0.31227590923188003, "learning_rate": 0.00019560869266662209, "loss": 3.079319715499878, "step": 4775, "token_acc": 0.2889203923034294 }, { "epoch": 2.7997654646731163, "grad_norm": 0.3549238363443517, "learning_rate": 0.00019560585162964344, "loss": 3.0619454383850098, "step": 4776, "token_acc": 0.29122783575408545 }, { "epoch": 2.8003518029903254, "grad_norm": 0.3927413516897163, "learning_rate": 0.00019560300969457663, "loss": 3.0831174850463867, "step": 4777, "token_acc": 0.28833676767296845 }, { "epoch": 2.8009381413075345, "grad_norm": 0.41221947619570604, "learning_rate": 0.0001956001668614484, "loss": 3.09385347366333, "step": 4778, "token_acc": 0.2855751419482141 }, { "epoch": 2.8015244796247436, "grad_norm": 0.3641670349002705, "learning_rate": 0.0001955973231302854, "loss": 3.0538108348846436, "step": 4779, "token_acc": 0.294092991467595 }, { "epoch": 2.8021108179419523, "grad_norm": 0.36265745122581533, "learning_rate": 0.00019559447850111438, "loss": 3.0987319946289062, "step": 4780, "token_acc": 0.28809029983106993 }, { "epoch": 2.8026971562591614, "grad_norm": 0.3891272704505942, "learning_rate": 0.00019559163297396207, "loss": 3.0733296871185303, "step": 4781, "token_acc": 0.28964453166740944 }, { "epoch": 2.8032834945763705, "grad_norm": 0.3929774321343106, "learning_rate": 0.00019558878654885514, "loss": 3.0486974716186523, "step": 4782, "token_acc": 0.2914900571203262 }, { "epoch": 2.8038698328935796, "grad_norm": 0.32330171386368967, "learning_rate": 0.0001955859392258204, "loss": 3.0741257667541504, "step": 4783, "token_acc": 0.28917372154970067 }, { "epoch": 2.8044561712107887, "grad_norm": 0.3919995427880726, "learning_rate": 0.00019558309100488455, "loss": 3.071486711502075, "step": 4784, "token_acc": 0.28951822002812655 }, { "epoch": 2.805042509527998, "grad_norm": 0.3322446673937404, "learning_rate": 0.00019558024188607434, "loss": 3.068291664123535, "step": 4785, "token_acc": 0.29114804392807553 }, { "epoch": 2.805628847845207, "grad_norm": 0.3997891503729205, "learning_rate": 0.00019557739186941658, "loss": 3.0418734550476074, "step": 4786, "token_acc": 0.2946515619287484 }, { "epoch": 2.8062151861624156, "grad_norm": 0.2781158824893155, "learning_rate": 0.00019557454095493802, "loss": 3.036565065383911, "step": 4787, "token_acc": 0.29455762202380176 }, { "epoch": 2.8068015244796247, "grad_norm": 0.3533572699291522, "learning_rate": 0.00019557168914266542, "loss": 3.0746285915374756, "step": 4788, "token_acc": 0.28938235880015517 }, { "epoch": 2.807387862796834, "grad_norm": 0.3484236456138922, "learning_rate": 0.00019556883643262558, "loss": 3.076582908630371, "step": 4789, "token_acc": 0.2894416045537373 }, { "epoch": 2.807974201114043, "grad_norm": 0.31151568791183704, "learning_rate": 0.00019556598282484533, "loss": 3.068746328353882, "step": 4790, "token_acc": 0.2907734137876514 }, { "epoch": 2.8085605394312516, "grad_norm": 0.30422662595008054, "learning_rate": 0.00019556312831935143, "loss": 3.0603930950164795, "step": 4791, "token_acc": 0.29182015923616245 }, { "epoch": 2.8091468777484607, "grad_norm": 0.2844521900280327, "learning_rate": 0.0001955602729161707, "loss": 3.0700931549072266, "step": 4792, "token_acc": 0.2885733871180363 }, { "epoch": 2.80973321606567, "grad_norm": 0.314068305913703, "learning_rate": 0.00019555741661533, "loss": 3.0719380378723145, "step": 4793, "token_acc": 0.29097032249040383 }, { "epoch": 2.810319554382879, "grad_norm": 0.2867334866449183, "learning_rate": 0.0001955545594168561, "loss": 3.0702767372131348, "step": 4794, "token_acc": 0.28948765022993145 }, { "epoch": 2.810905892700088, "grad_norm": 0.27354229014054054, "learning_rate": 0.0001955517013207759, "loss": 3.0938868522644043, "step": 4795, "token_acc": 0.28598251795463636 }, { "epoch": 2.811492231017297, "grad_norm": 0.361403078013731, "learning_rate": 0.00019554884232711624, "loss": 3.048142433166504, "step": 4796, "token_acc": 0.29303709294301006 }, { "epoch": 2.8120785693345063, "grad_norm": 0.32596841347359246, "learning_rate": 0.00019554598243590394, "loss": 3.0081427097320557, "step": 4797, "token_acc": 0.2989001133184032 }, { "epoch": 2.812664907651715, "grad_norm": 0.27486598572727083, "learning_rate": 0.00019554312164716587, "loss": 3.0953097343444824, "step": 4798, "token_acc": 0.2862303270854006 }, { "epoch": 2.813251245968924, "grad_norm": 0.2796793463258275, "learning_rate": 0.00019554025996092894, "loss": 3.0770645141601562, "step": 4799, "token_acc": 0.28943960565616056 }, { "epoch": 2.813837584286133, "grad_norm": 0.2928882554213848, "learning_rate": 0.00019553739737722002, "loss": 3.066865921020508, "step": 4800, "token_acc": 0.29224330236285273 }, { "epoch": 2.8144239226033423, "grad_norm": 0.30580552927601373, "learning_rate": 0.00019553453389606598, "loss": 3.0743274688720703, "step": 4801, "token_acc": 0.2898601247350462 }, { "epoch": 2.815010260920551, "grad_norm": 0.34627133378798947, "learning_rate": 0.0001955316695174937, "loss": 3.0507407188415527, "step": 4802, "token_acc": 0.2917941039038274 }, { "epoch": 2.81559659923776, "grad_norm": 0.29319270367760336, "learning_rate": 0.00019552880424153016, "loss": 3.068697929382324, "step": 4803, "token_acc": 0.29112078083927656 }, { "epoch": 2.816182937554969, "grad_norm": 0.2890990606826052, "learning_rate": 0.00019552593806820221, "loss": 3.0224251747131348, "step": 4804, "token_acc": 0.29567714049423427 }, { "epoch": 2.8167692758721783, "grad_norm": 0.31283062532229844, "learning_rate": 0.0001955230709975368, "loss": 3.0380756855010986, "step": 4805, "token_acc": 0.2946384078734272 }, { "epoch": 2.8173556141893874, "grad_norm": 0.3302896935973322, "learning_rate": 0.00019552020302956085, "loss": 3.0549888610839844, "step": 4806, "token_acc": 0.2918140181290734 }, { "epoch": 2.8179419525065965, "grad_norm": 0.33032486295032326, "learning_rate": 0.0001955173341643013, "loss": 3.055619716644287, "step": 4807, "token_acc": 0.29314283965790694 }, { "epoch": 2.818528290823805, "grad_norm": 0.3040012386436506, "learning_rate": 0.0001955144644017851, "loss": 3.1090478897094727, "step": 4808, "token_acc": 0.2845434293653076 }, { "epoch": 2.8191146291410143, "grad_norm": 0.33060042235189196, "learning_rate": 0.00019551159374203925, "loss": 3.035707473754883, "step": 4809, "token_acc": 0.29444163196717615 }, { "epoch": 2.8197009674582234, "grad_norm": 0.412898277380587, "learning_rate": 0.00019550872218509065, "loss": 3.043006181716919, "step": 4810, "token_acc": 0.2955802625062561 }, { "epoch": 2.8202873057754325, "grad_norm": 0.3190992896487028, "learning_rate": 0.00019550584973096634, "loss": 3.0928313732147217, "step": 4811, "token_acc": 0.28715416425356377 }, { "epoch": 2.820873644092641, "grad_norm": 0.3072258631131407, "learning_rate": 0.00019550297637969323, "loss": 3.0841221809387207, "step": 4812, "token_acc": 0.2886044743815788 }, { "epoch": 2.8214599824098503, "grad_norm": 0.319172103854892, "learning_rate": 0.00019550010213129838, "loss": 3.07167387008667, "step": 4813, "token_acc": 0.2896572915913528 }, { "epoch": 2.8220463207270594, "grad_norm": 0.2666530398333794, "learning_rate": 0.00019549722698580872, "loss": 3.0691330432891846, "step": 4814, "token_acc": 0.29123104117796034 }, { "epoch": 2.8226326590442685, "grad_norm": 0.25879114069297915, "learning_rate": 0.00019549435094325132, "loss": 3.086653232574463, "step": 4815, "token_acc": 0.2903096298883956 }, { "epoch": 2.8232189973614776, "grad_norm": 0.24183900721932308, "learning_rate": 0.00019549147400365317, "loss": 3.071302890777588, "step": 4816, "token_acc": 0.28998693729903535 }, { "epoch": 2.8238053356786867, "grad_norm": 0.3007085857779486, "learning_rate": 0.00019548859616704128, "loss": 3.063629388809204, "step": 4817, "token_acc": 0.291296979636463 }, { "epoch": 2.824391673995896, "grad_norm": 0.2988575184131992, "learning_rate": 0.00019548571743344275, "loss": 3.0706238746643066, "step": 4818, "token_acc": 0.29067878410667286 }, { "epoch": 2.8249780123131045, "grad_norm": 0.33728449152656953, "learning_rate": 0.00019548283780288452, "loss": 3.0786819458007812, "step": 4819, "token_acc": 0.28905718620833987 }, { "epoch": 2.8255643506303136, "grad_norm": 0.36388693238450476, "learning_rate": 0.00019547995727539372, "loss": 3.0740575790405273, "step": 4820, "token_acc": 0.2904994036197687 }, { "epoch": 2.8261506889475227, "grad_norm": 0.3013146052521194, "learning_rate": 0.00019547707585099738, "loss": 3.0459136962890625, "step": 4821, "token_acc": 0.29231553528306947 }, { "epoch": 2.826737027264732, "grad_norm": 0.3032751842751434, "learning_rate": 0.00019547419352972258, "loss": 3.024435043334961, "step": 4822, "token_acc": 0.29680993981551645 }, { "epoch": 2.8273233655819405, "grad_norm": 0.33773897338389397, "learning_rate": 0.0001954713103115964, "loss": 3.035207986831665, "step": 4823, "token_acc": 0.29501511728748153 }, { "epoch": 2.8279097038991496, "grad_norm": 0.3004819294348285, "learning_rate": 0.00019546842619664584, "loss": 3.0687451362609863, "step": 4824, "token_acc": 0.2916019065381826 }, { "epoch": 2.8284960422163588, "grad_norm": 0.3201179298297896, "learning_rate": 0.0001954655411848981, "loss": 3.0945825576782227, "step": 4825, "token_acc": 0.2866025715029788 }, { "epoch": 2.829082380533568, "grad_norm": 0.2804669391596908, "learning_rate": 0.00019546265527638026, "loss": 3.0753512382507324, "step": 4826, "token_acc": 0.28883611078124793 }, { "epoch": 2.829668718850777, "grad_norm": 0.27488218924690433, "learning_rate": 0.00019545976847111938, "loss": 3.03167462348938, "step": 4827, "token_acc": 0.2962654161951937 }, { "epoch": 2.830255057167986, "grad_norm": 0.3349400520603764, "learning_rate": 0.00019545688076914264, "loss": 3.0509636402130127, "step": 4828, "token_acc": 0.2910965147453083 }, { "epoch": 2.830841395485195, "grad_norm": 0.31931893238218856, "learning_rate": 0.00019545399217047712, "loss": 3.106464385986328, "step": 4829, "token_acc": 0.2845313687990928 }, { "epoch": 2.831427733802404, "grad_norm": 0.29040699046720536, "learning_rate": 0.00019545110267514995, "loss": 3.018885612487793, "step": 4830, "token_acc": 0.2979952538093374 }, { "epoch": 2.832014072119613, "grad_norm": 0.3311284952422099, "learning_rate": 0.00019544821228318832, "loss": 3.0981290340423584, "step": 4831, "token_acc": 0.2868815117329128 }, { "epoch": 2.832600410436822, "grad_norm": 0.3488209986385302, "learning_rate": 0.00019544532099461935, "loss": 3.032848358154297, "step": 4832, "token_acc": 0.29663976318771534 }, { "epoch": 2.833186748754031, "grad_norm": 0.4315055900502949, "learning_rate": 0.00019544242880947018, "loss": 3.0497515201568604, "step": 4833, "token_acc": 0.2940612020286376 }, { "epoch": 2.83377308707124, "grad_norm": 0.55374461309147, "learning_rate": 0.00019543953572776803, "loss": 3.0710690021514893, "step": 4834, "token_acc": 0.2905672003199758 }, { "epoch": 2.834359425388449, "grad_norm": 0.38188431680644663, "learning_rate": 0.00019543664174954004, "loss": 3.086817979812622, "step": 4835, "token_acc": 0.2890708811768873 }, { "epoch": 2.834945763705658, "grad_norm": 0.3797547727488267, "learning_rate": 0.00019543374687481342, "loss": 3.020171880722046, "step": 4836, "token_acc": 0.2982751157574406 }, { "epoch": 2.835532102022867, "grad_norm": 0.4167338356417922, "learning_rate": 0.0001954308511036153, "loss": 3.09006667137146, "step": 4837, "token_acc": 0.28785823232975155 }, { "epoch": 2.8361184403400763, "grad_norm": 0.3203852242666415, "learning_rate": 0.000195427954435973, "loss": 3.0870513916015625, "step": 4838, "token_acc": 0.28868300509821176 }, { "epoch": 2.8367047786572854, "grad_norm": 0.43809277582392286, "learning_rate": 0.00019542505687191358, "loss": 3.0553090572357178, "step": 4839, "token_acc": 0.29217339728712155 }, { "epoch": 2.8372911169744945, "grad_norm": 0.3376205603932334, "learning_rate": 0.0001954221584114644, "loss": 3.074392557144165, "step": 4840, "token_acc": 0.28855033528874713 }, { "epoch": 2.837877455291703, "grad_norm": 0.413718309111033, "learning_rate": 0.0001954192590546526, "loss": 3.065587282180786, "step": 4841, "token_acc": 0.28989833452806396 }, { "epoch": 2.8384637936089123, "grad_norm": 0.3795802555153408, "learning_rate": 0.00019541635880150545, "loss": 3.066976547241211, "step": 4842, "token_acc": 0.29106589305379943 }, { "epoch": 2.8390501319261214, "grad_norm": 0.3247764503371684, "learning_rate": 0.00019541345765205015, "loss": 3.0576696395874023, "step": 4843, "token_acc": 0.2903086962202711 }, { "epoch": 2.8396364702433305, "grad_norm": 0.4234376828592185, "learning_rate": 0.000195410555606314, "loss": 3.0026135444641113, "step": 4844, "token_acc": 0.2998630354012603 }, { "epoch": 2.840222808560539, "grad_norm": 0.3391443912458567, "learning_rate": 0.00019540765266432426, "loss": 3.068728446960449, "step": 4845, "token_acc": 0.2889766071419453 }, { "epoch": 2.8408091468777483, "grad_norm": 0.3663031667230038, "learning_rate": 0.0001954047488261082, "loss": 3.0565459728240967, "step": 4846, "token_acc": 0.2918244356064417 }, { "epoch": 2.8413954851949574, "grad_norm": 0.38458118818323517, "learning_rate": 0.00019540184409169308, "loss": 3.0283827781677246, "step": 4847, "token_acc": 0.29816596964184844 }, { "epoch": 2.8419818235121665, "grad_norm": 0.4213115324657733, "learning_rate": 0.00019539893846110616, "loss": 3.101609230041504, "step": 4848, "token_acc": 0.2860203744249679 }, { "epoch": 2.8425681618293757, "grad_norm": 0.3537049660591789, "learning_rate": 0.0001953960319343748, "loss": 3.1010212898254395, "step": 4849, "token_acc": 0.2846537919532161 }, { "epoch": 2.8431545001465848, "grad_norm": 0.381323900636401, "learning_rate": 0.00019539312451152623, "loss": 3.1073460578918457, "step": 4850, "token_acc": 0.28463405526512325 }, { "epoch": 2.843740838463794, "grad_norm": 0.3834835020609835, "learning_rate": 0.00019539021619258782, "loss": 3.0617198944091797, "step": 4851, "token_acc": 0.2928236820929369 }, { "epoch": 2.8443271767810026, "grad_norm": 0.4026338214935844, "learning_rate": 0.00019538730697758687, "loss": 3.08662748336792, "step": 4852, "token_acc": 0.28897779709329824 }, { "epoch": 2.8449135150982117, "grad_norm": 0.29185528775253, "learning_rate": 0.0001953843968665507, "loss": 3.0339646339416504, "step": 4853, "token_acc": 0.29589362239822775 }, { "epoch": 2.8454998534154208, "grad_norm": 0.4035163244853389, "learning_rate": 0.00019538148585950666, "loss": 3.072492837905884, "step": 4854, "token_acc": 0.28918785014672815 }, { "epoch": 2.84608619173263, "grad_norm": 0.28911351690481685, "learning_rate": 0.0001953785739564821, "loss": 3.093196392059326, "step": 4855, "token_acc": 0.2887643048261868 }, { "epoch": 2.8466725300498386, "grad_norm": 0.3287491819913607, "learning_rate": 0.00019537566115750433, "loss": 3.060621500015259, "step": 4856, "token_acc": 0.2912318012944219 }, { "epoch": 2.8472588683670477, "grad_norm": 0.3031585022389628, "learning_rate": 0.00019537274746260077, "loss": 3.0571258068084717, "step": 4857, "token_acc": 0.2920591592112806 }, { "epoch": 2.847845206684257, "grad_norm": 0.29596368297161246, "learning_rate": 0.00019536983287179877, "loss": 3.103498935699463, "step": 4858, "token_acc": 0.28440152299369487 }, { "epoch": 2.848431545001466, "grad_norm": 0.30920269739266804, "learning_rate": 0.00019536691738512567, "loss": 3.069467544555664, "step": 4859, "token_acc": 0.29090645894332035 }, { "epoch": 2.849017883318675, "grad_norm": 0.2717254454120263, "learning_rate": 0.00019536400100260892, "loss": 3.079941987991333, "step": 4860, "token_acc": 0.2911455223591263 }, { "epoch": 2.849604221635884, "grad_norm": 0.33628052562924493, "learning_rate": 0.00019536108372427587, "loss": 3.0555896759033203, "step": 4861, "token_acc": 0.290561073472341 }, { "epoch": 2.850190559953093, "grad_norm": 0.28501255791407487, "learning_rate": 0.00019535816555015396, "loss": 3.0917677879333496, "step": 4862, "token_acc": 0.2867813115093753 }, { "epoch": 2.850776898270302, "grad_norm": 0.3054068772088216, "learning_rate": 0.00019535524648027055, "loss": 3.0719780921936035, "step": 4863, "token_acc": 0.2909968498861029 }, { "epoch": 2.851363236587511, "grad_norm": 0.2985137496979471, "learning_rate": 0.0001953523265146531, "loss": 3.0235595703125, "step": 4864, "token_acc": 0.296651389974607 }, { "epoch": 2.85194957490472, "grad_norm": 0.2866215802214872, "learning_rate": 0.00019534940565332906, "loss": 3.054766893386841, "step": 4865, "token_acc": 0.2916961168018736 }, { "epoch": 2.852535913221929, "grad_norm": 0.24659156770918106, "learning_rate": 0.00019534648389632578, "loss": 3.0671842098236084, "step": 4866, "token_acc": 0.2920148172176907 }, { "epoch": 2.853122251539138, "grad_norm": 0.28445428017836805, "learning_rate": 0.00019534356124367084, "loss": 3.011157989501953, "step": 4867, "token_acc": 0.2980853068287034 }, { "epoch": 2.853708589856347, "grad_norm": 0.30445143180651935, "learning_rate": 0.00019534063769539157, "loss": 3.094839572906494, "step": 4868, "token_acc": 0.28696462776146087 }, { "epoch": 2.854294928173556, "grad_norm": 0.30367132114162815, "learning_rate": 0.0001953377132515155, "loss": 3.0809757709503174, "step": 4869, "token_acc": 0.28882139858954975 }, { "epoch": 2.8548812664907652, "grad_norm": 0.27759814970319696, "learning_rate": 0.00019533478791207008, "loss": 3.130516529083252, "step": 4870, "token_acc": 0.2818313419058565 }, { "epoch": 2.8554676048079743, "grad_norm": 0.29533898152526294, "learning_rate": 0.00019533186167708277, "loss": 3.101151466369629, "step": 4871, "token_acc": 0.28588318813882724 }, { "epoch": 2.8560539431251835, "grad_norm": 0.28787873888458543, "learning_rate": 0.0001953289345465811, "loss": 3.043135643005371, "step": 4872, "token_acc": 0.29304473538295606 }, { "epoch": 2.856640281442392, "grad_norm": 0.3224552936060447, "learning_rate": 0.00019532600652059256, "loss": 3.0701546669006348, "step": 4873, "token_acc": 0.2900308031035979 }, { "epoch": 2.8572266197596012, "grad_norm": 0.35021669424150276, "learning_rate": 0.00019532307759914463, "loss": 3.0508546829223633, "step": 4874, "token_acc": 0.29209976836406165 }, { "epoch": 2.8578129580768104, "grad_norm": 0.2601008953817544, "learning_rate": 0.00019532014778226483, "loss": 3.061588764190674, "step": 4875, "token_acc": 0.2907599404545861 }, { "epoch": 2.8583992963940195, "grad_norm": 0.36007255118109815, "learning_rate": 0.0001953172170699807, "loss": 3.061830997467041, "step": 4876, "token_acc": 0.29151114184880444 }, { "epoch": 2.858985634711228, "grad_norm": 0.3268574077839835, "learning_rate": 0.00019531428546231972, "loss": 3.0864903926849365, "step": 4877, "token_acc": 0.2878912831202473 }, { "epoch": 2.8595719730284372, "grad_norm": 0.3666618062626624, "learning_rate": 0.00019531135295930953, "loss": 3.0952935218811035, "step": 4878, "token_acc": 0.28681671687894156 }, { "epoch": 2.8601583113456464, "grad_norm": 0.2923946530845022, "learning_rate": 0.00019530841956097756, "loss": 3.061192035675049, "step": 4879, "token_acc": 0.2905892146982791 }, { "epoch": 2.8607446496628555, "grad_norm": 0.36756679019953775, "learning_rate": 0.00019530548526735145, "loss": 3.061936855316162, "step": 4880, "token_acc": 0.29129293371253556 }, { "epoch": 2.8613309879800646, "grad_norm": 0.31838698203652577, "learning_rate": 0.0001953025500784587, "loss": 3.118236541748047, "step": 4881, "token_acc": 0.28292039035494476 }, { "epoch": 2.8619173262972737, "grad_norm": 0.29507769731151284, "learning_rate": 0.00019529961399432694, "loss": 3.072221517562866, "step": 4882, "token_acc": 0.2905912732569688 }, { "epoch": 2.862503664614483, "grad_norm": 0.31811885578532173, "learning_rate": 0.00019529667701498373, "loss": 3.059858798980713, "step": 4883, "token_acc": 0.29262427393011936 }, { "epoch": 2.8630900029316915, "grad_norm": 0.35718945232677896, "learning_rate": 0.0001952937391404566, "loss": 3.08382511138916, "step": 4884, "token_acc": 0.28940702330568835 }, { "epoch": 2.8636763412489006, "grad_norm": 0.30314472896270445, "learning_rate": 0.00019529080037077327, "loss": 3.0534591674804688, "step": 4885, "token_acc": 0.2919510403791479 }, { "epoch": 2.8642626795661097, "grad_norm": 0.3514504752727815, "learning_rate": 0.00019528786070596124, "loss": 3.0765669345855713, "step": 4886, "token_acc": 0.28895427566556614 }, { "epoch": 2.864849017883319, "grad_norm": 0.4280942191060666, "learning_rate": 0.0001952849201460482, "loss": 3.042485237121582, "step": 4887, "token_acc": 0.2937603972823868 }, { "epoch": 2.8654353562005275, "grad_norm": 0.35443377793076597, "learning_rate": 0.00019528197869106165, "loss": 3.051652431488037, "step": 4888, "token_acc": 0.2934732877222476 }, { "epoch": 2.8660216945177366, "grad_norm": 0.3033428252202999, "learning_rate": 0.0001952790363410294, "loss": 3.0494492053985596, "step": 4889, "token_acc": 0.2940113398539273 }, { "epoch": 2.8666080328349457, "grad_norm": 0.37533714897006115, "learning_rate": 0.00019527609309597893, "loss": 3.1051902770996094, "step": 4890, "token_acc": 0.2859208554213232 }, { "epoch": 2.867194371152155, "grad_norm": 0.26587952960690314, "learning_rate": 0.00019527314895593796, "loss": 3.0404930114746094, "step": 4891, "token_acc": 0.29563901073068916 }, { "epoch": 2.867780709469364, "grad_norm": 0.34869614202683685, "learning_rate": 0.00019527020392093417, "loss": 3.031594753265381, "step": 4892, "token_acc": 0.29602170693022584 }, { "epoch": 2.868367047786573, "grad_norm": 0.3144740185604402, "learning_rate": 0.00019526725799099516, "loss": 3.0938291549682617, "step": 4893, "token_acc": 0.2881541375775532 }, { "epoch": 2.868953386103782, "grad_norm": 0.3226685545681531, "learning_rate": 0.00019526431116614863, "loss": 3.060157060623169, "step": 4894, "token_acc": 0.2908762123118127 }, { "epoch": 2.869539724420991, "grad_norm": 0.2919388821578318, "learning_rate": 0.0001952613634464223, "loss": 3.099207878112793, "step": 4895, "token_acc": 0.2872777432926409 }, { "epoch": 2.8701260627382, "grad_norm": 0.3223641289830053, "learning_rate": 0.00019525841483184378, "loss": 3.011539936065674, "step": 4896, "token_acc": 0.29779659992801577 }, { "epoch": 2.870712401055409, "grad_norm": 0.3255575485130732, "learning_rate": 0.00019525546532244084, "loss": 3.0442047119140625, "step": 4897, "token_acc": 0.29523556693786407 }, { "epoch": 2.871298739372618, "grad_norm": 0.3284871722886573, "learning_rate": 0.0001952525149182412, "loss": 3.055866241455078, "step": 4898, "token_acc": 0.2915886285875718 }, { "epoch": 2.871885077689827, "grad_norm": 0.3227825198785674, "learning_rate": 0.00019524956361927247, "loss": 3.06754994392395, "step": 4899, "token_acc": 0.2899600570396618 }, { "epoch": 2.872471416007036, "grad_norm": 0.30161036823209014, "learning_rate": 0.0001952466114255625, "loss": 3.0585498809814453, "step": 4900, "token_acc": 0.2912838015227094 }, { "epoch": 2.873057754324245, "grad_norm": 0.3018326401099606, "learning_rate": 0.0001952436583371389, "loss": 3.045538902282715, "step": 4901, "token_acc": 0.29375840204788595 }, { "epoch": 2.873644092641454, "grad_norm": 0.2875467379991328, "learning_rate": 0.00019524070435402954, "loss": 3.0933055877685547, "step": 4902, "token_acc": 0.2868314807846806 }, { "epoch": 2.8742304309586633, "grad_norm": 0.2966062176744263, "learning_rate": 0.00019523774947626205, "loss": 3.0361576080322266, "step": 4903, "token_acc": 0.29406529499450806 }, { "epoch": 2.8748167692758724, "grad_norm": 0.30411610714541404, "learning_rate": 0.00019523479370386426, "loss": 3.090198040008545, "step": 4904, "token_acc": 0.28629630216001983 }, { "epoch": 2.875403107593081, "grad_norm": 0.28227136568442396, "learning_rate": 0.0001952318370368639, "loss": 3.0773372650146484, "step": 4905, "token_acc": 0.2884729404570411 }, { "epoch": 2.87598944591029, "grad_norm": 0.33037340078595684, "learning_rate": 0.00019522887947528877, "loss": 3.0895557403564453, "step": 4906, "token_acc": 0.2874898352918666 }, { "epoch": 2.8765757842274993, "grad_norm": 0.31841402243589145, "learning_rate": 0.00019522592101916663, "loss": 3.0663156509399414, "step": 4907, "token_acc": 0.2916984224011237 }, { "epoch": 2.8771621225447084, "grad_norm": 0.3086965140967257, "learning_rate": 0.0001952229616685253, "loss": 3.0425424575805664, "step": 4908, "token_acc": 0.2942559235868207 }, { "epoch": 2.8777484608619175, "grad_norm": 0.28716991918330004, "learning_rate": 0.00019522000142339254, "loss": 3.0544233322143555, "step": 4909, "token_acc": 0.2911381364168764 }, { "epoch": 2.878334799179126, "grad_norm": 0.28715482071723203, "learning_rate": 0.00019521704028379618, "loss": 3.0353474617004395, "step": 4910, "token_acc": 0.29551806468363256 }, { "epoch": 2.8789211374963353, "grad_norm": 0.3197001284280265, "learning_rate": 0.00019521407824976404, "loss": 3.086859703063965, "step": 4911, "token_acc": 0.2868983155202627 }, { "epoch": 2.8795074758135444, "grad_norm": 0.306877706993746, "learning_rate": 0.00019521111532132395, "loss": 3.0582661628723145, "step": 4912, "token_acc": 0.29250594903416466 }, { "epoch": 2.8800938141307535, "grad_norm": 0.32556874648938794, "learning_rate": 0.0001952081514985037, "loss": 3.070643901824951, "step": 4913, "token_acc": 0.29001512272674396 }, { "epoch": 2.8806801524479626, "grad_norm": 0.36671361547295483, "learning_rate": 0.0001952051867813312, "loss": 3.057034492492676, "step": 4914, "token_acc": 0.2897786822691427 }, { "epoch": 2.8812664907651717, "grad_norm": 0.3048496487672765, "learning_rate": 0.00019520222116983422, "loss": 3.0235021114349365, "step": 4915, "token_acc": 0.29852030193280393 }, { "epoch": 2.8818528290823804, "grad_norm": 0.2638143710500929, "learning_rate": 0.00019519925466404068, "loss": 3.0681281089782715, "step": 4916, "token_acc": 0.2916421432134477 }, { "epoch": 2.8824391673995895, "grad_norm": 0.2982293147052228, "learning_rate": 0.0001951962872639784, "loss": 3.0866174697875977, "step": 4917, "token_acc": 0.28765797049066055 }, { "epoch": 2.8830255057167986, "grad_norm": 0.3540207206095341, "learning_rate": 0.0001951933189696753, "loss": 3.0931448936462402, "step": 4918, "token_acc": 0.2883862402306779 }, { "epoch": 2.8836118440340077, "grad_norm": 0.31971507281384876, "learning_rate": 0.00019519034978115926, "loss": 3.0571348667144775, "step": 4919, "token_acc": 0.2932009103208147 }, { "epoch": 2.8841981823512164, "grad_norm": 0.27405814727918415, "learning_rate": 0.00019518737969845813, "loss": 3.041811943054199, "step": 4920, "token_acc": 0.29337055352582153 }, { "epoch": 2.8847845206684255, "grad_norm": 0.36428684911126924, "learning_rate": 0.00019518440872159985, "loss": 3.085550308227539, "step": 4921, "token_acc": 0.29055522320903576 }, { "epoch": 2.8853708589856346, "grad_norm": 0.2700458102631939, "learning_rate": 0.0001951814368506123, "loss": 3.027010917663574, "step": 4922, "token_acc": 0.2973262750444045 }, { "epoch": 2.8859571973028437, "grad_norm": 0.2775340947246322, "learning_rate": 0.00019517846408552344, "loss": 3.061241865158081, "step": 4923, "token_acc": 0.29162124362526387 }, { "epoch": 2.886543535620053, "grad_norm": 0.25164059915431164, "learning_rate": 0.0001951754904263611, "loss": 3.051241159439087, "step": 4924, "token_acc": 0.2936431476893905 }, { "epoch": 2.887129873937262, "grad_norm": 0.28960076649361527, "learning_rate": 0.00019517251587315333, "loss": 3.048980474472046, "step": 4925, "token_acc": 0.29265462372831214 }, { "epoch": 2.887716212254471, "grad_norm": 0.30774919758035363, "learning_rate": 0.000195169540425928, "loss": 3.069098472595215, "step": 4926, "token_acc": 0.29126203282379115 }, { "epoch": 2.8883025505716797, "grad_norm": 0.2542348221507696, "learning_rate": 0.00019516656408471308, "loss": 3.070157289505005, "step": 4927, "token_acc": 0.29073473255046084 }, { "epoch": 2.888888888888889, "grad_norm": 0.30542715171935125, "learning_rate": 0.00019516358684953654, "loss": 3.052456855773926, "step": 4928, "token_acc": 0.29356946773846526 }, { "epoch": 2.889475227206098, "grad_norm": 0.30027747246176606, "learning_rate": 0.00019516060872042633, "loss": 3.0504140853881836, "step": 4929, "token_acc": 0.2912003191236374 }, { "epoch": 2.890061565523307, "grad_norm": 0.3218139644405959, "learning_rate": 0.00019515762969741043, "loss": 3.042534112930298, "step": 4930, "token_acc": 0.2941679956501266 }, { "epoch": 2.8906479038405157, "grad_norm": 0.35429420847453924, "learning_rate": 0.00019515464978051684, "loss": 3.0787360668182373, "step": 4931, "token_acc": 0.2892535280844871 }, { "epoch": 2.891234242157725, "grad_norm": 0.3075804671801152, "learning_rate": 0.00019515166896977353, "loss": 3.075737953186035, "step": 4932, "token_acc": 0.28799967496686185 }, { "epoch": 2.891820580474934, "grad_norm": 0.3109209487414417, "learning_rate": 0.0001951486872652085, "loss": 3.0373785495758057, "step": 4933, "token_acc": 0.2958004147090475 }, { "epoch": 2.892406918792143, "grad_norm": 0.40040084121628716, "learning_rate": 0.00019514570466684975, "loss": 3.091073989868164, "step": 4934, "token_acc": 0.28876125636003774 }, { "epoch": 2.892993257109352, "grad_norm": 0.3575109866455189, "learning_rate": 0.00019514272117472536, "loss": 3.065265655517578, "step": 4935, "token_acc": 0.29073600591763554 }, { "epoch": 2.8935795954265613, "grad_norm": 0.28472537333202524, "learning_rate": 0.0001951397367888633, "loss": 3.0574464797973633, "step": 4936, "token_acc": 0.2931033112266933 }, { "epoch": 2.8941659337437704, "grad_norm": 0.3784304959634468, "learning_rate": 0.0001951367515092916, "loss": 3.0993096828460693, "step": 4937, "token_acc": 0.28689739070761267 }, { "epoch": 2.894752272060979, "grad_norm": 0.32214704026602126, "learning_rate": 0.00019513376533603834, "loss": 3.0546460151672363, "step": 4938, "token_acc": 0.29124363346357707 }, { "epoch": 2.895338610378188, "grad_norm": 0.37367857818457406, "learning_rate": 0.0001951307782691315, "loss": 3.0816681385040283, "step": 4939, "token_acc": 0.2890486828017395 }, { "epoch": 2.8959249486953973, "grad_norm": 0.34377161438503284, "learning_rate": 0.00019512779030859923, "loss": 3.0321311950683594, "step": 4940, "token_acc": 0.29831264256548357 }, { "epoch": 2.8965112870126064, "grad_norm": 0.30743489572237015, "learning_rate": 0.00019512480145446955, "loss": 3.0444631576538086, "step": 4941, "token_acc": 0.2953198061684297 }, { "epoch": 2.897097625329815, "grad_norm": 0.345661441854576, "learning_rate": 0.00019512181170677054, "loss": 3.0745506286621094, "step": 4942, "token_acc": 0.2908424560486967 }, { "epoch": 2.897683963647024, "grad_norm": 0.28453720045887443, "learning_rate": 0.00019511882106553027, "loss": 3.0129456520080566, "step": 4943, "token_acc": 0.298100760931167 }, { "epoch": 2.8982703019642333, "grad_norm": 0.33390322793680943, "learning_rate": 0.00019511582953077688, "loss": 3.0728859901428223, "step": 4944, "token_acc": 0.2920691026610161 }, { "epoch": 2.8988566402814424, "grad_norm": 0.2989739154637284, "learning_rate": 0.00019511283710253844, "loss": 3.0774335861206055, "step": 4945, "token_acc": 0.28968851320055583 }, { "epoch": 2.8994429785986515, "grad_norm": 0.3056859449014606, "learning_rate": 0.00019510984378084303, "loss": 3.0677621364593506, "step": 4946, "token_acc": 0.2913852132876839 }, { "epoch": 2.9000293169158606, "grad_norm": 0.31106774588008657, "learning_rate": 0.0001951068495657188, "loss": 3.0612406730651855, "step": 4947, "token_acc": 0.2904933265799153 }, { "epoch": 2.9006156552330697, "grad_norm": 0.3431997253882104, "learning_rate": 0.00019510385445719393, "loss": 3.0397167205810547, "step": 4948, "token_acc": 0.29528332380218303 }, { "epoch": 2.9012019935502784, "grad_norm": 0.35228440865085725, "learning_rate": 0.00019510085845529646, "loss": 3.114530563354492, "step": 4949, "token_acc": 0.28503666512518994 }, { "epoch": 2.9017883318674875, "grad_norm": 0.3439863715588709, "learning_rate": 0.0001950978615600546, "loss": 3.1159915924072266, "step": 4950, "token_acc": 0.28478790405758303 }, { "epoch": 2.9023746701846966, "grad_norm": 0.2871610711295451, "learning_rate": 0.00019509486377149643, "loss": 3.0912458896636963, "step": 4951, "token_acc": 0.2878681893324321 }, { "epoch": 2.9029610085019057, "grad_norm": 0.26930918905170254, "learning_rate": 0.0001950918650896502, "loss": 3.057864189147949, "step": 4952, "token_acc": 0.2929889837309404 }, { "epoch": 2.9035473468191144, "grad_norm": 0.2974530225849852, "learning_rate": 0.00019508886551454401, "loss": 3.09619140625, "step": 4953, "token_acc": 0.2869263498501634 }, { "epoch": 2.9041336851363235, "grad_norm": 0.3568310165975505, "learning_rate": 0.00019508586504620606, "loss": 3.0311264991760254, "step": 4954, "token_acc": 0.29708723152446814 }, { "epoch": 2.9047200234535326, "grad_norm": 0.3554506512897541, "learning_rate": 0.00019508286368466457, "loss": 3.0833358764648438, "step": 4955, "token_acc": 0.28782082762542327 }, { "epoch": 2.9053063617707418, "grad_norm": 0.28909144020956834, "learning_rate": 0.00019507986142994769, "loss": 3.087094783782959, "step": 4956, "token_acc": 0.28886779879008523 }, { "epoch": 2.905892700087951, "grad_norm": 0.3578905680315287, "learning_rate": 0.00019507685828208362, "loss": 3.0739173889160156, "step": 4957, "token_acc": 0.2886727387596083 }, { "epoch": 2.90647903840516, "grad_norm": 0.37917783175413816, "learning_rate": 0.00019507385424110058, "loss": 3.082059144973755, "step": 4958, "token_acc": 0.28770687905213665 }, { "epoch": 2.9070653767223686, "grad_norm": 0.37415863577809955, "learning_rate": 0.0001950708493070268, "loss": 3.061110496520996, "step": 4959, "token_acc": 0.2916764963937051 }, { "epoch": 2.9076517150395778, "grad_norm": 0.27267786356567114, "learning_rate": 0.0001950678434798905, "loss": 3.092258930206299, "step": 4960, "token_acc": 0.2866734228527996 }, { "epoch": 2.908238053356787, "grad_norm": 0.3462552538764763, "learning_rate": 0.00019506483675971992, "loss": 3.0762271881103516, "step": 4961, "token_acc": 0.2893115142997401 }, { "epoch": 2.908824391673996, "grad_norm": 0.3393021951700508, "learning_rate": 0.00019506182914654332, "loss": 3.097916841506958, "step": 4962, "token_acc": 0.2852461936180077 }, { "epoch": 2.909410729991205, "grad_norm": 0.26711967742588283, "learning_rate": 0.00019505882064038888, "loss": 3.0462584495544434, "step": 4963, "token_acc": 0.2939512233044212 }, { "epoch": 2.9099970683084138, "grad_norm": 0.3123835303399173, "learning_rate": 0.00019505581124128497, "loss": 3.0573606491088867, "step": 4964, "token_acc": 0.2918963629969737 }, { "epoch": 2.910583406625623, "grad_norm": 0.30336828617282463, "learning_rate": 0.00019505280094925977, "loss": 3.061286449432373, "step": 4965, "token_acc": 0.2926676760734421 }, { "epoch": 2.911169744942832, "grad_norm": 0.30746302459703834, "learning_rate": 0.0001950497897643416, "loss": 3.0631346702575684, "step": 4966, "token_acc": 0.29078807715031135 }, { "epoch": 2.911756083260041, "grad_norm": 0.265751204133962, "learning_rate": 0.00019504677768655872, "loss": 3.035701274871826, "step": 4967, "token_acc": 0.29216665782285595 }, { "epoch": 2.91234242157725, "grad_norm": 0.30833498572244683, "learning_rate": 0.00019504376471593947, "loss": 3.0718464851379395, "step": 4968, "token_acc": 0.28913336049047866 }, { "epoch": 2.9129287598944593, "grad_norm": 0.291196404745855, "learning_rate": 0.0001950407508525121, "loss": 3.085510730743408, "step": 4969, "token_acc": 0.2885935298652277 }, { "epoch": 2.913515098211668, "grad_norm": 0.2515015882422125, "learning_rate": 0.00019503773609630499, "loss": 3.0286383628845215, "step": 4970, "token_acc": 0.29546393585807607 }, { "epoch": 2.914101436528877, "grad_norm": 0.3035115624745694, "learning_rate": 0.00019503472044734633, "loss": 3.0399529933929443, "step": 4971, "token_acc": 0.2947626763997923 }, { "epoch": 2.914687774846086, "grad_norm": 0.4358318558998412, "learning_rate": 0.0001950317039056646, "loss": 3.006024122238159, "step": 4972, "token_acc": 0.2992748846407383 }, { "epoch": 2.9152741131632953, "grad_norm": 0.41211619124687177, "learning_rate": 0.00019502868647128805, "loss": 3.0472846031188965, "step": 4973, "token_acc": 0.29414905003660313 }, { "epoch": 2.915860451480504, "grad_norm": 0.33289942951443097, "learning_rate": 0.00019502566814424505, "loss": 3.0885910987854004, "step": 4974, "token_acc": 0.2887135983324472 }, { "epoch": 2.916446789797713, "grad_norm": 0.33744657897442476, "learning_rate": 0.00019502264892456395, "loss": 3.0814619064331055, "step": 4975, "token_acc": 0.28835315324661853 }, { "epoch": 2.917033128114922, "grad_norm": 0.28690395825190873, "learning_rate": 0.00019501962881227308, "loss": 3.0762784481048584, "step": 4976, "token_acc": 0.2885173023490781 }, { "epoch": 2.9176194664321313, "grad_norm": 0.36021876624438814, "learning_rate": 0.00019501660780740085, "loss": 3.0600881576538086, "step": 4977, "token_acc": 0.29150309437169647 }, { "epoch": 2.9182058047493404, "grad_norm": 0.30353187066328163, "learning_rate": 0.00019501358590997563, "loss": 3.0010123252868652, "step": 4978, "token_acc": 0.30099704091825774 }, { "epoch": 2.9187921430665495, "grad_norm": 0.3269541427919076, "learning_rate": 0.0001950105631200258, "loss": 3.0445008277893066, "step": 4979, "token_acc": 0.2922260121766416 }, { "epoch": 2.9193784813837587, "grad_norm": 0.34086756024199444, "learning_rate": 0.00019500753943757975, "loss": 3.034757614135742, "step": 4980, "token_acc": 0.2958107931135221 }, { "epoch": 2.9199648197009673, "grad_norm": 0.32390192531144235, "learning_rate": 0.00019500451486266593, "loss": 3.070679187774658, "step": 4981, "token_acc": 0.2914003932653312 }, { "epoch": 2.9205511580181764, "grad_norm": 0.36074936740183655, "learning_rate": 0.0001950014893953127, "loss": 3.053281784057617, "step": 4982, "token_acc": 0.2919729868353565 }, { "epoch": 2.9211374963353856, "grad_norm": 0.2739619090305267, "learning_rate": 0.00019499846303554845, "loss": 3.0226783752441406, "step": 4983, "token_acc": 0.2965813365354875 }, { "epoch": 2.9217238346525947, "grad_norm": 0.43881135421249245, "learning_rate": 0.0001949954357834017, "loss": 3.064603805541992, "step": 4984, "token_acc": 0.2913822130718987 }, { "epoch": 2.9223101729698033, "grad_norm": 0.3433560664286759, "learning_rate": 0.0001949924076389008, "loss": 3.0674147605895996, "step": 4985, "token_acc": 0.29210365103384 }, { "epoch": 2.9228965112870124, "grad_norm": 0.3291007238750204, "learning_rate": 0.00019498937860207426, "loss": 3.0799214839935303, "step": 4986, "token_acc": 0.2903882085378998 }, { "epoch": 2.9234828496042216, "grad_norm": 0.4141460207208036, "learning_rate": 0.0001949863486729505, "loss": 3.097292900085449, "step": 4987, "token_acc": 0.2853096206524446 }, { "epoch": 2.9240691879214307, "grad_norm": 0.3837779918947824, "learning_rate": 0.000194983317851558, "loss": 3.061333656311035, "step": 4988, "token_acc": 0.29114903311343077 }, { "epoch": 2.92465552623864, "grad_norm": 0.28843549687321396, "learning_rate": 0.0001949802861379252, "loss": 3.0804803371429443, "step": 4989, "token_acc": 0.28959805897801616 }, { "epoch": 2.925241864555849, "grad_norm": 0.34539893993096815, "learning_rate": 0.00019497725353208062, "loss": 3.0722172260284424, "step": 4990, "token_acc": 0.28805101818099715 }, { "epoch": 2.925828202873058, "grad_norm": 0.29860061980350566, "learning_rate": 0.0001949742200340527, "loss": 3.0547733306884766, "step": 4991, "token_acc": 0.29433025513588645 }, { "epoch": 2.9264145411902667, "grad_norm": 0.284183977617941, "learning_rate": 0.00019497118564387001, "loss": 3.0908350944519043, "step": 4992, "token_acc": 0.28759007384174856 }, { "epoch": 2.927000879507476, "grad_norm": 0.31597819335816807, "learning_rate": 0.00019496815036156096, "loss": 3.014202117919922, "step": 4993, "token_acc": 0.2978801276999319 }, { "epoch": 2.927587217824685, "grad_norm": 0.3237899926362453, "learning_rate": 0.00019496511418715413, "loss": 3.0479018688201904, "step": 4994, "token_acc": 0.29319137206257323 }, { "epoch": 2.928173556141894, "grad_norm": 0.3313212970970978, "learning_rate": 0.00019496207712067803, "loss": 3.073309898376465, "step": 4995, "token_acc": 0.28928840993484645 }, { "epoch": 2.9287598944591027, "grad_norm": 0.33032164822963594, "learning_rate": 0.00019495903916216115, "loss": 3.0593535900115967, "step": 4996, "token_acc": 0.2924629058940183 }, { "epoch": 2.929346232776312, "grad_norm": 0.3187986021205402, "learning_rate": 0.00019495600031163205, "loss": 3.0064120292663574, "step": 4997, "token_acc": 0.29896513334345426 }, { "epoch": 2.929932571093521, "grad_norm": 0.32373557724083457, "learning_rate": 0.0001949529605691193, "loss": 3.0537562370300293, "step": 4998, "token_acc": 0.2930048143534549 }, { "epoch": 2.93051890941073, "grad_norm": 0.29210231839052564, "learning_rate": 0.00019494991993465144, "loss": 3.0679166316986084, "step": 4999, "token_acc": 0.28981495535312324 }, { "epoch": 2.931105247727939, "grad_norm": 0.2850504935544495, "learning_rate": 0.00019494687840825706, "loss": 3.048098087310791, "step": 5000, "token_acc": 0.29239563440727734 }, { "epoch": 2.9316915860451482, "grad_norm": 0.3329465506136974, "learning_rate": 0.00019494383598996464, "loss": 3.0638246536254883, "step": 5001, "token_acc": 0.29252030057290623 }, { "epoch": 2.9322779243623573, "grad_norm": 0.303197221270058, "learning_rate": 0.00019494079267980285, "loss": 3.062152862548828, "step": 5002, "token_acc": 0.2916552067024923 }, { "epoch": 2.932864262679566, "grad_norm": 0.2963560862066697, "learning_rate": 0.00019493774847780025, "loss": 3.0843563079833984, "step": 5003, "token_acc": 0.28678526148345895 }, { "epoch": 2.933450600996775, "grad_norm": 0.26764886399433274, "learning_rate": 0.0001949347033839854, "loss": 3.060847282409668, "step": 5004, "token_acc": 0.290324684132002 }, { "epoch": 2.9340369393139842, "grad_norm": 0.3610937398613952, "learning_rate": 0.00019493165739838697, "loss": 3.061427354812622, "step": 5005, "token_acc": 0.2920216207299187 }, { "epoch": 2.9346232776311933, "grad_norm": 0.31418550079421076, "learning_rate": 0.00019492861052103355, "loss": 3.0480644702911377, "step": 5006, "token_acc": 0.2951172609275392 }, { "epoch": 2.935209615948402, "grad_norm": 0.3278359143463752, "learning_rate": 0.00019492556275195371, "loss": 3.105445384979248, "step": 5007, "token_acc": 0.2860137644330093 }, { "epoch": 2.935795954265611, "grad_norm": 0.3219034016040833, "learning_rate": 0.00019492251409117617, "loss": 3.0747108459472656, "step": 5008, "token_acc": 0.28919790461936185 }, { "epoch": 2.9363822925828202, "grad_norm": 0.2899224127215569, "learning_rate": 0.0001949194645387295, "loss": 3.0997304916381836, "step": 5009, "token_acc": 0.2857688496435602 }, { "epoch": 2.9369686309000294, "grad_norm": 0.3308220068886812, "learning_rate": 0.00019491641409464237, "loss": 3.0625200271606445, "step": 5010, "token_acc": 0.2905061745436784 }, { "epoch": 2.9375549692172385, "grad_norm": 0.27756265092410454, "learning_rate": 0.00019491336275894342, "loss": 3.0758347511291504, "step": 5011, "token_acc": 0.29020997669296045 }, { "epoch": 2.9381413075344476, "grad_norm": 0.27831303293579907, "learning_rate": 0.0001949103105316613, "loss": 3.0570194721221924, "step": 5012, "token_acc": 0.2916499981816198 }, { "epoch": 2.9387276458516562, "grad_norm": 0.39776774872097453, "learning_rate": 0.00019490725741282475, "loss": 3.0666885375976562, "step": 5013, "token_acc": 0.2909821121610218 }, { "epoch": 2.9393139841688654, "grad_norm": 0.28961615331830315, "learning_rate": 0.0001949042034024624, "loss": 3.0699684619903564, "step": 5014, "token_acc": 0.2905948440215713 }, { "epoch": 2.9399003224860745, "grad_norm": 0.35643917902424743, "learning_rate": 0.00019490114850060294, "loss": 3.0714263916015625, "step": 5015, "token_acc": 0.28959007195194403 }, { "epoch": 2.9404866608032836, "grad_norm": 0.33408277575780176, "learning_rate": 0.00019489809270727503, "loss": 3.1006455421447754, "step": 5016, "token_acc": 0.28470290320279623 }, { "epoch": 2.9410729991204922, "grad_norm": 0.2949549434916837, "learning_rate": 0.00019489503602250748, "loss": 3.0249931812286377, "step": 5017, "token_acc": 0.29810700677122814 }, { "epoch": 2.9416593374377014, "grad_norm": 0.3783333342199044, "learning_rate": 0.00019489197844632888, "loss": 3.0486960411071777, "step": 5018, "token_acc": 0.2921325680151058 }, { "epoch": 2.9422456757549105, "grad_norm": 0.3112857463269938, "learning_rate": 0.00019488891997876805, "loss": 3.0538716316223145, "step": 5019, "token_acc": 0.29226542528724075 }, { "epoch": 2.9428320140721196, "grad_norm": 0.33954124774377736, "learning_rate": 0.00019488586061985368, "loss": 3.079308032989502, "step": 5020, "token_acc": 0.2891891891891892 }, { "epoch": 2.9434183523893287, "grad_norm": 0.28972224156682785, "learning_rate": 0.0001948828003696145, "loss": 3.0692856311798096, "step": 5021, "token_acc": 0.2927767548723609 }, { "epoch": 2.944004690706538, "grad_norm": 0.35896931661555187, "learning_rate": 0.00019487973922807926, "loss": 3.0493698120117188, "step": 5022, "token_acc": 0.2933337573945678 }, { "epoch": 2.944591029023747, "grad_norm": 0.3452991960705216, "learning_rate": 0.00019487667719527674, "loss": 3.084245204925537, "step": 5023, "token_acc": 0.28967244641583173 }, { "epoch": 2.9451773673409556, "grad_norm": 0.36452231075163316, "learning_rate": 0.00019487361427123569, "loss": 3.063624858856201, "step": 5024, "token_acc": 0.293508327781479 }, { "epoch": 2.9457637056581647, "grad_norm": 0.28587299764911195, "learning_rate": 0.00019487055045598487, "loss": 3.0622692108154297, "step": 5025, "token_acc": 0.2903872186489029 }, { "epoch": 2.946350043975374, "grad_norm": 0.40573345242216996, "learning_rate": 0.00019486748574955304, "loss": 3.086578845977783, "step": 5026, "token_acc": 0.28955412222867305 }, { "epoch": 2.946936382292583, "grad_norm": 0.3180596990704223, "learning_rate": 0.00019486442015196904, "loss": 3.1424732208251953, "step": 5027, "token_acc": 0.2800487991877163 }, { "epoch": 2.9475227206097916, "grad_norm": 0.3461832212807444, "learning_rate": 0.00019486135366326166, "loss": 3.0806894302368164, "step": 5028, "token_acc": 0.28855204355035446 }, { "epoch": 2.9481090589270007, "grad_norm": 0.23505485145733143, "learning_rate": 0.00019485828628345965, "loss": 3.0355334281921387, "step": 5029, "token_acc": 0.29463172007458177 }, { "epoch": 2.94869539724421, "grad_norm": 0.3035267087257974, "learning_rate": 0.0001948552180125919, "loss": 3.058065891265869, "step": 5030, "token_acc": 0.29121044780856264 }, { "epoch": 2.949281735561419, "grad_norm": 0.2720137156926839, "learning_rate": 0.0001948521488506872, "loss": 3.0414891242980957, "step": 5031, "token_acc": 0.29394376643516956 }, { "epoch": 2.949868073878628, "grad_norm": 0.2900881447741355, "learning_rate": 0.00019484907879777433, "loss": 3.0601658821105957, "step": 5032, "token_acc": 0.2912491827503176 }, { "epoch": 2.950454412195837, "grad_norm": 0.3115283956790449, "learning_rate": 0.00019484600785388222, "loss": 3.1212158203125, "step": 5033, "token_acc": 0.2840862131629915 }, { "epoch": 2.9510407505130463, "grad_norm": 0.32101519084641617, "learning_rate": 0.00019484293601903965, "loss": 3.0383353233337402, "step": 5034, "token_acc": 0.29502749831615904 }, { "epoch": 2.951627088830255, "grad_norm": 0.327139323056121, "learning_rate": 0.0001948398632932755, "loss": 3.0773799419403076, "step": 5035, "token_acc": 0.2901628727259103 }, { "epoch": 2.952213427147464, "grad_norm": 0.32345577854914903, "learning_rate": 0.00019483678967661865, "loss": 3.0634474754333496, "step": 5036, "token_acc": 0.29101146210178097 }, { "epoch": 2.952799765464673, "grad_norm": 0.2794735094951863, "learning_rate": 0.00019483371516909793, "loss": 3.073577880859375, "step": 5037, "token_acc": 0.28845179247560904 }, { "epoch": 2.9533861037818823, "grad_norm": 0.3306208381665803, "learning_rate": 0.00019483063977074227, "loss": 3.0366334915161133, "step": 5038, "token_acc": 0.29502401691218777 }, { "epoch": 2.953972442099091, "grad_norm": 0.2834816971284872, "learning_rate": 0.0001948275634815805, "loss": 3.0630838871002197, "step": 5039, "token_acc": 0.2918791487981077 }, { "epoch": 2.9545587804163, "grad_norm": 0.37454056065945, "learning_rate": 0.0001948244863016416, "loss": 3.0759198665618896, "step": 5040, "token_acc": 0.29131172365702745 }, { "epoch": 2.955145118733509, "grad_norm": 0.3821083846045049, "learning_rate": 0.00019482140823095438, "loss": 3.118229866027832, "step": 5041, "token_acc": 0.28398220890293513 }, { "epoch": 2.9557314570507183, "grad_norm": 0.3811173427739997, "learning_rate": 0.00019481832926954783, "loss": 3.0890636444091797, "step": 5042, "token_acc": 0.2887295377637665 }, { "epoch": 2.9563177953679274, "grad_norm": 0.31037224595026536, "learning_rate": 0.00019481524941745087, "loss": 3.0643105506896973, "step": 5043, "token_acc": 0.29118336050855503 }, { "epoch": 2.9569041336851365, "grad_norm": 0.31979574438554376, "learning_rate": 0.0001948121686746924, "loss": 3.047090768814087, "step": 5044, "token_acc": 0.2938781676438642 }, { "epoch": 2.9574904720023456, "grad_norm": 0.300147036133735, "learning_rate": 0.00019480908704130133, "loss": 3.073992967605591, "step": 5045, "token_acc": 0.2889291955727374 }, { "epoch": 2.9580768103195543, "grad_norm": 0.3098519553080766, "learning_rate": 0.00019480600451730667, "loss": 3.0736918449401855, "step": 5046, "token_acc": 0.29125437701763424 }, { "epoch": 2.9586631486367634, "grad_norm": 0.2943943551142779, "learning_rate": 0.00019480292110273732, "loss": 3.0501723289489746, "step": 5047, "token_acc": 0.29159042862671486 }, { "epoch": 2.9592494869539725, "grad_norm": 0.2534114135452284, "learning_rate": 0.0001947998367976223, "loss": 3.1003289222717285, "step": 5048, "token_acc": 0.2876312198548982 }, { "epoch": 2.9598358252711816, "grad_norm": 0.326650918324217, "learning_rate": 0.00019479675160199056, "loss": 3.0861802101135254, "step": 5049, "token_acc": 0.28796822841762215 }, { "epoch": 2.9604221635883903, "grad_norm": 0.2614504042885108, "learning_rate": 0.00019479366551587108, "loss": 3.0408339500427246, "step": 5050, "token_acc": 0.29532137239743006 }, { "epoch": 2.9610085019055994, "grad_norm": 0.29156987513359794, "learning_rate": 0.00019479057853929283, "loss": 3.0684614181518555, "step": 5051, "token_acc": 0.2913058118391811 }, { "epoch": 2.9615948402228085, "grad_norm": 0.31009327474398224, "learning_rate": 0.00019478749067228484, "loss": 3.0783987045288086, "step": 5052, "token_acc": 0.29032592305783617 }, { "epoch": 2.9621811785400176, "grad_norm": 0.28356523304854436, "learning_rate": 0.0001947844019148761, "loss": 3.0161633491516113, "step": 5053, "token_acc": 0.29830786826593153 }, { "epoch": 2.9627675168572267, "grad_norm": 0.29066555233193414, "learning_rate": 0.00019478131226709564, "loss": 3.055677890777588, "step": 5054, "token_acc": 0.29240020805202327 }, { "epoch": 2.963353855174436, "grad_norm": 0.25840812363500565, "learning_rate": 0.00019477822172897247, "loss": 3.032134532928467, "step": 5055, "token_acc": 0.29633352339782265 }, { "epoch": 2.963940193491645, "grad_norm": 0.2707797261331279, "learning_rate": 0.00019477513030053558, "loss": 3.0665526390075684, "step": 5056, "token_acc": 0.29080377202769414 }, { "epoch": 2.9645265318088536, "grad_norm": 0.2605838223033037, "learning_rate": 0.0001947720379818141, "loss": 3.0407423973083496, "step": 5057, "token_acc": 0.2947788773188956 }, { "epoch": 2.9651128701260627, "grad_norm": 0.3205731368842676, "learning_rate": 0.000194768944772837, "loss": 3.021084785461426, "step": 5058, "token_acc": 0.2978306186623326 }, { "epoch": 2.965699208443272, "grad_norm": 0.34301685824444256, "learning_rate": 0.0001947658506736334, "loss": 3.0289154052734375, "step": 5059, "token_acc": 0.2946764699098693 }, { "epoch": 2.966285546760481, "grad_norm": 0.27033898795538125, "learning_rate": 0.00019476275568423233, "loss": 3.116395950317383, "step": 5060, "token_acc": 0.2839116387248866 }, { "epoch": 2.9668718850776896, "grad_norm": 0.2820350415568705, "learning_rate": 0.0001947596598046628, "loss": 3.0663981437683105, "step": 5061, "token_acc": 0.2906437908842085 }, { "epoch": 2.9674582233948987, "grad_norm": 0.32115416307914063, "learning_rate": 0.00019475656303495403, "loss": 3.074695348739624, "step": 5062, "token_acc": 0.2889860764478382 }, { "epoch": 2.968044561712108, "grad_norm": 0.2559949880100656, "learning_rate": 0.000194753465375135, "loss": 3.026939630508423, "step": 5063, "token_acc": 0.2960930522273386 }, { "epoch": 2.968630900029317, "grad_norm": 0.3067721216418737, "learning_rate": 0.00019475036682523486, "loss": 3.0147125720977783, "step": 5064, "token_acc": 0.2973514841830167 }, { "epoch": 2.969217238346526, "grad_norm": 0.31270101904996167, "learning_rate": 0.0001947472673852827, "loss": 3.048008441925049, "step": 5065, "token_acc": 0.2940753744325173 }, { "epoch": 2.969803576663735, "grad_norm": 0.24831209272499682, "learning_rate": 0.00019474416705530763, "loss": 3.059325933456421, "step": 5066, "token_acc": 0.29287347763654736 }, { "epoch": 2.970389914980944, "grad_norm": 0.34086496717941267, "learning_rate": 0.00019474106583533877, "loss": 3.0491175651550293, "step": 5067, "token_acc": 0.29249320220578895 }, { "epoch": 2.970976253298153, "grad_norm": 0.31107896325796147, "learning_rate": 0.00019473796372540528, "loss": 3.0021114349365234, "step": 5068, "token_acc": 0.2997472673783575 }, { "epoch": 2.971562591615362, "grad_norm": 0.3011632955524233, "learning_rate": 0.0001947348607255363, "loss": 3.035175323486328, "step": 5069, "token_acc": 0.2955362464485884 }, { "epoch": 2.972148929932571, "grad_norm": 0.3032956738281971, "learning_rate": 0.0001947317568357609, "loss": 3.0940096378326416, "step": 5070, "token_acc": 0.28624644747616934 }, { "epoch": 2.97273526824978, "grad_norm": 0.33323611702462846, "learning_rate": 0.00019472865205610835, "loss": 3.0671091079711914, "step": 5071, "token_acc": 0.2897530582374347 }, { "epoch": 2.973321606566989, "grad_norm": 0.36946921838589714, "learning_rate": 0.00019472554638660773, "loss": 3.065242290496826, "step": 5072, "token_acc": 0.2910355582318156 }, { "epoch": 2.973907944884198, "grad_norm": 0.41988922224095526, "learning_rate": 0.00019472243982728826, "loss": 3.076535224914551, "step": 5073, "token_acc": 0.28871121562479585 }, { "epoch": 2.974494283201407, "grad_norm": 0.4794590296986978, "learning_rate": 0.00019471933237817911, "loss": 3.040800094604492, "step": 5074, "token_acc": 0.29476912099932584 }, { "epoch": 2.9750806215186163, "grad_norm": 0.38863128098786015, "learning_rate": 0.00019471622403930948, "loss": 3.0877060890197754, "step": 5075, "token_acc": 0.2879726612746187 }, { "epoch": 2.9756669598358254, "grad_norm": 0.39644370845163085, "learning_rate": 0.00019471311481070855, "loss": 3.0534019470214844, "step": 5076, "token_acc": 0.29385373662482095 }, { "epoch": 2.9762532981530345, "grad_norm": 0.39108778994140214, "learning_rate": 0.00019471000469240552, "loss": 3.0581254959106445, "step": 5077, "token_acc": 0.2904801765073657 }, { "epoch": 2.976839636470243, "grad_norm": 0.3897971884991325, "learning_rate": 0.00019470689368442963, "loss": 3.026871919631958, "step": 5078, "token_acc": 0.2973275434047617 }, { "epoch": 2.9774259747874523, "grad_norm": 0.36666015103995236, "learning_rate": 0.00019470378178681008, "loss": 3.0175135135650635, "step": 5079, "token_acc": 0.2992721414365105 }, { "epoch": 2.9780123131046614, "grad_norm": 0.3919834615243031, "learning_rate": 0.00019470066899957616, "loss": 3.058434009552002, "step": 5080, "token_acc": 0.2925285501618797 }, { "epoch": 2.9785986514218705, "grad_norm": 0.3502630441441781, "learning_rate": 0.000194697555322757, "loss": 3.0767104625701904, "step": 5081, "token_acc": 0.28790827663376944 }, { "epoch": 2.979184989739079, "grad_norm": 0.3308124297422489, "learning_rate": 0.00019469444075638194, "loss": 3.0623905658721924, "step": 5082, "token_acc": 0.290237226766078 }, { "epoch": 2.9797713280562883, "grad_norm": 0.31461500140887944, "learning_rate": 0.00019469132530048025, "loss": 3.112497329711914, "step": 5083, "token_acc": 0.285479396835668 }, { "epoch": 2.9803576663734974, "grad_norm": 0.2915778103955219, "learning_rate": 0.00019468820895508112, "loss": 3.061370849609375, "step": 5084, "token_acc": 0.2924669537692402 }, { "epoch": 2.9809440046907065, "grad_norm": 0.2898295500975545, "learning_rate": 0.00019468509172021386, "loss": 3.0631327629089355, "step": 5085, "token_acc": 0.2925041229983508 }, { "epoch": 2.9815303430079156, "grad_norm": 0.2907425806482855, "learning_rate": 0.00019468197359590774, "loss": 3.0638012886047363, "step": 5086, "token_acc": 0.2903084219203306 }, { "epoch": 2.9821166813251248, "grad_norm": 0.26812391809142333, "learning_rate": 0.0001946788545821921, "loss": 3.0960702896118164, "step": 5087, "token_acc": 0.2849869660052136 }, { "epoch": 2.982703019642334, "grad_norm": 0.2973472960867228, "learning_rate": 0.00019467573467909618, "loss": 3.1034975051879883, "step": 5088, "token_acc": 0.2855327043776892 }, { "epoch": 2.9832893579595425, "grad_norm": 0.28333137227821464, "learning_rate": 0.00019467261388664931, "loss": 3.0710272789001465, "step": 5089, "token_acc": 0.2913264778834229 }, { "epoch": 2.9838756962767516, "grad_norm": 0.3127538475349362, "learning_rate": 0.0001946694922048808, "loss": 3.072453498840332, "step": 5090, "token_acc": 0.290902084169183 }, { "epoch": 2.9844620345939608, "grad_norm": 0.348293748569071, "learning_rate": 0.00019466636963382002, "loss": 3.028822422027588, "step": 5091, "token_acc": 0.29601545321907813 }, { "epoch": 2.98504837291117, "grad_norm": 0.30294241217904117, "learning_rate": 0.0001946632461734962, "loss": 3.0734779834747314, "step": 5092, "token_acc": 0.2890665115742924 }, { "epoch": 2.9856347112283785, "grad_norm": 0.33273280994821236, "learning_rate": 0.00019466012182393878, "loss": 3.045283794403076, "step": 5093, "token_acc": 0.29408210442837374 }, { "epoch": 2.9862210495455876, "grad_norm": 0.3273073076525325, "learning_rate": 0.00019465699658517707, "loss": 3.0563011169433594, "step": 5094, "token_acc": 0.29140590822525336 }, { "epoch": 2.9868073878627968, "grad_norm": 0.32046910625811176, "learning_rate": 0.00019465387045724042, "loss": 3.005497455596924, "step": 5095, "token_acc": 0.2990356718224541 }, { "epoch": 2.987393726180006, "grad_norm": 0.25805871783594836, "learning_rate": 0.0001946507434401582, "loss": 3.035773754119873, "step": 5096, "token_acc": 0.2937105680819528 }, { "epoch": 2.987980064497215, "grad_norm": 0.32529885878062686, "learning_rate": 0.00019464761553395982, "loss": 3.089299201965332, "step": 5097, "token_acc": 0.28769768629048476 }, { "epoch": 2.988566402814424, "grad_norm": 0.3002496596254249, "learning_rate": 0.0001946444867386746, "loss": 3.0452799797058105, "step": 5098, "token_acc": 0.2938198736018796 }, { "epoch": 2.989152741131633, "grad_norm": 0.27622070860940207, "learning_rate": 0.00019464135705433196, "loss": 3.0655341148376465, "step": 5099, "token_acc": 0.29159283323488877 }, { "epoch": 2.989739079448842, "grad_norm": 0.3154041915479705, "learning_rate": 0.00019463822648096133, "loss": 3.029757022857666, "step": 5100, "token_acc": 0.2951072141492627 }, { "epoch": 2.990325417766051, "grad_norm": 0.30749729824971944, "learning_rate": 0.00019463509501859206, "loss": 3.0653178691864014, "step": 5101, "token_acc": 0.2916677521375092 }, { "epoch": 2.99091175608326, "grad_norm": 0.2634593593998019, "learning_rate": 0.0001946319626672536, "loss": 3.072056293487549, "step": 5102, "token_acc": 0.289483906257019 }, { "epoch": 2.991498094400469, "grad_norm": 0.30658755150656564, "learning_rate": 0.0001946288294269754, "loss": 3.059781074523926, "step": 5103, "token_acc": 0.29289004426051907 }, { "epoch": 2.992084432717678, "grad_norm": 0.3155458965303423, "learning_rate": 0.00019462569529778682, "loss": 3.0822203159332275, "step": 5104, "token_acc": 0.2868187089292617 }, { "epoch": 2.992670771034887, "grad_norm": 0.42860585615712365, "learning_rate": 0.00019462256027971735, "loss": 3.0829527378082275, "step": 5105, "token_acc": 0.2894435666963183 }, { "epoch": 2.993257109352096, "grad_norm": 0.5931398595389955, "learning_rate": 0.00019461942437279644, "loss": 3.096341609954834, "step": 5106, "token_acc": 0.28655393844658966 }, { "epoch": 2.993843447669305, "grad_norm": 0.47090810251505416, "learning_rate": 0.00019461628757705356, "loss": 3.055384635925293, "step": 5107, "token_acc": 0.2936589543578994 }, { "epoch": 2.9944297859865143, "grad_norm": 0.3358624284703214, "learning_rate": 0.0001946131498925181, "loss": 3.0655620098114014, "step": 5108, "token_acc": 0.29105537915074536 }, { "epoch": 2.9950161243037234, "grad_norm": 0.34646293213878354, "learning_rate": 0.00019461001131921963, "loss": 3.060105800628662, "step": 5109, "token_acc": 0.292280753352287 }, { "epoch": 2.9956024626209325, "grad_norm": 0.35487299696414576, "learning_rate": 0.00019460687185718757, "loss": 3.071585178375244, "step": 5110, "token_acc": 0.29089450799949346 }, { "epoch": 2.996188800938141, "grad_norm": 0.33429780373443735, "learning_rate": 0.00019460373150645145, "loss": 3.086029529571533, "step": 5111, "token_acc": 0.28776614036163367 }, { "epoch": 2.9967751392553503, "grad_norm": 0.34637765097363293, "learning_rate": 0.00019460059026704077, "loss": 3.0650389194488525, "step": 5112, "token_acc": 0.29114903192536684 }, { "epoch": 2.9973614775725594, "grad_norm": 0.3257436694676698, "learning_rate": 0.000194597448138985, "loss": 3.0570435523986816, "step": 5113, "token_acc": 0.2934961144709703 }, { "epoch": 2.9979478158897686, "grad_norm": 0.2806794087998649, "learning_rate": 0.00019459430512231367, "loss": 3.0268986225128174, "step": 5114, "token_acc": 0.29709987086334855 }, { "epoch": 2.998534154206977, "grad_norm": 0.34635888337478654, "learning_rate": 0.00019459116121705634, "loss": 3.108278512954712, "step": 5115, "token_acc": 0.28588031140050624 }, { "epoch": 2.9991204925241863, "grad_norm": 0.31972129246257647, "learning_rate": 0.0001945880164232425, "loss": 2.9964239597320557, "step": 5116, "token_acc": 0.300626272156039 }, { "epoch": 2.9997068308413954, "grad_norm": 0.29687809473523824, "learning_rate": 0.00019458487074090167, "loss": 3.0663998126983643, "step": 5117, "token_acc": 0.28906655746873877 }, { "epoch": 3.0, "grad_norm": 0.3301195576428841, "learning_rate": 0.00019458172417006347, "loss": 3.0576601028442383, "step": 5118, "token_acc": 0.29402854740291945 }, { "epoch": 3.0, "eval_loss": 3.0766077041625977, "eval_runtime": 16.7656, "eval_samples_per_second": 15.269, "eval_steps_per_second": 1.909, "eval_token_acc": 0.2895441487563783, "step": 5118 }, { "epoch": 3.000586338317209, "grad_norm": 0.32542610512145914, "learning_rate": 0.00019457857671075743, "loss": 2.99609375, "step": 5119, "token_acc": 0.29915804399768886 }, { "epoch": 3.0011726766344182, "grad_norm": 0.3484215275556144, "learning_rate": 0.00019457542836301308, "loss": 2.924553871154785, "step": 5120, "token_acc": 0.3090295235605253 }, { "epoch": 3.001759014951627, "grad_norm": 0.2952268380512712, "learning_rate": 0.00019457227912686006, "loss": 2.9160537719726562, "step": 5121, "token_acc": 0.3113160260643334 }, { "epoch": 3.002345353268836, "grad_norm": 0.3470832856831942, "learning_rate": 0.00019456912900232788, "loss": 2.9686379432678223, "step": 5122, "token_acc": 0.30280014084096885 }, { "epoch": 3.002931691586045, "grad_norm": 0.342045200920826, "learning_rate": 0.00019456597798944616, "loss": 2.933501958847046, "step": 5123, "token_acc": 0.3072107322526551 }, { "epoch": 3.0035180299032542, "grad_norm": 0.27745103972243146, "learning_rate": 0.00019456282608824453, "loss": 2.9482598304748535, "step": 5124, "token_acc": 0.3061465909999201 }, { "epoch": 3.0041043682204633, "grad_norm": 0.31857739348476116, "learning_rate": 0.00019455967329875255, "loss": 2.9381589889526367, "step": 5125, "token_acc": 0.3080063358742829 }, { "epoch": 3.0046907065376725, "grad_norm": 0.29967830571185755, "learning_rate": 0.00019455651962099987, "loss": 2.9471874237060547, "step": 5126, "token_acc": 0.30596984509541264 }, { "epoch": 3.005277044854881, "grad_norm": 0.36330079033086976, "learning_rate": 0.00019455336505501614, "loss": 2.9471545219421387, "step": 5127, "token_acc": 0.30482352599829604 }, { "epoch": 3.0058633831720902, "grad_norm": 0.368025000630573, "learning_rate": 0.00019455020960083093, "loss": 2.928900957107544, "step": 5128, "token_acc": 0.30792494672646964 }, { "epoch": 3.0064497214892993, "grad_norm": 0.29734071190141903, "learning_rate": 0.00019454705325847388, "loss": 2.931608200073242, "step": 5129, "token_acc": 0.3085685284912736 }, { "epoch": 3.0070360598065085, "grad_norm": 0.3345639676785342, "learning_rate": 0.0001945438960279747, "loss": 2.9155187606811523, "step": 5130, "token_acc": 0.3093837019912783 }, { "epoch": 3.0076223981237176, "grad_norm": 0.3429953825908296, "learning_rate": 0.00019454073790936303, "loss": 2.9233827590942383, "step": 5131, "token_acc": 0.3102762783305516 }, { "epoch": 3.0082087364409262, "grad_norm": 0.3556854334183303, "learning_rate": 0.00019453757890266848, "loss": 2.9480414390563965, "step": 5132, "token_acc": 0.3053006594160858 }, { "epoch": 3.0087950747581353, "grad_norm": 0.3162687449231907, "learning_rate": 0.00019453441900792079, "loss": 2.9260048866271973, "step": 5133, "token_acc": 0.3093626474483996 }, { "epoch": 3.0093814130753445, "grad_norm": 0.3036523233786287, "learning_rate": 0.00019453125822514964, "loss": 2.8807201385498047, "step": 5134, "token_acc": 0.3163385884271098 }, { "epoch": 3.0099677513925536, "grad_norm": 0.32081759374360624, "learning_rate": 0.00019452809655438468, "loss": 2.967503309249878, "step": 5135, "token_acc": 0.3018798321127022 }, { "epoch": 3.0105540897097627, "grad_norm": 0.314895990341606, "learning_rate": 0.00019452493399565565, "loss": 2.8954648971557617, "step": 5136, "token_acc": 0.3130605014443449 }, { "epoch": 3.0111404280269713, "grad_norm": 0.2836142606794178, "learning_rate": 0.00019452177054899222, "loss": 2.939119338989258, "step": 5137, "token_acc": 0.3079360261561113 }, { "epoch": 3.0117267663441805, "grad_norm": 0.3496845442846436, "learning_rate": 0.00019451860621442411, "loss": 2.9109444618225098, "step": 5138, "token_acc": 0.3122114496768236 }, { "epoch": 3.0123131046613896, "grad_norm": 0.33079850545999845, "learning_rate": 0.00019451544099198108, "loss": 2.9014229774475098, "step": 5139, "token_acc": 0.31152785176667913 }, { "epoch": 3.0128994429785987, "grad_norm": 0.378778424049442, "learning_rate": 0.00019451227488169286, "loss": 2.9063029289245605, "step": 5140, "token_acc": 0.31162722404115395 }, { "epoch": 3.013485781295808, "grad_norm": 0.3741936508738994, "learning_rate": 0.00019450910788358912, "loss": 2.9214630126953125, "step": 5141, "token_acc": 0.3092676590440075 }, { "epoch": 3.014072119613017, "grad_norm": 0.3353306802973895, "learning_rate": 0.00019450593999769972, "loss": 2.9295449256896973, "step": 5142, "token_acc": 0.3067986460097664 }, { "epoch": 3.0146584579302256, "grad_norm": 0.3654156316820181, "learning_rate": 0.00019450277122405436, "loss": 2.9232547283172607, "step": 5143, "token_acc": 0.30931743903144726 }, { "epoch": 3.0152447962474347, "grad_norm": 0.29056176985813614, "learning_rate": 0.00019449960156268277, "loss": 2.922541618347168, "step": 5144, "token_acc": 0.30918517351813934 }, { "epoch": 3.015831134564644, "grad_norm": 0.3727929623909324, "learning_rate": 0.0001944964310136148, "loss": 2.9728665351867676, "step": 5145, "token_acc": 0.300348150764418 }, { "epoch": 3.016417472881853, "grad_norm": 0.30239551930947406, "learning_rate": 0.00019449325957688018, "loss": 2.9441027641296387, "step": 5146, "token_acc": 0.30542540708844707 }, { "epoch": 3.017003811199062, "grad_norm": 0.3192694127207607, "learning_rate": 0.0001944900872525087, "loss": 2.9099204540252686, "step": 5147, "token_acc": 0.3105837393911463 }, { "epoch": 3.0175901495162707, "grad_norm": 0.27908739227935875, "learning_rate": 0.0001944869140405302, "loss": 2.8911123275756836, "step": 5148, "token_acc": 0.31453582229851235 }, { "epoch": 3.01817648783348, "grad_norm": 0.32842921861514035, "learning_rate": 0.00019448373994097447, "loss": 2.9719126224517822, "step": 5149, "token_acc": 0.3030040631452741 }, { "epoch": 3.018762826150689, "grad_norm": 0.3078400062879974, "learning_rate": 0.00019448056495387133, "loss": 2.9632067680358887, "step": 5150, "token_acc": 0.3045764317700938 }, { "epoch": 3.019349164467898, "grad_norm": 0.3034017359841723, "learning_rate": 0.00019447738907925056, "loss": 2.946415901184082, "step": 5151, "token_acc": 0.30527063459937526 }, { "epoch": 3.019935502785107, "grad_norm": 0.30233509036689804, "learning_rate": 0.00019447421231714204, "loss": 2.9368433952331543, "step": 5152, "token_acc": 0.30598607039055287 }, { "epoch": 3.0205218411023163, "grad_norm": 0.2900018232501726, "learning_rate": 0.0001944710346675756, "loss": 2.886044979095459, "step": 5153, "token_acc": 0.31436047825461716 }, { "epoch": 3.021108179419525, "grad_norm": 0.2574826967617441, "learning_rate": 0.00019446785613058112, "loss": 2.9603962898254395, "step": 5154, "token_acc": 0.3052950295123544 }, { "epoch": 3.021694517736734, "grad_norm": 0.34091645301319146, "learning_rate": 0.00019446467670618839, "loss": 2.9147567749023438, "step": 5155, "token_acc": 0.3103328161968227 }, { "epoch": 3.022280856053943, "grad_norm": 0.2709027236500024, "learning_rate": 0.00019446149639442735, "loss": 2.9413373470306396, "step": 5156, "token_acc": 0.30683219729603045 }, { "epoch": 3.0228671943711523, "grad_norm": 0.31274479899862245, "learning_rate": 0.00019445831519532782, "loss": 2.947415351867676, "step": 5157, "token_acc": 0.3064849277829296 }, { "epoch": 3.0234535326883614, "grad_norm": 0.2679146091813962, "learning_rate": 0.00019445513310891973, "loss": 2.9493980407714844, "step": 5158, "token_acc": 0.30504360406703374 }, { "epoch": 3.02403987100557, "grad_norm": 0.2749909295233927, "learning_rate": 0.0001944519501352329, "loss": 2.9203662872314453, "step": 5159, "token_acc": 0.3096459643646366 }, { "epoch": 3.024626209322779, "grad_norm": 0.3269742588051401, "learning_rate": 0.0001944487662742973, "loss": 2.944547653198242, "step": 5160, "token_acc": 0.30578701715202017 }, { "epoch": 3.0252125476399883, "grad_norm": 0.3384153780227322, "learning_rate": 0.0001944455815261428, "loss": 2.9541068077087402, "step": 5161, "token_acc": 0.30482071774222397 }, { "epoch": 3.0257988859571974, "grad_norm": 0.2793764852752462, "learning_rate": 0.00019444239589079933, "loss": 2.916184663772583, "step": 5162, "token_acc": 0.3095581834780578 }, { "epoch": 3.0263852242744065, "grad_norm": 0.31434799352913817, "learning_rate": 0.00019443920936829681, "loss": 2.893907070159912, "step": 5163, "token_acc": 0.31364041557867833 }, { "epoch": 3.026971562591615, "grad_norm": 0.3429591571424557, "learning_rate": 0.0001944360219586652, "loss": 2.9555184841156006, "step": 5164, "token_acc": 0.3046860659720348 }, { "epoch": 3.0275579009088243, "grad_norm": 0.3101454811030327, "learning_rate": 0.0001944328336619344, "loss": 2.903646469116211, "step": 5165, "token_acc": 0.3118201896842926 }, { "epoch": 3.0281442392260334, "grad_norm": 0.3618964956564648, "learning_rate": 0.00019442964447813438, "loss": 2.9094157218933105, "step": 5166, "token_acc": 0.31061253617108037 }, { "epoch": 3.0287305775432425, "grad_norm": 0.27160278823182604, "learning_rate": 0.0001944264544072951, "loss": 2.9083375930786133, "step": 5167, "token_acc": 0.31002800140007003 }, { "epoch": 3.0293169158604516, "grad_norm": 0.3361325535675269, "learning_rate": 0.0001944232634494465, "loss": 2.954824447631836, "step": 5168, "token_acc": 0.30405085220758704 }, { "epoch": 3.0299032541776607, "grad_norm": 0.31725615593714823, "learning_rate": 0.00019442007160461858, "loss": 2.947997570037842, "step": 5169, "token_acc": 0.30623556229844395 }, { "epoch": 3.0304895924948694, "grad_norm": 0.3348319942756914, "learning_rate": 0.00019441687887284136, "loss": 2.8970162868499756, "step": 5170, "token_acc": 0.31299684141352047 }, { "epoch": 3.0310759308120785, "grad_norm": 0.30433916871070316, "learning_rate": 0.00019441368525414477, "loss": 2.920292854309082, "step": 5171, "token_acc": 0.3080676802989585 }, { "epoch": 3.0316622691292876, "grad_norm": 0.30170991706218725, "learning_rate": 0.00019441049074855885, "loss": 2.901003837585449, "step": 5172, "token_acc": 0.3120196562793094 }, { "epoch": 3.0322486074464967, "grad_norm": 0.31314765821490453, "learning_rate": 0.00019440729535611352, "loss": 2.9486870765686035, "step": 5173, "token_acc": 0.3048625281359783 }, { "epoch": 3.032834945763706, "grad_norm": 0.3289469414154259, "learning_rate": 0.00019440409907683895, "loss": 2.9477133750915527, "step": 5174, "token_acc": 0.304574505640415 }, { "epoch": 3.0334212840809145, "grad_norm": 0.3122404877796078, "learning_rate": 0.00019440090191076502, "loss": 2.8961567878723145, "step": 5175, "token_acc": 0.31156532686274707 }, { "epoch": 3.0340076223981236, "grad_norm": 0.2825798868947717, "learning_rate": 0.00019439770385792183, "loss": 2.9087021350860596, "step": 5176, "token_acc": 0.31188267369776373 }, { "epoch": 3.0345939607153327, "grad_norm": 0.2868566570902747, "learning_rate": 0.00019439450491833945, "loss": 2.9344851970672607, "step": 5177, "token_acc": 0.30849164544689234 }, { "epoch": 3.035180299032542, "grad_norm": 0.3237326209538367, "learning_rate": 0.00019439130509204787, "loss": 2.965400457382202, "step": 5178, "token_acc": 0.3029042170644001 }, { "epoch": 3.035766637349751, "grad_norm": 0.3170684735408952, "learning_rate": 0.00019438810437907717, "loss": 2.911686658859253, "step": 5179, "token_acc": 0.3099352164312794 }, { "epoch": 3.03635297566696, "grad_norm": 0.31935816249145776, "learning_rate": 0.00019438490277945745, "loss": 2.9230916500091553, "step": 5180, "token_acc": 0.30899603771078155 }, { "epoch": 3.0369393139841687, "grad_norm": 0.30826402757073124, "learning_rate": 0.0001943817002932187, "loss": 2.974609375, "step": 5181, "token_acc": 0.30249739517256646 }, { "epoch": 3.037525652301378, "grad_norm": 0.3076041601625042, "learning_rate": 0.0001943784969203911, "loss": 2.9336605072021484, "step": 5182, "token_acc": 0.30774229411166004 }, { "epoch": 3.038111990618587, "grad_norm": 0.3013814829956809, "learning_rate": 0.0001943752926610047, "loss": 2.9661457538604736, "step": 5183, "token_acc": 0.3022054183641409 }, { "epoch": 3.038698328935796, "grad_norm": 0.3376232199583679, "learning_rate": 0.00019437208751508958, "loss": 2.917414665222168, "step": 5184, "token_acc": 0.3103650981942797 }, { "epoch": 3.039284667253005, "grad_norm": 0.26918587394579985, "learning_rate": 0.00019436888148267585, "loss": 2.8925657272338867, "step": 5185, "token_acc": 0.3134051155421553 }, { "epoch": 3.039871005570214, "grad_norm": 0.3230848419086703, "learning_rate": 0.00019436567456379366, "loss": 2.920283317565918, "step": 5186, "token_acc": 0.3095187287849614 }, { "epoch": 3.040457343887423, "grad_norm": 0.375243178051357, "learning_rate": 0.00019436246675847313, "loss": 2.9641270637512207, "step": 5187, "token_acc": 0.30445870466105807 }, { "epoch": 3.041043682204632, "grad_norm": 0.3244980721865245, "learning_rate": 0.00019435925806674437, "loss": 2.976930618286133, "step": 5188, "token_acc": 0.3030080328586683 }, { "epoch": 3.041630020521841, "grad_norm": 0.26752102786035004, "learning_rate": 0.00019435604848863752, "loss": 2.9252562522888184, "step": 5189, "token_acc": 0.31041034224643405 }, { "epoch": 3.0422163588390503, "grad_norm": 0.2870771300758046, "learning_rate": 0.00019435283802418275, "loss": 2.917541980743408, "step": 5190, "token_acc": 0.3107720171341012 }, { "epoch": 3.042802697156259, "grad_norm": 0.28587770938788876, "learning_rate": 0.0001943496266734102, "loss": 2.931736469268799, "step": 5191, "token_acc": 0.3058618181070361 }, { "epoch": 3.043389035473468, "grad_norm": 0.2842974678076354, "learning_rate": 0.00019434641443635006, "loss": 2.940046787261963, "step": 5192, "token_acc": 0.3065712764714284 }, { "epoch": 3.043975373790677, "grad_norm": 0.2665458085804807, "learning_rate": 0.0001943432013130325, "loss": 2.907958984375, "step": 5193, "token_acc": 0.3116108062023765 }, { "epoch": 3.0445617121078863, "grad_norm": 0.340157062638762, "learning_rate": 0.00019433998730348766, "loss": 2.9279537200927734, "step": 5194, "token_acc": 0.3082989255474905 }, { "epoch": 3.0451480504250954, "grad_norm": 0.264892515717637, "learning_rate": 0.0001943367724077458, "loss": 2.9474236965179443, "step": 5195, "token_acc": 0.3056360593505725 }, { "epoch": 3.0457343887423045, "grad_norm": 0.29087808765955064, "learning_rate": 0.00019433355662583704, "loss": 2.8737950325012207, "step": 5196, "token_acc": 0.31799685884599865 }, { "epoch": 3.046320727059513, "grad_norm": 0.2862129675068749, "learning_rate": 0.00019433033995779164, "loss": 2.94284987449646, "step": 5197, "token_acc": 0.30531864935694486 }, { "epoch": 3.0469070653767223, "grad_norm": 0.2966285430106267, "learning_rate": 0.00019432712240363985, "loss": 2.9285879135131836, "step": 5198, "token_acc": 0.30816626132366753 }, { "epoch": 3.0474934036939314, "grad_norm": 0.2851217920002604, "learning_rate": 0.0001943239039634118, "loss": 2.9066271781921387, "step": 5199, "token_acc": 0.3099789263590375 }, { "epoch": 3.0480797420111405, "grad_norm": 0.2553354121019608, "learning_rate": 0.0001943206846371378, "loss": 2.908294200897217, "step": 5200, "token_acc": 0.31121503686382745 }, { "epoch": 3.0486660803283496, "grad_norm": 0.30311014096611794, "learning_rate": 0.00019431746442484808, "loss": 2.906442165374756, "step": 5201, "token_acc": 0.312074025242161 }, { "epoch": 3.0492524186455583, "grad_norm": 0.3300061461013664, "learning_rate": 0.00019431424332657288, "loss": 2.9103844165802, "step": 5202, "token_acc": 0.3095899485031731 }, { "epoch": 3.0498387569627674, "grad_norm": 0.3703954377911579, "learning_rate": 0.00019431102134234243, "loss": 2.9320430755615234, "step": 5203, "token_acc": 0.3089385535089563 }, { "epoch": 3.0504250952799765, "grad_norm": 0.351683438408679, "learning_rate": 0.000194307798472187, "loss": 2.921031951904297, "step": 5204, "token_acc": 0.3102155003286775 }, { "epoch": 3.0510114335971856, "grad_norm": 0.2855525728809736, "learning_rate": 0.00019430457471613692, "loss": 2.936224937438965, "step": 5205, "token_acc": 0.30642035098408965 }, { "epoch": 3.0515977719143947, "grad_norm": 0.26523595453276416, "learning_rate": 0.00019430135007422243, "loss": 2.9507851600646973, "step": 5206, "token_acc": 0.3052540442246888 }, { "epoch": 3.052184110231604, "grad_norm": 0.3826239846267414, "learning_rate": 0.00019429812454647386, "loss": 2.9499897956848145, "step": 5207, "token_acc": 0.30487685641653717 }, { "epoch": 3.0527704485488125, "grad_norm": 0.3425011585771168, "learning_rate": 0.00019429489813292143, "loss": 2.9314184188842773, "step": 5208, "token_acc": 0.3081917495434594 }, { "epoch": 3.0533567868660216, "grad_norm": 0.27309155129912394, "learning_rate": 0.0001942916708335955, "loss": 2.972264289855957, "step": 5209, "token_acc": 0.3037004493894338 }, { "epoch": 3.0539431251832307, "grad_norm": 0.3692863638983722, "learning_rate": 0.0001942884426485264, "loss": 2.9193172454833984, "step": 5210, "token_acc": 0.30953969141064636 }, { "epoch": 3.05452946350044, "grad_norm": 0.2867693017933724, "learning_rate": 0.00019428521357774443, "loss": 2.948643684387207, "step": 5211, "token_acc": 0.3054388346585644 }, { "epoch": 3.055115801817649, "grad_norm": 0.38337303810177514, "learning_rate": 0.00019428198362127992, "loss": 2.8976097106933594, "step": 5212, "token_acc": 0.3114708679540835 }, { "epoch": 3.0557021401348576, "grad_norm": 0.3922767667766906, "learning_rate": 0.00019427875277916324, "loss": 2.9133996963500977, "step": 5213, "token_acc": 0.30922146435042097 }, { "epoch": 3.0562884784520667, "grad_norm": 0.40924739212001626, "learning_rate": 0.0001942755210514247, "loss": 2.9343833923339844, "step": 5214, "token_acc": 0.30736532199782923 }, { "epoch": 3.056874816769276, "grad_norm": 0.38696392641882704, "learning_rate": 0.0001942722884380947, "loss": 2.9053173065185547, "step": 5215, "token_acc": 0.31169289448761694 }, { "epoch": 3.057461155086485, "grad_norm": 0.26676459614221326, "learning_rate": 0.00019426905493920358, "loss": 2.9307477474212646, "step": 5216, "token_acc": 0.3074609762794922 }, { "epoch": 3.058047493403694, "grad_norm": 0.3135862707371322, "learning_rate": 0.0001942658205547817, "loss": 2.936640739440918, "step": 5217, "token_acc": 0.30614871342958816 }, { "epoch": 3.0586338317209028, "grad_norm": 0.27982570220829195, "learning_rate": 0.00019426258528485946, "loss": 2.9156956672668457, "step": 5218, "token_acc": 0.30996228995402336 }, { "epoch": 3.059220170038112, "grad_norm": 0.2798509316664159, "learning_rate": 0.00019425934912946726, "loss": 2.9157357215881348, "step": 5219, "token_acc": 0.30957495614626906 }, { "epoch": 3.059806508355321, "grad_norm": 0.2834774616842488, "learning_rate": 0.0001942561120886355, "loss": 2.8836545944213867, "step": 5220, "token_acc": 0.3152024039585758 }, { "epoch": 3.06039284667253, "grad_norm": 0.2723891098461981, "learning_rate": 0.00019425287416239458, "loss": 2.9562482833862305, "step": 5221, "token_acc": 0.30617459739284963 }, { "epoch": 3.060979184989739, "grad_norm": 0.3603106516806401, "learning_rate": 0.00019424963535077488, "loss": 2.9155402183532715, "step": 5222, "token_acc": 0.3117574179227893 }, { "epoch": 3.0615655233069483, "grad_norm": 0.2882600230014994, "learning_rate": 0.0001942463956538069, "loss": 2.9290101528167725, "step": 5223, "token_acc": 0.30854587172978787 }, { "epoch": 3.062151861624157, "grad_norm": 0.28898272488602517, "learning_rate": 0.00019424315507152103, "loss": 2.945805549621582, "step": 5224, "token_acc": 0.30653511026961444 }, { "epoch": 3.062738199941366, "grad_norm": 0.36021558008686405, "learning_rate": 0.0001942399136039477, "loss": 2.8986129760742188, "step": 5225, "token_acc": 0.3120431803080341 }, { "epoch": 3.063324538258575, "grad_norm": 0.25730225647986243, "learning_rate": 0.00019423667125111735, "loss": 2.930884838104248, "step": 5226, "token_acc": 0.3087518142625948 }, { "epoch": 3.0639108765757843, "grad_norm": 0.33721514069597724, "learning_rate": 0.00019423342801306047, "loss": 2.929837703704834, "step": 5227, "token_acc": 0.30877443023608747 }, { "epoch": 3.0644972148929934, "grad_norm": 0.3271741677189038, "learning_rate": 0.00019423018388980753, "loss": 2.93837833404541, "step": 5228, "token_acc": 0.3065539991009461 }, { "epoch": 3.065083553210202, "grad_norm": 0.35905935161449604, "learning_rate": 0.000194226938881389, "loss": 2.910773754119873, "step": 5229, "token_acc": 0.3107101818710129 }, { "epoch": 3.065669891527411, "grad_norm": 0.3574399221056548, "learning_rate": 0.00019422369298783534, "loss": 2.9578981399536133, "step": 5230, "token_acc": 0.3033850733019151 }, { "epoch": 3.0662562298446203, "grad_norm": 0.28197756106453786, "learning_rate": 0.00019422044620917702, "loss": 2.9434781074523926, "step": 5231, "token_acc": 0.3050355614396995 }, { "epoch": 3.0668425681618294, "grad_norm": 0.3586332047428451, "learning_rate": 0.00019421719854544463, "loss": 2.929109573364258, "step": 5232, "token_acc": 0.30949500979190886 }, { "epoch": 3.0674289064790385, "grad_norm": 0.33396164067036244, "learning_rate": 0.00019421394999666856, "loss": 2.894759178161621, "step": 5233, "token_acc": 0.3138705020293645 }, { "epoch": 3.068015244796247, "grad_norm": 0.29983078959669274, "learning_rate": 0.00019421070056287944, "loss": 2.9345834255218506, "step": 5234, "token_acc": 0.30858320212621904 }, { "epoch": 3.0686015831134563, "grad_norm": 0.3288632239006383, "learning_rate": 0.00019420745024410768, "loss": 2.910721778869629, "step": 5235, "token_acc": 0.3104366347177849 }, { "epoch": 3.0691879214306654, "grad_norm": 0.2666188132915057, "learning_rate": 0.0001942041990403839, "loss": 2.9346981048583984, "step": 5236, "token_acc": 0.30712224167750984 }, { "epoch": 3.0697742597478745, "grad_norm": 0.3438648318086929, "learning_rate": 0.00019420094695173863, "loss": 2.928624153137207, "step": 5237, "token_acc": 0.30755583385514507 }, { "epoch": 3.0703605980650837, "grad_norm": 0.27603459777552103, "learning_rate": 0.0001941976939782024, "loss": 2.9580626487731934, "step": 5238, "token_acc": 0.3028971383462629 }, { "epoch": 3.0709469363822928, "grad_norm": 0.32230224379971845, "learning_rate": 0.00019419444011980574, "loss": 2.9392714500427246, "step": 5239, "token_acc": 0.305870022685417 }, { "epoch": 3.0715332746995014, "grad_norm": 0.3315450069377555, "learning_rate": 0.00019419118537657927, "loss": 2.9567298889160156, "step": 5240, "token_acc": 0.30397701058514914 }, { "epoch": 3.0721196130167105, "grad_norm": 0.2972335683417017, "learning_rate": 0.0001941879297485535, "loss": 2.9409749507904053, "step": 5241, "token_acc": 0.307252599385238 }, { "epoch": 3.0727059513339197, "grad_norm": 0.43243676421492266, "learning_rate": 0.00019418467323575908, "loss": 2.9394798278808594, "step": 5242, "token_acc": 0.3069827264659353 }, { "epoch": 3.0732922896511288, "grad_norm": 0.3406213396174589, "learning_rate": 0.00019418141583822657, "loss": 2.925457000732422, "step": 5243, "token_acc": 0.3092428335502499 }, { "epoch": 3.073878627968338, "grad_norm": 0.34384397596044963, "learning_rate": 0.00019417815755598655, "loss": 2.9534716606140137, "step": 5244, "token_acc": 0.3054742010509088 }, { "epoch": 3.0744649662855466, "grad_norm": 0.32242024908318306, "learning_rate": 0.00019417489838906965, "loss": 2.913316249847412, "step": 5245, "token_acc": 0.31162490183667196 }, { "epoch": 3.0750513046027557, "grad_norm": 0.3227160881569984, "learning_rate": 0.0001941716383375065, "loss": 2.9513397216796875, "step": 5246, "token_acc": 0.3066730376931983 }, { "epoch": 3.0756376429199648, "grad_norm": 0.3157340048043223, "learning_rate": 0.0001941683774013277, "loss": 2.908053159713745, "step": 5247, "token_acc": 0.310422894154325 }, { "epoch": 3.076223981237174, "grad_norm": 0.246754061348291, "learning_rate": 0.0001941651155805639, "loss": 2.93650484085083, "step": 5248, "token_acc": 0.30523670465213987 }, { "epoch": 3.076810319554383, "grad_norm": 0.35242963962301815, "learning_rate": 0.0001941618528752457, "loss": 2.880807876586914, "step": 5249, "token_acc": 0.31444160586907816 }, { "epoch": 3.077396657871592, "grad_norm": 0.31465438424943026, "learning_rate": 0.0001941585892854038, "loss": 2.9655680656433105, "step": 5250, "token_acc": 0.30255321720078704 }, { "epoch": 3.077982996188801, "grad_norm": 0.2872862543584576, "learning_rate": 0.00019415532481106883, "loss": 2.9440109729766846, "step": 5251, "token_acc": 0.30753821992884844 }, { "epoch": 3.07856933450601, "grad_norm": 0.2897755761081667, "learning_rate": 0.00019415205945227143, "loss": 2.9169178009033203, "step": 5252, "token_acc": 0.30991938153516074 }, { "epoch": 3.079155672823219, "grad_norm": 0.31472093047981187, "learning_rate": 0.00019414879320904237, "loss": 2.9474191665649414, "step": 5253, "token_acc": 0.30488561418495974 }, { "epoch": 3.079742011140428, "grad_norm": 0.30690989633985, "learning_rate": 0.0001941455260814122, "loss": 2.9245386123657227, "step": 5254, "token_acc": 0.30772148797559556 }, { "epoch": 3.0803283494576372, "grad_norm": 0.2662382184115055, "learning_rate": 0.00019414225806941172, "loss": 2.9700088500976562, "step": 5255, "token_acc": 0.3019841070725904 }, { "epoch": 3.080914687774846, "grad_norm": 0.2930865937032236, "learning_rate": 0.00019413898917307153, "loss": 2.923515796661377, "step": 5256, "token_acc": 0.3065538293081007 }, { "epoch": 3.081501026092055, "grad_norm": 0.34113560735904935, "learning_rate": 0.00019413571939242243, "loss": 2.9175162315368652, "step": 5257, "token_acc": 0.3096439991914962 }, { "epoch": 3.082087364409264, "grad_norm": 0.2668450414223363, "learning_rate": 0.0001941324487274951, "loss": 2.928525447845459, "step": 5258, "token_acc": 0.30860076271816606 }, { "epoch": 3.0826737027264732, "grad_norm": 0.2942871762951993, "learning_rate": 0.00019412917717832024, "loss": 2.9353747367858887, "step": 5259, "token_acc": 0.3087878284230517 }, { "epoch": 3.0832600410436823, "grad_norm": 0.2935801286227263, "learning_rate": 0.0001941259047449286, "loss": 2.9370031356811523, "step": 5260, "token_acc": 0.306471992138144 }, { "epoch": 3.0838463793608915, "grad_norm": 0.24544981147428738, "learning_rate": 0.00019412263142735094, "loss": 2.964984893798828, "step": 5261, "token_acc": 0.3027644170050066 }, { "epoch": 3.0844327176781, "grad_norm": 0.26775146706493064, "learning_rate": 0.00019411935722561796, "loss": 2.943545341491699, "step": 5262, "token_acc": 0.3068719280348697 }, { "epoch": 3.0850190559953092, "grad_norm": 0.2681334063986024, "learning_rate": 0.00019411608213976047, "loss": 2.9939284324645996, "step": 5263, "token_acc": 0.3000466770160275 }, { "epoch": 3.0856053943125183, "grad_norm": 0.295830919167988, "learning_rate": 0.00019411280616980921, "loss": 2.9422836303710938, "step": 5264, "token_acc": 0.3080937824270129 }, { "epoch": 3.0861917326297275, "grad_norm": 0.39210307916236614, "learning_rate": 0.00019410952931579492, "loss": 2.9263384342193604, "step": 5265, "token_acc": 0.31013316714601974 }, { "epoch": 3.0867780709469366, "grad_norm": 0.39608985235795396, "learning_rate": 0.00019410625157774848, "loss": 2.9281978607177734, "step": 5266, "token_acc": 0.30927426400214886 }, { "epoch": 3.0873644092641452, "grad_norm": 0.295105837637503, "learning_rate": 0.00019410297295570058, "loss": 3.008229970932007, "step": 5267, "token_acc": 0.29762900387559066 }, { "epoch": 3.0879507475813543, "grad_norm": 0.27846917577705105, "learning_rate": 0.00019409969344968208, "loss": 2.9562511444091797, "step": 5268, "token_acc": 0.3051255189539441 }, { "epoch": 3.0885370858985635, "grad_norm": 0.28678201631484906, "learning_rate": 0.00019409641305972373, "loss": 3.0088610649108887, "step": 5269, "token_acc": 0.2969082764191819 }, { "epoch": 3.0891234242157726, "grad_norm": 0.2942488709892208, "learning_rate": 0.00019409313178585634, "loss": 2.943333625793457, "step": 5270, "token_acc": 0.30531008425944955 }, { "epoch": 3.0897097625329817, "grad_norm": 0.2937022682363384, "learning_rate": 0.00019408984962811083, "loss": 2.89353609085083, "step": 5271, "token_acc": 0.311956298105583 }, { "epoch": 3.0902961008501904, "grad_norm": 0.3114187104421518, "learning_rate": 0.00019408656658651796, "loss": 2.9096574783325195, "step": 5272, "token_acc": 0.31030414566503106 }, { "epoch": 3.0908824391673995, "grad_norm": 0.24161713507387406, "learning_rate": 0.00019408328266110858, "loss": 2.899527072906494, "step": 5273, "token_acc": 0.31258726021095784 }, { "epoch": 3.0914687774846086, "grad_norm": 0.24702987248540267, "learning_rate": 0.00019407999785191353, "loss": 2.9260406494140625, "step": 5274, "token_acc": 0.30848256834712817 }, { "epoch": 3.0920551158018177, "grad_norm": 0.2644247465153701, "learning_rate": 0.0001940767121589637, "loss": 2.941545248031616, "step": 5275, "token_acc": 0.3052283599310318 }, { "epoch": 3.092641454119027, "grad_norm": 0.2778480258797623, "learning_rate": 0.00019407342558228988, "loss": 2.9225854873657227, "step": 5276, "token_acc": 0.30939987369276184 }, { "epoch": 3.093227792436236, "grad_norm": 0.24224388215503337, "learning_rate": 0.00019407013812192304, "loss": 2.959624767303467, "step": 5277, "token_acc": 0.3035603087202888 }, { "epoch": 3.0938141307534446, "grad_norm": 0.2873938600489093, "learning_rate": 0.00019406684977789395, "loss": 2.9027814865112305, "step": 5278, "token_acc": 0.31272768032281567 }, { "epoch": 3.0944004690706537, "grad_norm": 0.27584699508693716, "learning_rate": 0.00019406356055023363, "loss": 2.9270944595336914, "step": 5279, "token_acc": 0.3091854529649046 }, { "epoch": 3.094986807387863, "grad_norm": 0.28728778175043224, "learning_rate": 0.00019406027043897286, "loss": 2.965500831604004, "step": 5280, "token_acc": 0.30353420553699184 }, { "epoch": 3.095573145705072, "grad_norm": 0.31176559324320124, "learning_rate": 0.00019405697944414264, "loss": 2.9307422637939453, "step": 5281, "token_acc": 0.3094938768073504 }, { "epoch": 3.096159484022281, "grad_norm": 0.36881157131464337, "learning_rate": 0.0001940536875657738, "loss": 2.9457814693450928, "step": 5282, "token_acc": 0.306535019931067 }, { "epoch": 3.0967458223394897, "grad_norm": 0.3551174431255161, "learning_rate": 0.00019405039480389734, "loss": 2.9334702491760254, "step": 5283, "token_acc": 0.3071493085470508 }, { "epoch": 3.097332160656699, "grad_norm": 0.3243456682694211, "learning_rate": 0.00019404710115854417, "loss": 2.929581642150879, "step": 5284, "token_acc": 0.308254501221437 }, { "epoch": 3.097918498973908, "grad_norm": 0.3730683604152476, "learning_rate": 0.00019404380662974515, "loss": 2.9672653675079346, "step": 5285, "token_acc": 0.3034021562514886 }, { "epoch": 3.098504837291117, "grad_norm": 0.4005567694623175, "learning_rate": 0.00019404051121753134, "loss": 2.953073024749756, "step": 5286, "token_acc": 0.30552695261537505 }, { "epoch": 3.099091175608326, "grad_norm": 0.29494731724503725, "learning_rate": 0.00019403721492193364, "loss": 2.8535995483398438, "step": 5287, "token_acc": 0.3194527677448265 }, { "epoch": 3.099677513925535, "grad_norm": 0.40836372664406084, "learning_rate": 0.00019403391774298304, "loss": 2.9541144371032715, "step": 5288, "token_acc": 0.305690288430377 }, { "epoch": 3.100263852242744, "grad_norm": 0.3512534198082233, "learning_rate": 0.00019403061968071046, "loss": 2.952315330505371, "step": 5289, "token_acc": 0.3046761325219743 }, { "epoch": 3.100850190559953, "grad_norm": 0.2880404152189814, "learning_rate": 0.00019402732073514693, "loss": 2.9204468727111816, "step": 5290, "token_acc": 0.30863308811172674 }, { "epoch": 3.101436528877162, "grad_norm": 0.33061426487576784, "learning_rate": 0.00019402402090632344, "loss": 2.92392897605896, "step": 5291, "token_acc": 0.3094264198821996 }, { "epoch": 3.1020228671943713, "grad_norm": 0.3375679762301312, "learning_rate": 0.00019402072019427094, "loss": 2.946640968322754, "step": 5292, "token_acc": 0.30646521972543905 }, { "epoch": 3.1026092055115804, "grad_norm": 0.3295037384591374, "learning_rate": 0.0001940174185990205, "loss": 2.918299436569214, "step": 5293, "token_acc": 0.3092454091707742 }, { "epoch": 3.103195543828789, "grad_norm": 0.2773453587785817, "learning_rate": 0.0001940141161206031, "loss": 2.967357635498047, "step": 5294, "token_acc": 0.30174333475687876 }, { "epoch": 3.103781882145998, "grad_norm": 0.39711589345577736, "learning_rate": 0.00019401081275904973, "loss": 2.9420785903930664, "step": 5295, "token_acc": 0.3061059802187725 }, { "epoch": 3.1043682204632073, "grad_norm": 0.2776497907277249, "learning_rate": 0.00019400750851439148, "loss": 2.913527250289917, "step": 5296, "token_acc": 0.31029434683413004 }, { "epoch": 3.1049545587804164, "grad_norm": 0.3041669323881054, "learning_rate": 0.00019400420338665936, "loss": 2.924229621887207, "step": 5297, "token_acc": 0.30887089248648797 }, { "epoch": 3.1055408970976255, "grad_norm": 0.2812968130014946, "learning_rate": 0.00019400089737588446, "loss": 2.918821334838867, "step": 5298, "token_acc": 0.31000584099964534 }, { "epoch": 3.106127235414834, "grad_norm": 0.27556102386814124, "learning_rate": 0.00019399759048209774, "loss": 2.9605867862701416, "step": 5299, "token_acc": 0.3023844111606472 }, { "epoch": 3.1067135737320433, "grad_norm": 0.2865140845766617, "learning_rate": 0.00019399428270533035, "loss": 2.9104089736938477, "step": 5300, "token_acc": 0.3106944126838936 }, { "epoch": 3.1072999120492524, "grad_norm": 0.30365446503702104, "learning_rate": 0.00019399097404561332, "loss": 2.9876503944396973, "step": 5301, "token_acc": 0.30132215277205426 }, { "epoch": 3.1078862503664615, "grad_norm": 0.27622312934625304, "learning_rate": 0.00019398766450297777, "loss": 2.948108673095703, "step": 5302, "token_acc": 0.30504282026766194 }, { "epoch": 3.1084725886836706, "grad_norm": 0.29743522738915484, "learning_rate": 0.0001939843540774547, "loss": 2.936128616333008, "step": 5303, "token_acc": 0.3070857847545153 }, { "epoch": 3.1090589270008797, "grad_norm": 0.29427151603144447, "learning_rate": 0.00019398104276907533, "loss": 2.93275785446167, "step": 5304, "token_acc": 0.3068207520869387 }, { "epoch": 3.1096452653180884, "grad_norm": 0.29719659656441677, "learning_rate": 0.00019397773057787068, "loss": 2.957824468612671, "step": 5305, "token_acc": 0.30477761629810024 }, { "epoch": 3.1102316036352975, "grad_norm": 0.3105909186102596, "learning_rate": 0.00019397441750387188, "loss": 2.9183998107910156, "step": 5306, "token_acc": 0.31048551916654205 }, { "epoch": 3.1108179419525066, "grad_norm": 0.26957806088464414, "learning_rate": 0.00019397110354711007, "loss": 2.9416487216949463, "step": 5307, "token_acc": 0.30642212201210056 }, { "epoch": 3.1114042802697157, "grad_norm": 0.32748789515052046, "learning_rate": 0.00019396778870761638, "loss": 2.9190382957458496, "step": 5308, "token_acc": 0.3102018203403245 }, { "epoch": 3.111990618586925, "grad_norm": 0.2565018977444655, "learning_rate": 0.00019396447298542193, "loss": 2.9649786949157715, "step": 5309, "token_acc": 0.30458631912135414 }, { "epoch": 3.1125769569041335, "grad_norm": 0.2836750798123025, "learning_rate": 0.0001939611563805579, "loss": 2.9511170387268066, "step": 5310, "token_acc": 0.30518066818034384 }, { "epoch": 3.1131632952213426, "grad_norm": 0.3176591172033794, "learning_rate": 0.0001939578388930554, "loss": 2.935601234436035, "step": 5311, "token_acc": 0.3084529149768539 }, { "epoch": 3.1137496335385517, "grad_norm": 0.30908198506023205, "learning_rate": 0.0001939545205229456, "loss": 2.885695457458496, "step": 5312, "token_acc": 0.31453876770135825 }, { "epoch": 3.114335971855761, "grad_norm": 0.26903683298519443, "learning_rate": 0.0001939512012702597, "loss": 2.9344825744628906, "step": 5313, "token_acc": 0.3069585598263705 }, { "epoch": 3.11492231017297, "grad_norm": 0.31262387201186514, "learning_rate": 0.00019394788113502885, "loss": 2.9133381843566895, "step": 5314, "token_acc": 0.3097518346519134 }, { "epoch": 3.115508648490179, "grad_norm": 0.2730652191570408, "learning_rate": 0.00019394456011728424, "loss": 2.937570095062256, "step": 5315, "token_acc": 0.30798466211323156 }, { "epoch": 3.1160949868073877, "grad_norm": 0.3339072777217726, "learning_rate": 0.00019394123821705713, "loss": 2.9239795207977295, "step": 5316, "token_acc": 0.30889251532197126 }, { "epoch": 3.116681325124597, "grad_norm": 0.34983158565428213, "learning_rate": 0.00019393791543437865, "loss": 2.9083948135375977, "step": 5317, "token_acc": 0.3097774490963108 }, { "epoch": 3.117267663441806, "grad_norm": 0.2671857251093449, "learning_rate": 0.00019393459176928003, "loss": 2.943286418914795, "step": 5318, "token_acc": 0.30715212969304206 }, { "epoch": 3.117854001759015, "grad_norm": 0.33354673698416265, "learning_rate": 0.0001939312672217925, "loss": 2.9155914783477783, "step": 5319, "token_acc": 0.3104030969644832 }, { "epoch": 3.118440340076224, "grad_norm": 0.282534874042418, "learning_rate": 0.0001939279417919473, "loss": 2.86663556098938, "step": 5320, "token_acc": 0.31875527837328654 }, { "epoch": 3.119026678393433, "grad_norm": 0.31113542467319527, "learning_rate": 0.00019392461547977562, "loss": 2.9302291870117188, "step": 5321, "token_acc": 0.30801729193812477 }, { "epoch": 3.119613016710642, "grad_norm": 0.3273603579807016, "learning_rate": 0.00019392128828530877, "loss": 2.9027786254882812, "step": 5322, "token_acc": 0.3111974730943965 }, { "epoch": 3.120199355027851, "grad_norm": 0.31017293100333126, "learning_rate": 0.00019391796020857798, "loss": 2.9264841079711914, "step": 5323, "token_acc": 0.30917253100424485 }, { "epoch": 3.12078569334506, "grad_norm": 0.2936857303423182, "learning_rate": 0.0001939146312496145, "loss": 2.8960013389587402, "step": 5324, "token_acc": 0.31130930254925254 }, { "epoch": 3.1213720316622693, "grad_norm": 0.3279480394304328, "learning_rate": 0.0001939113014084496, "loss": 2.889918804168701, "step": 5325, "token_acc": 0.3125660559002961 }, { "epoch": 3.121958369979478, "grad_norm": 0.34142747123428907, "learning_rate": 0.00019390797068511462, "loss": 2.928982734680176, "step": 5326, "token_acc": 0.3078903750530008 }, { "epoch": 3.122544708296687, "grad_norm": 0.30541291499605416, "learning_rate": 0.00019390463907964075, "loss": 2.9199109077453613, "step": 5327, "token_acc": 0.3091409535881333 }, { "epoch": 3.123131046613896, "grad_norm": 0.27028448814851974, "learning_rate": 0.00019390130659205937, "loss": 2.904547691345215, "step": 5328, "token_acc": 0.31227825020908767 }, { "epoch": 3.1237173849311053, "grad_norm": 0.3175142317651791, "learning_rate": 0.00019389797322240173, "loss": 2.9465131759643555, "step": 5329, "token_acc": 0.3052346904946499 }, { "epoch": 3.1243037232483144, "grad_norm": 0.3003521630194517, "learning_rate": 0.00019389463897069912, "loss": 2.9653706550598145, "step": 5330, "token_acc": 0.30306370957697437 }, { "epoch": 3.1248900615655235, "grad_norm": 0.28948798864015673, "learning_rate": 0.00019389130383698296, "loss": 2.9111251831054688, "step": 5331, "token_acc": 0.31135627241604175 }, { "epoch": 3.125476399882732, "grad_norm": 0.2986607780908537, "learning_rate": 0.00019388796782128452, "loss": 2.9010655879974365, "step": 5332, "token_acc": 0.31159625016052395 }, { "epoch": 3.1260627381999413, "grad_norm": 0.3239253619905989, "learning_rate": 0.0001938846309236351, "loss": 2.9301304817199707, "step": 5333, "token_acc": 0.3074228903646658 }, { "epoch": 3.1266490765171504, "grad_norm": 0.350882769602478, "learning_rate": 0.00019388129314406612, "loss": 2.928159475326538, "step": 5334, "token_acc": 0.3082110160666083 }, { "epoch": 3.1272354148343595, "grad_norm": 0.30329498553723977, "learning_rate": 0.00019387795448260885, "loss": 2.968932867050171, "step": 5335, "token_acc": 0.30453604115171357 }, { "epoch": 3.1278217531515686, "grad_norm": 0.31103423677324843, "learning_rate": 0.00019387461493929476, "loss": 2.920726776123047, "step": 5336, "token_acc": 0.3084453834820862 }, { "epoch": 3.1284080914687773, "grad_norm": 0.31044657187543334, "learning_rate": 0.0001938712745141551, "loss": 2.927861213684082, "step": 5337, "token_acc": 0.30866695903497327 }, { "epoch": 3.1289944297859864, "grad_norm": 0.28145881657673516, "learning_rate": 0.00019386793320722134, "loss": 2.952730417251587, "step": 5338, "token_acc": 0.30477910573533795 }, { "epoch": 3.1295807681031955, "grad_norm": 0.3199176917717775, "learning_rate": 0.00019386459101852484, "loss": 2.94476318359375, "step": 5339, "token_acc": 0.30487715517826275 }, { "epoch": 3.1301671064204046, "grad_norm": 0.31147484197388536, "learning_rate": 0.00019386124794809698, "loss": 2.9064829349517822, "step": 5340, "token_acc": 0.312238571458904 }, { "epoch": 3.1307534447376137, "grad_norm": 0.36301557524582373, "learning_rate": 0.0001938579039959692, "loss": 2.919233560562134, "step": 5341, "token_acc": 0.3094864253814416 }, { "epoch": 3.1313397830548224, "grad_norm": 0.4599309404358186, "learning_rate": 0.00019385455916217287, "loss": 2.9691269397735596, "step": 5342, "token_acc": 0.30298467578650934 }, { "epoch": 3.1319261213720315, "grad_norm": 0.49096570313464677, "learning_rate": 0.0001938512134467394, "loss": 2.9045820236206055, "step": 5343, "token_acc": 0.3110213366184494 }, { "epoch": 3.1325124596892406, "grad_norm": 0.3421944630402371, "learning_rate": 0.00019384786684970029, "loss": 2.9322409629821777, "step": 5344, "token_acc": 0.30842541079052915 }, { "epoch": 3.1330987980064497, "grad_norm": 0.33629500292160724, "learning_rate": 0.0001938445193710869, "loss": 2.9425601959228516, "step": 5345, "token_acc": 0.30608184942847094 }, { "epoch": 3.133685136323659, "grad_norm": 0.3896565098209385, "learning_rate": 0.00019384117101093072, "loss": 2.9608845710754395, "step": 5346, "token_acc": 0.3027661229643044 }, { "epoch": 3.134271474640868, "grad_norm": 0.28891675514816956, "learning_rate": 0.00019383782176926321, "loss": 2.9589552879333496, "step": 5347, "token_acc": 0.3032106503847205 }, { "epoch": 3.1348578129580766, "grad_norm": 0.3149987301232355, "learning_rate": 0.0001938344716461158, "loss": 2.9914369583129883, "step": 5348, "token_acc": 0.2991584495511558 }, { "epoch": 3.1354441512752858, "grad_norm": 0.2926856937917814, "learning_rate": 0.00019383112064151996, "loss": 2.935220718383789, "step": 5349, "token_acc": 0.3078945046732852 }, { "epoch": 3.136030489592495, "grad_norm": 0.34515804976702014, "learning_rate": 0.00019382776875550718, "loss": 2.9218838214874268, "step": 5350, "token_acc": 0.3100083287799133 }, { "epoch": 3.136616827909704, "grad_norm": 0.2812203941084217, "learning_rate": 0.00019382441598810894, "loss": 2.8866593837738037, "step": 5351, "token_acc": 0.3134866353654479 }, { "epoch": 3.137203166226913, "grad_norm": 0.32565338480113465, "learning_rate": 0.00019382106233935677, "loss": 2.9165635108947754, "step": 5352, "token_acc": 0.3096304347268826 }, { "epoch": 3.1377895045441218, "grad_norm": 0.2578356065473407, "learning_rate": 0.00019381770780928212, "loss": 2.951174736022949, "step": 5353, "token_acc": 0.30529785025627854 }, { "epoch": 3.138375842861331, "grad_norm": 0.29404377190990555, "learning_rate": 0.00019381435239791656, "loss": 2.951747417449951, "step": 5354, "token_acc": 0.3045936322717008 }, { "epoch": 3.13896218117854, "grad_norm": 0.2888957756054559, "learning_rate": 0.00019381099610529153, "loss": 2.9502921104431152, "step": 5355, "token_acc": 0.3047815599726547 }, { "epoch": 3.139548519495749, "grad_norm": 0.3260797383022326, "learning_rate": 0.00019380763893143862, "loss": 2.9583778381347656, "step": 5356, "token_acc": 0.3054860186418109 }, { "epoch": 3.140134857812958, "grad_norm": 0.2999891889790078, "learning_rate": 0.00019380428087638937, "loss": 2.949016809463501, "step": 5357, "token_acc": 0.30712072511875765 }, { "epoch": 3.1407211961301673, "grad_norm": 0.2616016911648799, "learning_rate": 0.0001938009219401753, "loss": 2.940258026123047, "step": 5358, "token_acc": 0.3056002603471465 }, { "epoch": 3.141307534447376, "grad_norm": 0.2808921271452929, "learning_rate": 0.00019379756212282797, "loss": 2.9346680641174316, "step": 5359, "token_acc": 0.30609792882834697 }, { "epoch": 3.141893872764585, "grad_norm": 0.24868411859685202, "learning_rate": 0.0001937942014243789, "loss": 2.9106290340423584, "step": 5360, "token_acc": 0.3103786107072878 }, { "epoch": 3.142480211081794, "grad_norm": 0.27297900191098723, "learning_rate": 0.00019379083984485973, "loss": 2.9510788917541504, "step": 5361, "token_acc": 0.3051438491312532 }, { "epoch": 3.1430665493990033, "grad_norm": 0.2867743444581692, "learning_rate": 0.000193787477384302, "loss": 2.935410976409912, "step": 5362, "token_acc": 0.3080153247304243 }, { "epoch": 3.1436528877162124, "grad_norm": 0.26065456018674693, "learning_rate": 0.00019378411404273732, "loss": 2.945131301879883, "step": 5363, "token_acc": 0.30903977104510016 }, { "epoch": 3.144239226033421, "grad_norm": 0.2660856830950308, "learning_rate": 0.0001937807498201972, "loss": 2.9169299602508545, "step": 5364, "token_acc": 0.3121596855514387 }, { "epoch": 3.14482556435063, "grad_norm": 0.29784719047378566, "learning_rate": 0.00019377738471671336, "loss": 2.95880126953125, "step": 5365, "token_acc": 0.3025050587023027 }, { "epoch": 3.1454119026678393, "grad_norm": 0.289038301231129, "learning_rate": 0.00019377401873231734, "loss": 2.936163902282715, "step": 5366, "token_acc": 0.3070291624876544 }, { "epoch": 3.1459982409850484, "grad_norm": 0.2672568109746362, "learning_rate": 0.0001937706518670408, "loss": 2.9431324005126953, "step": 5367, "token_acc": 0.30522628533373186 }, { "epoch": 3.1465845793022575, "grad_norm": 0.2556023038534498, "learning_rate": 0.00019376728412091532, "loss": 2.9482779502868652, "step": 5368, "token_acc": 0.3041297623071148 }, { "epoch": 3.1471709176194667, "grad_norm": 0.33757198042767206, "learning_rate": 0.00019376391549397255, "loss": 2.917387008666992, "step": 5369, "token_acc": 0.31004216550157887 }, { "epoch": 3.1477572559366753, "grad_norm": 0.3010495931187666, "learning_rate": 0.00019376054598624416, "loss": 2.949450731277466, "step": 5370, "token_acc": 0.30425278012277357 }, { "epoch": 3.1483435942538844, "grad_norm": 0.3096502416314198, "learning_rate": 0.00019375717559776178, "loss": 2.8806324005126953, "step": 5371, "token_acc": 0.3144549379384781 }, { "epoch": 3.1489299325710935, "grad_norm": 0.3037628555209738, "learning_rate": 0.00019375380432855709, "loss": 2.959415912628174, "step": 5372, "token_acc": 0.3040190737467887 }, { "epoch": 3.1495162708883027, "grad_norm": 0.36340861540468805, "learning_rate": 0.00019375043217866172, "loss": 2.9353561401367188, "step": 5373, "token_acc": 0.30908735409289845 }, { "epoch": 3.1501026092055118, "grad_norm": 0.3036538922365206, "learning_rate": 0.00019374705914810736, "loss": 2.925947666168213, "step": 5374, "token_acc": 0.30884161618002093 }, { "epoch": 3.1506889475227204, "grad_norm": 0.3001600341269259, "learning_rate": 0.00019374368523692573, "loss": 2.9768333435058594, "step": 5375, "token_acc": 0.3018817971278239 }, { "epoch": 3.1512752858399296, "grad_norm": 0.37424349002926516, "learning_rate": 0.00019374031044514848, "loss": 2.9330921173095703, "step": 5376, "token_acc": 0.3070908457242889 }, { "epoch": 3.1518616241571387, "grad_norm": 0.3068749590060571, "learning_rate": 0.00019373693477280735, "loss": 2.958658218383789, "step": 5377, "token_acc": 0.3038902251574628 }, { "epoch": 3.1524479624743478, "grad_norm": 0.27317204923169974, "learning_rate": 0.00019373355821993403, "loss": 2.929501533508301, "step": 5378, "token_acc": 0.3080402243160217 }, { "epoch": 3.153034300791557, "grad_norm": 0.3556376693671732, "learning_rate": 0.00019373018078656023, "loss": 2.9665989875793457, "step": 5379, "token_acc": 0.3017731480313769 }, { "epoch": 3.1536206391087656, "grad_norm": 0.3254599522710315, "learning_rate": 0.00019372680247271767, "loss": 2.9713268280029297, "step": 5380, "token_acc": 0.30193895805851206 }, { "epoch": 3.1542069774259747, "grad_norm": 0.30618231650994454, "learning_rate": 0.0001937234232784381, "loss": 2.9442286491394043, "step": 5381, "token_acc": 0.30383559193636633 }, { "epoch": 3.154793315743184, "grad_norm": 0.31758797844402153, "learning_rate": 0.00019372004320375327, "loss": 2.9296531677246094, "step": 5382, "token_acc": 0.30862708957325347 }, { "epoch": 3.155379654060393, "grad_norm": 0.2976222161357982, "learning_rate": 0.00019371666224869493, "loss": 2.9181623458862305, "step": 5383, "token_acc": 0.30918101008406884 }, { "epoch": 3.155965992377602, "grad_norm": 0.2943687092180571, "learning_rate": 0.00019371328041329483, "loss": 2.971268653869629, "step": 5384, "token_acc": 0.30291594653305215 }, { "epoch": 3.1565523306948107, "grad_norm": 0.3427956489913068, "learning_rate": 0.00019370989769758478, "loss": 2.9433507919311523, "step": 5385, "token_acc": 0.3054969363270203 }, { "epoch": 3.15713866901202, "grad_norm": 0.31929296213015407, "learning_rate": 0.00019370651410159645, "loss": 2.984891176223755, "step": 5386, "token_acc": 0.30059255618361047 }, { "epoch": 3.157725007329229, "grad_norm": 0.2930381641512369, "learning_rate": 0.0001937031296253617, "loss": 2.939363956451416, "step": 5387, "token_acc": 0.3052507541003419 }, { "epoch": 3.158311345646438, "grad_norm": 0.32795796501839464, "learning_rate": 0.00019369974426891235, "loss": 2.949565887451172, "step": 5388, "token_acc": 0.307638846920892 }, { "epoch": 3.158897683963647, "grad_norm": 0.3458212580688221, "learning_rate": 0.00019369635803228016, "loss": 2.975759983062744, "step": 5389, "token_acc": 0.3014857125182363 }, { "epoch": 3.1594840222808562, "grad_norm": 0.2683708646001357, "learning_rate": 0.00019369297091549693, "loss": 2.9430699348449707, "step": 5390, "token_acc": 0.30690886841196957 }, { "epoch": 3.160070360598065, "grad_norm": 0.2992389306741696, "learning_rate": 0.00019368958291859448, "loss": 2.961652994155884, "step": 5391, "token_acc": 0.3039216947939804 }, { "epoch": 3.160656698915274, "grad_norm": 0.26411189200969504, "learning_rate": 0.00019368619404160466, "loss": 2.9590036869049072, "step": 5392, "token_acc": 0.30535378623386494 }, { "epoch": 3.161243037232483, "grad_norm": 0.2975888052416875, "learning_rate": 0.0001936828042845593, "loss": 2.9536237716674805, "step": 5393, "token_acc": 0.3039029875145867 }, { "epoch": 3.1618293755496922, "grad_norm": 0.28949195044322307, "learning_rate": 0.00019367941364749022, "loss": 2.9468026161193848, "step": 5394, "token_acc": 0.3053276191513247 }, { "epoch": 3.1624157138669013, "grad_norm": 0.2828267080384573, "learning_rate": 0.0001936760221304293, "loss": 2.9120049476623535, "step": 5395, "token_acc": 0.31054327341992743 }, { "epoch": 3.16300205218411, "grad_norm": 0.27990839248494787, "learning_rate": 0.00019367262973340833, "loss": 2.9396982192993164, "step": 5396, "token_acc": 0.3061681808332408 }, { "epoch": 3.163588390501319, "grad_norm": 0.2715808957041378, "learning_rate": 0.00019366923645645928, "loss": 2.962836503982544, "step": 5397, "token_acc": 0.3031536113936928 }, { "epoch": 3.1641747288185282, "grad_norm": 0.2753853877873122, "learning_rate": 0.00019366584229961396, "loss": 2.9541263580322266, "step": 5398, "token_acc": 0.30370695171351875 }, { "epoch": 3.1647610671357373, "grad_norm": 0.2518873068212432, "learning_rate": 0.00019366244726290427, "loss": 2.9393396377563477, "step": 5399, "token_acc": 0.3072903921500082 }, { "epoch": 3.1653474054529465, "grad_norm": 0.24005377521421326, "learning_rate": 0.0001936590513463621, "loss": 2.9471359252929688, "step": 5400, "token_acc": 0.3066001002762932 }, { "epoch": 3.1659337437701556, "grad_norm": 0.2333059238297225, "learning_rate": 0.00019365565455001934, "loss": 2.9684319496154785, "step": 5401, "token_acc": 0.3024221663848032 }, { "epoch": 3.1665200820873642, "grad_norm": 0.250323384350809, "learning_rate": 0.00019365225687390794, "loss": 2.9609391689300537, "step": 5402, "token_acc": 0.30253484091506705 }, { "epoch": 3.1671064204045734, "grad_norm": 0.25459963632168203, "learning_rate": 0.00019364885831805973, "loss": 2.934001922607422, "step": 5403, "token_acc": 0.30783589139026807 }, { "epoch": 3.1676927587217825, "grad_norm": 0.25809724109803706, "learning_rate": 0.00019364545888250675, "loss": 2.949033260345459, "step": 5404, "token_acc": 0.3048904295498301 }, { "epoch": 3.1682790970389916, "grad_norm": 0.26172141249526054, "learning_rate": 0.00019364205856728083, "loss": 2.919278621673584, "step": 5405, "token_acc": 0.31015854098386364 }, { "epoch": 3.1688654353562007, "grad_norm": 0.28186947948424884, "learning_rate": 0.00019363865737241398, "loss": 2.9443674087524414, "step": 5406, "token_acc": 0.3080535616485473 }, { "epoch": 3.1694517736734094, "grad_norm": 0.25873923019452383, "learning_rate": 0.00019363525529793812, "loss": 2.9504218101501465, "step": 5407, "token_acc": 0.30423842246158384 }, { "epoch": 3.1700381119906185, "grad_norm": 0.2633502020229431, "learning_rate": 0.00019363185234388519, "loss": 2.9494967460632324, "step": 5408, "token_acc": 0.3055652556292761 }, { "epoch": 3.1706244503078276, "grad_norm": 0.28583949519563906, "learning_rate": 0.00019362844851028717, "loss": 2.9417672157287598, "step": 5409, "token_acc": 0.30632624447355494 }, { "epoch": 3.1712107886250367, "grad_norm": 0.3569574017227077, "learning_rate": 0.0001936250437971761, "loss": 2.962395668029785, "step": 5410, "token_acc": 0.3029179884248003 }, { "epoch": 3.171797126942246, "grad_norm": 0.4449873235306184, "learning_rate": 0.00019362163820458385, "loss": 2.980628252029419, "step": 5411, "token_acc": 0.29927414701488914 }, { "epoch": 3.172383465259455, "grad_norm": 0.41960741424517234, "learning_rate": 0.00019361823173254247, "loss": 2.945840835571289, "step": 5412, "token_acc": 0.3062181400334698 }, { "epoch": 3.1729698035766636, "grad_norm": 0.29673124927088296, "learning_rate": 0.00019361482438108398, "loss": 2.933629035949707, "step": 5413, "token_acc": 0.30711911223883476 }, { "epoch": 3.1735561418938727, "grad_norm": 0.39812278897062275, "learning_rate": 0.00019361141615024035, "loss": 2.9264397621154785, "step": 5414, "token_acc": 0.30798003720154904 }, { "epoch": 3.174142480211082, "grad_norm": 0.34948643822917974, "learning_rate": 0.00019360800704004363, "loss": 2.964817523956299, "step": 5415, "token_acc": 0.30409428362158947 }, { "epoch": 3.174728818528291, "grad_norm": 0.2971428397046301, "learning_rate": 0.00019360459705052577, "loss": 2.9055821895599365, "step": 5416, "token_acc": 0.3122780038729587 }, { "epoch": 3.1753151568455, "grad_norm": 0.28491817616925313, "learning_rate": 0.0001936011861817189, "loss": 2.9157862663269043, "step": 5417, "token_acc": 0.31049192040177354 }, { "epoch": 3.1759014951627087, "grad_norm": 0.3064024195188568, "learning_rate": 0.000193597774433655, "loss": 2.9807801246643066, "step": 5418, "token_acc": 0.3022662889518414 }, { "epoch": 3.176487833479918, "grad_norm": 0.2823644680375918, "learning_rate": 0.00019359436180636612, "loss": 2.9004621505737305, "step": 5419, "token_acc": 0.31279571528072436 }, { "epoch": 3.177074171797127, "grad_norm": 0.28848919001436, "learning_rate": 0.00019359094829988436, "loss": 2.9565320014953613, "step": 5420, "token_acc": 0.3040721750381949 }, { "epoch": 3.177660510114336, "grad_norm": 0.31960333194769136, "learning_rate": 0.00019358753391424176, "loss": 2.9562735557556152, "step": 5421, "token_acc": 0.302570677441496 }, { "epoch": 3.178246848431545, "grad_norm": 0.31295974426585066, "learning_rate": 0.00019358411864947036, "loss": 2.924454689025879, "step": 5422, "token_acc": 0.3079727940136544 }, { "epoch": 3.1788331867487543, "grad_norm": 0.33653835342619737, "learning_rate": 0.00019358070250560227, "loss": 2.949678897857666, "step": 5423, "token_acc": 0.3048583317872194 }, { "epoch": 3.179419525065963, "grad_norm": 0.34397649896274735, "learning_rate": 0.0001935772854826696, "loss": 2.9417948722839355, "step": 5424, "token_acc": 0.3051552724914744 }, { "epoch": 3.180005863383172, "grad_norm": 0.27835006280904545, "learning_rate": 0.00019357386758070443, "loss": 2.971139907836914, "step": 5425, "token_acc": 0.30102860900332956 }, { "epoch": 3.180592201700381, "grad_norm": 0.2689938583651941, "learning_rate": 0.00019357044879973886, "loss": 2.9225621223449707, "step": 5426, "token_acc": 0.30885179937957874 }, { "epoch": 3.1811785400175903, "grad_norm": 0.30389340963963807, "learning_rate": 0.00019356702913980503, "loss": 2.9366111755371094, "step": 5427, "token_acc": 0.3065503243399579 }, { "epoch": 3.1817648783347994, "grad_norm": 0.3234591281125431, "learning_rate": 0.000193563608600935, "loss": 2.969336986541748, "step": 5428, "token_acc": 0.30234184387026963 }, { "epoch": 3.182351216652008, "grad_norm": 0.27777468741234285, "learning_rate": 0.000193560187183161, "loss": 2.9282541275024414, "step": 5429, "token_acc": 0.3085211440528681 }, { "epoch": 3.182937554969217, "grad_norm": 0.34837767627990823, "learning_rate": 0.00019355676488651508, "loss": 2.967630386352539, "step": 5430, "token_acc": 0.3030258835088581 }, { "epoch": 3.1835238932864263, "grad_norm": 0.33739995511290216, "learning_rate": 0.0001935533417110294, "loss": 2.9137308597564697, "step": 5431, "token_acc": 0.3110948905109489 }, { "epoch": 3.1841102316036354, "grad_norm": 0.28000891194144556, "learning_rate": 0.0001935499176567362, "loss": 2.9572746753692627, "step": 5432, "token_acc": 0.30369068806750527 }, { "epoch": 3.1846965699208445, "grad_norm": 0.3381436665561972, "learning_rate": 0.00019354649272366754, "loss": 2.929831027984619, "step": 5433, "token_acc": 0.3072453410600399 }, { "epoch": 3.185282908238053, "grad_norm": 0.3724357762112872, "learning_rate": 0.00019354306691185565, "loss": 2.9519662857055664, "step": 5434, "token_acc": 0.30512366903141075 }, { "epoch": 3.1858692465552623, "grad_norm": 0.2898397164437663, "learning_rate": 0.0001935396402213327, "loss": 2.928469657897949, "step": 5435, "token_acc": 0.30796954004186516 }, { "epoch": 3.1864555848724714, "grad_norm": 0.3085488336756759, "learning_rate": 0.00019353621265213086, "loss": 2.946547508239746, "step": 5436, "token_acc": 0.30662347463376227 }, { "epoch": 3.1870419231896805, "grad_norm": 0.3225034949415428, "learning_rate": 0.00019353278420428235, "loss": 2.9231815338134766, "step": 5437, "token_acc": 0.30875465644581296 }, { "epoch": 3.1876282615068896, "grad_norm": 0.27385182708442546, "learning_rate": 0.00019352935487781942, "loss": 2.9150285720825195, "step": 5438, "token_acc": 0.310208522496008 }, { "epoch": 3.1882145998240983, "grad_norm": 0.30777369736773025, "learning_rate": 0.00019352592467277417, "loss": 2.935791015625, "step": 5439, "token_acc": 0.3061763078363768 }, { "epoch": 3.1888009381413074, "grad_norm": 0.29001024259253677, "learning_rate": 0.0001935224935891789, "loss": 2.973250150680542, "step": 5440, "token_acc": 0.3031434158182186 }, { "epoch": 3.1893872764585165, "grad_norm": 0.29514472722988533, "learning_rate": 0.00019351906162706582, "loss": 2.959594249725342, "step": 5441, "token_acc": 0.3032567898483172 }, { "epoch": 3.1899736147757256, "grad_norm": 0.2751018755807374, "learning_rate": 0.00019351562878646718, "loss": 2.9430789947509766, "step": 5442, "token_acc": 0.3068423869413968 }, { "epoch": 3.1905599530929347, "grad_norm": 0.298249531995445, "learning_rate": 0.00019351219506741525, "loss": 2.9636337757110596, "step": 5443, "token_acc": 0.3028956503350062 }, { "epoch": 3.191146291410144, "grad_norm": 0.27117408091130357, "learning_rate": 0.00019350876046994223, "loss": 2.9704248905181885, "step": 5444, "token_acc": 0.3038214350970387 }, { "epoch": 3.1917326297273525, "grad_norm": 0.31475536959904754, "learning_rate": 0.0001935053249940804, "loss": 2.9319753646850586, "step": 5445, "token_acc": 0.30702200064567686 }, { "epoch": 3.1923189680445616, "grad_norm": 0.32835287632516824, "learning_rate": 0.00019350188863986208, "loss": 2.9596691131591797, "step": 5446, "token_acc": 0.30352112303846174 }, { "epoch": 3.1929053063617707, "grad_norm": 0.29828913908736276, "learning_rate": 0.0001934984514073195, "loss": 2.9990763664245605, "step": 5447, "token_acc": 0.2982541814186302 }, { "epoch": 3.19349164467898, "grad_norm": 0.2849555689360862, "learning_rate": 0.00019349501329648492, "loss": 2.9852724075317383, "step": 5448, "token_acc": 0.30089769446751297 }, { "epoch": 3.194077982996189, "grad_norm": 0.28346145087374514, "learning_rate": 0.00019349157430739071, "loss": 2.926243305206299, "step": 5449, "token_acc": 0.30862306631949765 }, { "epoch": 3.1946643213133976, "grad_norm": 0.24426226158845873, "learning_rate": 0.00019348813444006915, "loss": 2.930239677429199, "step": 5450, "token_acc": 0.3079855419337792 }, { "epoch": 3.1952506596306067, "grad_norm": 0.2553827354932598, "learning_rate": 0.00019348469369455252, "loss": 2.992568254470825, "step": 5451, "token_acc": 0.30089702620006326 }, { "epoch": 3.195836997947816, "grad_norm": 0.27511896233710137, "learning_rate": 0.00019348125207087317, "loss": 2.9348673820495605, "step": 5452, "token_acc": 0.307259675292545 }, { "epoch": 3.196423336265025, "grad_norm": 0.2824181800161575, "learning_rate": 0.00019347780956906343, "loss": 2.8963232040405273, "step": 5453, "token_acc": 0.3111624003357113 }, { "epoch": 3.197009674582234, "grad_norm": 0.2994826572413537, "learning_rate": 0.00019347436618915562, "loss": 2.9110381603240967, "step": 5454, "token_acc": 0.31093106267678866 }, { "epoch": 3.197596012899443, "grad_norm": 0.282598293275501, "learning_rate": 0.00019347092193118212, "loss": 2.935211420059204, "step": 5455, "token_acc": 0.30886008781820834 }, { "epoch": 3.198182351216652, "grad_norm": 0.29627676008012005, "learning_rate": 0.00019346747679517524, "loss": 2.931037187576294, "step": 5456, "token_acc": 0.3095992002847154 }, { "epoch": 3.198768689533861, "grad_norm": 0.30596325176166284, "learning_rate": 0.00019346403078116737, "loss": 2.922698497772217, "step": 5457, "token_acc": 0.30839446493195 }, { "epoch": 3.19935502785107, "grad_norm": 0.29713075964557495, "learning_rate": 0.00019346058388919088, "loss": 2.9282379150390625, "step": 5458, "token_acc": 0.3065554992207012 }, { "epoch": 3.199941366168279, "grad_norm": 0.2565061554649424, "learning_rate": 0.00019345713611927816, "loss": 2.8797237873077393, "step": 5459, "token_acc": 0.317002777034522 }, { "epoch": 3.2005277044854883, "grad_norm": 0.29439983110843776, "learning_rate": 0.00019345368747146155, "loss": 2.9747731685638428, "step": 5460, "token_acc": 0.3022374137323714 }, { "epoch": 3.201114042802697, "grad_norm": 0.3150538379039204, "learning_rate": 0.00019345023794577348, "loss": 2.937020778656006, "step": 5461, "token_acc": 0.30632220503957064 }, { "epoch": 3.201700381119906, "grad_norm": 0.2970739906773659, "learning_rate": 0.00019344678754224637, "loss": 2.9328978061676025, "step": 5462, "token_acc": 0.3067275675018051 }, { "epoch": 3.202286719437115, "grad_norm": 0.27600595667268685, "learning_rate": 0.0001934433362609126, "loss": 2.9669508934020996, "step": 5463, "token_acc": 0.30264190784093303 }, { "epoch": 3.2028730577543243, "grad_norm": 0.33063274040076696, "learning_rate": 0.0001934398841018046, "loss": 2.9089369773864746, "step": 5464, "token_acc": 0.31372093084015457 }, { "epoch": 3.2034593960715334, "grad_norm": 0.4093476264669585, "learning_rate": 0.00019343643106495482, "loss": 2.9509329795837402, "step": 5465, "token_acc": 0.30470074351987514 }, { "epoch": 3.2040457343887425, "grad_norm": 0.3553618297293979, "learning_rate": 0.00019343297715039565, "loss": 2.9705023765563965, "step": 5466, "token_acc": 0.3017262565352249 }, { "epoch": 3.204632072705951, "grad_norm": 0.3107527204189969, "learning_rate": 0.00019342952235815958, "loss": 2.9682536125183105, "step": 5467, "token_acc": 0.30191107443132 }, { "epoch": 3.2052184110231603, "grad_norm": 0.3412864289018294, "learning_rate": 0.00019342606668827905, "loss": 2.9663305282592773, "step": 5468, "token_acc": 0.30306588424158276 }, { "epoch": 3.2058047493403694, "grad_norm": 0.33719025633437755, "learning_rate": 0.0001934226101407865, "loss": 2.9414260387420654, "step": 5469, "token_acc": 0.30434286188785786 }, { "epoch": 3.2063910876575785, "grad_norm": 0.30130417864234116, "learning_rate": 0.00019341915271571444, "loss": 2.962266445159912, "step": 5470, "token_acc": 0.3041836836070812 }, { "epoch": 3.2069774259747876, "grad_norm": 0.2955779693777121, "learning_rate": 0.00019341569441309528, "loss": 2.9580588340759277, "step": 5471, "token_acc": 0.3052344766716054 }, { "epoch": 3.2075637642919963, "grad_norm": 0.2803147039519151, "learning_rate": 0.0001934122352329616, "loss": 2.942302703857422, "step": 5472, "token_acc": 0.30634347601856626 }, { "epoch": 3.2081501026092054, "grad_norm": 0.33085506451581237, "learning_rate": 0.00019340877517534582, "loss": 2.948922634124756, "step": 5473, "token_acc": 0.30460266952749465 }, { "epoch": 3.2087364409264145, "grad_norm": 0.2572760720654979, "learning_rate": 0.00019340531424028048, "loss": 2.930830955505371, "step": 5474, "token_acc": 0.30740315585688055 }, { "epoch": 3.2093227792436236, "grad_norm": 0.29317711744069486, "learning_rate": 0.00019340185242779808, "loss": 2.985015869140625, "step": 5475, "token_acc": 0.3022481774537696 }, { "epoch": 3.2099091175608327, "grad_norm": 0.3090127648650641, "learning_rate": 0.0001933983897379311, "loss": 2.9688525199890137, "step": 5476, "token_acc": 0.30239920877443355 }, { "epoch": 3.210495455878042, "grad_norm": 0.3039687955769756, "learning_rate": 0.00019339492617071214, "loss": 2.9415416717529297, "step": 5477, "token_acc": 0.3066315508346621 }, { "epoch": 3.2110817941952505, "grad_norm": 0.2983207522056923, "learning_rate": 0.00019339146172617366, "loss": 2.9704155921936035, "step": 5478, "token_acc": 0.304569453927583 }, { "epoch": 3.2116681325124596, "grad_norm": 0.258094086082446, "learning_rate": 0.0001933879964043483, "loss": 2.968628406524658, "step": 5479, "token_acc": 0.3020112159520329 }, { "epoch": 3.2122544708296688, "grad_norm": 0.26412346547226784, "learning_rate": 0.00019338453020526853, "loss": 2.9922337532043457, "step": 5480, "token_acc": 0.29749492678472944 }, { "epoch": 3.212840809146878, "grad_norm": 0.2845160462526243, "learning_rate": 0.00019338106312896694, "loss": 2.894908905029297, "step": 5481, "token_acc": 0.3141979555464065 }, { "epoch": 3.213427147464087, "grad_norm": 0.283510982160887, "learning_rate": 0.00019337759517547607, "loss": 2.9345791339874268, "step": 5482, "token_acc": 0.3063695516442463 }, { "epoch": 3.2140134857812956, "grad_norm": 0.28876894796961844, "learning_rate": 0.00019337412634482854, "loss": 2.906684637069702, "step": 5483, "token_acc": 0.3120758967897623 }, { "epoch": 3.2145998240985048, "grad_norm": 0.310608861866727, "learning_rate": 0.0001933706566370569, "loss": 2.926724672317505, "step": 5484, "token_acc": 0.3080563545328646 }, { "epoch": 3.215186162415714, "grad_norm": 0.3268638938848502, "learning_rate": 0.0001933671860521938, "loss": 2.9390642642974854, "step": 5485, "token_acc": 0.30722162894004235 }, { "epoch": 3.215772500732923, "grad_norm": 0.3204292200792293, "learning_rate": 0.00019336371459027177, "loss": 2.94765305519104, "step": 5486, "token_acc": 0.30603392107136057 }, { "epoch": 3.216358839050132, "grad_norm": 0.2976423670287878, "learning_rate": 0.00019336024225132347, "loss": 2.935096025466919, "step": 5487, "token_acc": 0.30903870358716173 }, { "epoch": 3.2169451773673408, "grad_norm": 0.289126134572972, "learning_rate": 0.00019335676903538146, "loss": 2.9308056831359863, "step": 5488, "token_acc": 0.30913258479855305 }, { "epoch": 3.21753151568455, "grad_norm": 0.3390484709452385, "learning_rate": 0.00019335329494247846, "loss": 2.963320255279541, "step": 5489, "token_acc": 0.30352812818926656 }, { "epoch": 3.218117854001759, "grad_norm": 0.337154626330191, "learning_rate": 0.00019334981997264701, "loss": 2.9333834648132324, "step": 5490, "token_acc": 0.30716619843074444 }, { "epoch": 3.218704192318968, "grad_norm": 0.2637196302222183, "learning_rate": 0.0001933463441259198, "loss": 2.9198415279388428, "step": 5491, "token_acc": 0.30909407712206305 }, { "epoch": 3.219290530636177, "grad_norm": 0.34280919035092056, "learning_rate": 0.00019334286740232948, "loss": 2.9395952224731445, "step": 5492, "token_acc": 0.3052960497398726 }, { "epoch": 3.219876868953386, "grad_norm": 0.28690420509662506, "learning_rate": 0.0001933393898019087, "loss": 2.935025215148926, "step": 5493, "token_acc": 0.3072363724145724 }, { "epoch": 3.220463207270595, "grad_norm": 0.3019453077610104, "learning_rate": 0.0001933359113246901, "loss": 2.9721508026123047, "step": 5494, "token_acc": 0.3033023506645941 }, { "epoch": 3.221049545587804, "grad_norm": 0.39362648374799203, "learning_rate": 0.00019333243197070644, "loss": 2.9954428672790527, "step": 5495, "token_acc": 0.2989623224854377 }, { "epoch": 3.221635883905013, "grad_norm": 0.28128572526351425, "learning_rate": 0.0001933289517399903, "loss": 2.9351303577423096, "step": 5496, "token_acc": 0.30926390519910324 }, { "epoch": 3.2222222222222223, "grad_norm": 0.35144269923284666, "learning_rate": 0.00019332547063257444, "loss": 2.94991397857666, "step": 5497, "token_acc": 0.3053250711669837 }, { "epoch": 3.2228085605394314, "grad_norm": 0.33273853778154117, "learning_rate": 0.00019332198864849157, "loss": 2.9399988651275635, "step": 5498, "token_acc": 0.30674416883418976 }, { "epoch": 3.22339489885664, "grad_norm": 0.2917690000244775, "learning_rate": 0.00019331850578777432, "loss": 2.9403092861175537, "step": 5499, "token_acc": 0.30710172744721687 }, { "epoch": 3.223981237173849, "grad_norm": 0.34727191022192466, "learning_rate": 0.00019331502205045546, "loss": 2.9610671997070312, "step": 5500, "token_acc": 0.30261669498923055 }, { "epoch": 3.2245675754910583, "grad_norm": 0.26293768685557267, "learning_rate": 0.00019331153743656774, "loss": 2.9483139514923096, "step": 5501, "token_acc": 0.30786009640407064 }, { "epoch": 3.2251539138082674, "grad_norm": 0.30069552675860994, "learning_rate": 0.00019330805194614387, "loss": 2.8998894691467285, "step": 5502, "token_acc": 0.3125118678826612 }, { "epoch": 3.2257402521254765, "grad_norm": 0.29072127003909826, "learning_rate": 0.00019330456557921654, "loss": 2.9421591758728027, "step": 5503, "token_acc": 0.3060407689127529 }, { "epoch": 3.226326590442685, "grad_norm": 0.29888417341793727, "learning_rate": 0.0001933010783358186, "loss": 2.9115114212036133, "step": 5504, "token_acc": 0.3115030077121504 }, { "epoch": 3.2269129287598943, "grad_norm": 0.26148903678665786, "learning_rate": 0.00019329759021598274, "loss": 2.9659783840179443, "step": 5505, "token_acc": 0.3025953763928274 }, { "epoch": 3.2274992670771034, "grad_norm": 0.2918553668517596, "learning_rate": 0.0001932941012197417, "loss": 2.9634170532226562, "step": 5506, "token_acc": 0.30281166622538136 }, { "epoch": 3.2280856053943126, "grad_norm": 0.277041645156445, "learning_rate": 0.00019329061134712832, "loss": 2.908942461013794, "step": 5507, "token_acc": 0.3126126976273244 }, { "epoch": 3.2286719437115217, "grad_norm": 0.29487714155284045, "learning_rate": 0.00019328712059817535, "loss": 2.972054958343506, "step": 5508, "token_acc": 0.301691821655524 }, { "epoch": 3.2292582820287308, "grad_norm": 0.2766429547646211, "learning_rate": 0.00019328362897291562, "loss": 2.9334068298339844, "step": 5509, "token_acc": 0.30700255740989746 }, { "epoch": 3.2298446203459394, "grad_norm": 0.31559357387305625, "learning_rate": 0.00019328013647138188, "loss": 2.9141879081726074, "step": 5510, "token_acc": 0.3102283436406022 }, { "epoch": 3.2304309586631486, "grad_norm": 0.2766219908449118, "learning_rate": 0.00019327664309360694, "loss": 2.945896625518799, "step": 5511, "token_acc": 0.3038982405054667 }, { "epoch": 3.2310172969803577, "grad_norm": 0.28842161073908024, "learning_rate": 0.00019327314883962364, "loss": 2.994135856628418, "step": 5512, "token_acc": 0.2979240908661452 }, { "epoch": 3.231603635297567, "grad_norm": 0.3162449636531089, "learning_rate": 0.0001932696537094648, "loss": 2.9820802211761475, "step": 5513, "token_acc": 0.3019043464886957 }, { "epoch": 3.232189973614776, "grad_norm": 0.38214157279002486, "learning_rate": 0.00019326615770316323, "loss": 2.9423506259918213, "step": 5514, "token_acc": 0.30582138097063877 }, { "epoch": 3.2327763119319846, "grad_norm": 0.3161189041597134, "learning_rate": 0.0001932626608207518, "loss": 2.937432289123535, "step": 5515, "token_acc": 0.30602913721383096 }, { "epoch": 3.2333626502491937, "grad_norm": 0.2679121421836545, "learning_rate": 0.00019325916306226333, "loss": 2.948369026184082, "step": 5516, "token_acc": 0.30627930354848326 }, { "epoch": 3.233948988566403, "grad_norm": 0.29145037105152877, "learning_rate": 0.00019325566442773072, "loss": 2.9340388774871826, "step": 5517, "token_acc": 0.30677909773801487 }, { "epoch": 3.234535326883612, "grad_norm": 0.3097700973196709, "learning_rate": 0.00019325216491718677, "loss": 2.9388587474823, "step": 5518, "token_acc": 0.3073891161483319 }, { "epoch": 3.235121665200821, "grad_norm": 0.2648594504750986, "learning_rate": 0.00019324866453066441, "loss": 2.982898235321045, "step": 5519, "token_acc": 0.30241451573014694 }, { "epoch": 3.23570800351803, "grad_norm": 0.3155344145102964, "learning_rate": 0.0001932451632681965, "loss": 2.9290456771850586, "step": 5520, "token_acc": 0.3075735025694308 }, { "epoch": 3.236294341835239, "grad_norm": 0.2983294681981736, "learning_rate": 0.00019324166112981593, "loss": 2.9440011978149414, "step": 5521, "token_acc": 0.3061988580133509 }, { "epoch": 3.236880680152448, "grad_norm": 0.2628931205630277, "learning_rate": 0.0001932381581155556, "loss": 2.91029953956604, "step": 5522, "token_acc": 0.3116043847605972 }, { "epoch": 3.237467018469657, "grad_norm": 0.26116218606040925, "learning_rate": 0.0001932346542254484, "loss": 2.96248459815979, "step": 5523, "token_acc": 0.3037071203595103 }, { "epoch": 3.238053356786866, "grad_norm": 0.2949981135017047, "learning_rate": 0.00019323114945952728, "loss": 2.993692398071289, "step": 5524, "token_acc": 0.29872926872611244 }, { "epoch": 3.2386396951040752, "grad_norm": 0.27945935240377145, "learning_rate": 0.00019322764381782511, "loss": 2.9792943000793457, "step": 5525, "token_acc": 0.30059622349012755 }, { "epoch": 3.239226033421284, "grad_norm": 0.23254189415376042, "learning_rate": 0.00019322413730037488, "loss": 2.9363675117492676, "step": 5526, "token_acc": 0.30773914897881055 }, { "epoch": 3.239812371738493, "grad_norm": 0.28575895972987014, "learning_rate": 0.0001932206299072095, "loss": 2.975721836090088, "step": 5527, "token_acc": 0.30154219616443495 }, { "epoch": 3.240398710055702, "grad_norm": 0.2795863778793751, "learning_rate": 0.00019321712163836193, "loss": 2.957705497741699, "step": 5528, "token_acc": 0.30354256288207077 }, { "epoch": 3.2409850483729112, "grad_norm": 0.24182803777945375, "learning_rate": 0.00019321361249386508, "loss": 2.9397261142730713, "step": 5529, "token_acc": 0.30653341368331793 }, { "epoch": 3.2415713866901203, "grad_norm": 0.2953613344764257, "learning_rate": 0.00019321010247375195, "loss": 2.951720952987671, "step": 5530, "token_acc": 0.3036856786435161 }, { "epoch": 3.2421577250073295, "grad_norm": 0.2746381200976614, "learning_rate": 0.00019320659157805555, "loss": 2.8933863639831543, "step": 5531, "token_acc": 0.3124295133165611 }, { "epoch": 3.242744063324538, "grad_norm": 0.259522127627178, "learning_rate": 0.00019320307980680879, "loss": 2.92635440826416, "step": 5532, "token_acc": 0.30861736334405143 }, { "epoch": 3.2433304016417472, "grad_norm": 0.2856971625672185, "learning_rate": 0.0001931995671600447, "loss": 2.937255382537842, "step": 5533, "token_acc": 0.3067993821692447 }, { "epoch": 3.2439167399589564, "grad_norm": 0.3153184954543292, "learning_rate": 0.00019319605363779624, "loss": 2.92566180229187, "step": 5534, "token_acc": 0.3100002431374456 }, { "epoch": 3.2445030782761655, "grad_norm": 0.24304077661249268, "learning_rate": 0.00019319253924009647, "loss": 2.911578893661499, "step": 5535, "token_acc": 0.3114368255129598 }, { "epoch": 3.2450894165933746, "grad_norm": 0.35252001190129306, "learning_rate": 0.00019318902396697833, "loss": 2.974607229232788, "step": 5536, "token_acc": 0.3024739040203515 }, { "epoch": 3.2456757549105832, "grad_norm": 0.476780621648762, "learning_rate": 0.00019318550781847492, "loss": 2.9159560203552246, "step": 5537, "token_acc": 0.30912701926604347 }, { "epoch": 3.2462620932277924, "grad_norm": 0.28637009930706986, "learning_rate": 0.00019318199079461923, "loss": 2.9334349632263184, "step": 5538, "token_acc": 0.3071231293970545 }, { "epoch": 3.2468484315450015, "grad_norm": 0.3195328704025498, "learning_rate": 0.0001931784728954443, "loss": 2.9469428062438965, "step": 5539, "token_acc": 0.30526967766214896 }, { "epoch": 3.2474347698622106, "grad_norm": 0.26832654316894783, "learning_rate": 0.00019317495412098315, "loss": 2.948988914489746, "step": 5540, "token_acc": 0.3053295526577332 }, { "epoch": 3.2480211081794197, "grad_norm": 0.2908674552889672, "learning_rate": 0.0001931714344712689, "loss": 2.9275853633880615, "step": 5541, "token_acc": 0.30855676855329955 }, { "epoch": 3.2486074464966284, "grad_norm": 0.2742391942429433, "learning_rate": 0.00019316791394633455, "loss": 2.9371752738952637, "step": 5542, "token_acc": 0.3057878008252863 }, { "epoch": 3.2491937848138375, "grad_norm": 0.3203997047729603, "learning_rate": 0.0001931643925462132, "loss": 2.955483913421631, "step": 5543, "token_acc": 0.30583247242921796 }, { "epoch": 3.2497801231310466, "grad_norm": 0.3264253911083781, "learning_rate": 0.00019316087027093794, "loss": 2.9473843574523926, "step": 5544, "token_acc": 0.3046910805513191 }, { "epoch": 3.2503664614482557, "grad_norm": 0.2878037423961283, "learning_rate": 0.00019315734712054182, "loss": 2.9345297813415527, "step": 5545, "token_acc": 0.306335490288383 }, { "epoch": 3.250952799765465, "grad_norm": 0.33619404662806934, "learning_rate": 0.00019315382309505793, "loss": 2.929159641265869, "step": 5546, "token_acc": 0.30800573151046456 }, { "epoch": 3.2515391380826735, "grad_norm": 0.24730700509543874, "learning_rate": 0.00019315029819451943, "loss": 2.9884674549102783, "step": 5547, "token_acc": 0.2981429902618348 }, { "epoch": 3.2521254763998826, "grad_norm": 0.3368426692667576, "learning_rate": 0.0001931467724189594, "loss": 2.981721878051758, "step": 5548, "token_acc": 0.300923505386252 }, { "epoch": 3.2527118147170917, "grad_norm": 0.2496026186218317, "learning_rate": 0.00019314324576841097, "loss": 2.958937168121338, "step": 5549, "token_acc": 0.30521662382707254 }, { "epoch": 3.253298153034301, "grad_norm": 0.31512215306855007, "learning_rate": 0.00019313971824290723, "loss": 2.9585373401641846, "step": 5550, "token_acc": 0.30412078526188535 }, { "epoch": 3.25388449135151, "grad_norm": 0.2644606780569012, "learning_rate": 0.00019313618984248136, "loss": 2.952260971069336, "step": 5551, "token_acc": 0.30490610742329005 }, { "epoch": 3.254470829668719, "grad_norm": 0.2964840750623536, "learning_rate": 0.00019313266056716647, "loss": 2.997602939605713, "step": 5552, "token_acc": 0.2986626734592716 }, { "epoch": 3.2550571679859277, "grad_norm": 0.2826460479050315, "learning_rate": 0.00019312913041699575, "loss": 2.959132671356201, "step": 5553, "token_acc": 0.30406453928313276 }, { "epoch": 3.255643506303137, "grad_norm": 0.27683518655715794, "learning_rate": 0.00019312559939200236, "loss": 2.9416816234588623, "step": 5554, "token_acc": 0.30738426146784126 }, { "epoch": 3.256229844620346, "grad_norm": 0.2540551054228169, "learning_rate": 0.00019312206749221944, "loss": 2.924468517303467, "step": 5555, "token_acc": 0.3078947368421053 }, { "epoch": 3.256816182937555, "grad_norm": 0.31725721408448015, "learning_rate": 0.00019311853471768017, "loss": 2.9105281829833984, "step": 5556, "token_acc": 0.31106961084735063 }, { "epoch": 3.257402521254764, "grad_norm": 0.2889341434441898, "learning_rate": 0.00019311500106841773, "loss": 2.9713759422302246, "step": 5557, "token_acc": 0.30036918559479603 }, { "epoch": 3.257988859571973, "grad_norm": 0.29955933575191535, "learning_rate": 0.00019311146654446537, "loss": 2.941173791885376, "step": 5558, "token_acc": 0.30646073132674523 }, { "epoch": 3.258575197889182, "grad_norm": 0.32307366990645614, "learning_rate": 0.0001931079311458562, "loss": 2.998309373855591, "step": 5559, "token_acc": 0.2973397650449419 }, { "epoch": 3.259161536206391, "grad_norm": 0.2685595619034012, "learning_rate": 0.00019310439487262352, "loss": 2.9513182640075684, "step": 5560, "token_acc": 0.304503698244321 }, { "epoch": 3.2597478745236, "grad_norm": 0.32304434803990695, "learning_rate": 0.0001931008577248005, "loss": 2.977510929107666, "step": 5561, "token_acc": 0.3002807304160807 }, { "epoch": 3.2603342128408093, "grad_norm": 0.3582189781919564, "learning_rate": 0.0001930973197024204, "loss": 2.9453773498535156, "step": 5562, "token_acc": 0.3076506685681867 }, { "epoch": 3.2609205511580184, "grad_norm": 0.2791830581366631, "learning_rate": 0.00019309378080551638, "loss": 2.9175667762756348, "step": 5563, "token_acc": 0.30815402068878583 }, { "epoch": 3.261506889475227, "grad_norm": 0.3591353990952763, "learning_rate": 0.00019309024103412176, "loss": 2.954418659210205, "step": 5564, "token_acc": 0.30361485627066903 }, { "epoch": 3.262093227792436, "grad_norm": 0.327223338948454, "learning_rate": 0.0001930867003882698, "loss": 2.945157766342163, "step": 5565, "token_acc": 0.3074716964918688 }, { "epoch": 3.2626795661096453, "grad_norm": 0.315860252281925, "learning_rate": 0.0001930831588679937, "loss": 2.9620490074157715, "step": 5566, "token_acc": 0.3021040157585365 }, { "epoch": 3.2632659044268544, "grad_norm": 0.3552765522701381, "learning_rate": 0.00019307961647332673, "loss": 2.9642271995544434, "step": 5567, "token_acc": 0.3040826684641185 }, { "epoch": 3.2638522427440635, "grad_norm": 0.2906476224767005, "learning_rate": 0.00019307607320430222, "loss": 2.954462766647339, "step": 5568, "token_acc": 0.30464290095823293 }, { "epoch": 3.264438581061272, "grad_norm": 0.34450279972302605, "learning_rate": 0.0001930725290609534, "loss": 2.8958821296691895, "step": 5569, "token_acc": 0.3142881672426005 }, { "epoch": 3.2650249193784813, "grad_norm": 0.28448146226764515, "learning_rate": 0.0001930689840433136, "loss": 2.952406644821167, "step": 5570, "token_acc": 0.3042650046631563 }, { "epoch": 3.2656112576956904, "grad_norm": 0.36471392926214036, "learning_rate": 0.00019306543815141608, "loss": 2.965480089187622, "step": 5571, "token_acc": 0.3038153815512583 }, { "epoch": 3.2661975960128995, "grad_norm": 0.2928851335776934, "learning_rate": 0.00019306189138529423, "loss": 2.9288558959960938, "step": 5572, "token_acc": 0.30830668039970366 }, { "epoch": 3.2667839343301086, "grad_norm": 0.34877336445248747, "learning_rate": 0.00019305834374498128, "loss": 2.930997133255005, "step": 5573, "token_acc": 0.3078150489396411 }, { "epoch": 3.2673702726473177, "grad_norm": 0.2689034327724174, "learning_rate": 0.00019305479523051058, "loss": 2.950197458267212, "step": 5574, "token_acc": 0.3068243674085134 }, { "epoch": 3.2679566109645264, "grad_norm": 0.3272056172117976, "learning_rate": 0.0001930512458419155, "loss": 2.9441757202148438, "step": 5575, "token_acc": 0.30482656306307493 }, { "epoch": 3.2685429492817355, "grad_norm": 0.26113695234790235, "learning_rate": 0.00019304769557922932, "loss": 2.916006088256836, "step": 5576, "token_acc": 0.30992925325117365 }, { "epoch": 3.2691292875989446, "grad_norm": 0.3149577947348922, "learning_rate": 0.00019304414444248544, "loss": 2.9441823959350586, "step": 5577, "token_acc": 0.30545243415380824 }, { "epoch": 3.2697156259161537, "grad_norm": 0.28637643170327226, "learning_rate": 0.00019304059243171722, "loss": 3.007596492767334, "step": 5578, "token_acc": 0.29620854954026654 }, { "epoch": 3.270301964233363, "grad_norm": 0.26873875197723673, "learning_rate": 0.00019303703954695798, "loss": 2.925732135772705, "step": 5579, "token_acc": 0.3097641609963769 }, { "epoch": 3.2708883025505715, "grad_norm": 0.27131174680061176, "learning_rate": 0.00019303348578824113, "loss": 2.9266598224639893, "step": 5580, "token_acc": 0.3092149611778994 }, { "epoch": 3.2714746408677806, "grad_norm": 0.27173814004788016, "learning_rate": 0.00019302993115560005, "loss": 2.9532389640808105, "step": 5581, "token_acc": 0.3041657917513841 }, { "epoch": 3.2720609791849897, "grad_norm": 0.2535122313665315, "learning_rate": 0.00019302637564906814, "loss": 2.950535535812378, "step": 5582, "token_acc": 0.30556480490141336 }, { "epoch": 3.272647317502199, "grad_norm": 0.30450078721574747, "learning_rate": 0.00019302281926867875, "loss": 2.9460368156433105, "step": 5583, "token_acc": 0.3038447535881119 }, { "epoch": 3.273233655819408, "grad_norm": 0.27252593105743816, "learning_rate": 0.00019301926201446533, "loss": 2.964747905731201, "step": 5584, "token_acc": 0.30350385477974706 }, { "epoch": 3.273819994136617, "grad_norm": 0.27433829771297846, "learning_rate": 0.00019301570388646132, "loss": 2.9628806114196777, "step": 5585, "token_acc": 0.3040403093627057 }, { "epoch": 3.2744063324538257, "grad_norm": 0.34769133592402734, "learning_rate": 0.00019301214488470008, "loss": 3.0120959281921387, "step": 5586, "token_acc": 0.29608672798948754 }, { "epoch": 3.274992670771035, "grad_norm": 0.2858622620201363, "learning_rate": 0.0001930085850092151, "loss": 2.975733995437622, "step": 5587, "token_acc": 0.3004513742099638 }, { "epoch": 3.275579009088244, "grad_norm": 0.3226812869987855, "learning_rate": 0.00019300502426003978, "loss": 2.961763858795166, "step": 5588, "token_acc": 0.3029758664975722 }, { "epoch": 3.276165347405453, "grad_norm": 0.2349015610205357, "learning_rate": 0.0001930014626372076, "loss": 2.920170307159424, "step": 5589, "token_acc": 0.31051243294508163 }, { "epoch": 3.2767516857226617, "grad_norm": 0.32615475800750326, "learning_rate": 0.00019299790014075193, "loss": 2.913259983062744, "step": 5590, "token_acc": 0.309280436936712 }, { "epoch": 3.277338024039871, "grad_norm": 0.3181641601521066, "learning_rate": 0.00019299433677070636, "loss": 2.9367003440856934, "step": 5591, "token_acc": 0.3072254128067674 }, { "epoch": 3.27792436235708, "grad_norm": 0.2684538628402062, "learning_rate": 0.00019299077252710433, "loss": 2.9809985160827637, "step": 5592, "token_acc": 0.3012360351794628 }, { "epoch": 3.278510700674289, "grad_norm": 0.31631293400830973, "learning_rate": 0.00019298720740997926, "loss": 2.934690475463867, "step": 5593, "token_acc": 0.3066995180425111 }, { "epoch": 3.279097038991498, "grad_norm": 0.28334465338371556, "learning_rate": 0.0001929836414193647, "loss": 2.9570372104644775, "step": 5594, "token_acc": 0.3021197168782687 }, { "epoch": 3.2796833773087073, "grad_norm": 0.3073094621139582, "learning_rate": 0.00019298007455529413, "loss": 2.981383800506592, "step": 5595, "token_acc": 0.2994228275473159 }, { "epoch": 3.280269715625916, "grad_norm": 0.2673146351515476, "learning_rate": 0.00019297650681780103, "loss": 2.9781689643859863, "step": 5596, "token_acc": 0.3017087253436502 }, { "epoch": 3.280856053943125, "grad_norm": 0.32240127724091405, "learning_rate": 0.00019297293820691894, "loss": 2.968599557876587, "step": 5597, "token_acc": 0.3031788002575551 }, { "epoch": 3.281442392260334, "grad_norm": 0.27184758006611964, "learning_rate": 0.0001929693687226814, "loss": 2.9974708557128906, "step": 5598, "token_acc": 0.29767199760563195 }, { "epoch": 3.2820287305775433, "grad_norm": 0.30325717457172857, "learning_rate": 0.0001929657983651219, "loss": 2.932379722595215, "step": 5599, "token_acc": 0.30830088664113253 }, { "epoch": 3.2826150688947524, "grad_norm": 0.26450258926836745, "learning_rate": 0.000192962227134274, "loss": 2.9293246269226074, "step": 5600, "token_acc": 0.30824227780352365 }, { "epoch": 3.283201407211961, "grad_norm": 0.253994119682815, "learning_rate": 0.00019295865503017124, "loss": 2.9782087802886963, "step": 5601, "token_acc": 0.2998636960295485 }, { "epoch": 3.28378774552917, "grad_norm": 0.26392188798613786, "learning_rate": 0.00019295508205284718, "loss": 2.9324090480804443, "step": 5602, "token_acc": 0.3082858484130916 }, { "epoch": 3.2843740838463793, "grad_norm": 0.29120151706337566, "learning_rate": 0.00019295150820233537, "loss": 2.9344115257263184, "step": 5603, "token_acc": 0.3076634395735886 }, { "epoch": 3.2849604221635884, "grad_norm": 0.265867571543838, "learning_rate": 0.00019294793347866942, "loss": 2.958077907562256, "step": 5604, "token_acc": 0.30318789577703387 }, { "epoch": 3.2855467604807975, "grad_norm": 0.2903039371646153, "learning_rate": 0.0001929443578818829, "loss": 2.9747567176818848, "step": 5605, "token_acc": 0.30161072827852753 }, { "epoch": 3.2861330987980066, "grad_norm": 0.27459667584453373, "learning_rate": 0.00019294078141200935, "loss": 2.934018611907959, "step": 5606, "token_acc": 0.3075634179436999 }, { "epoch": 3.2867194371152153, "grad_norm": 0.2827367549242991, "learning_rate": 0.0001929372040690824, "loss": 2.91848087310791, "step": 5607, "token_acc": 0.30907608062449027 }, { "epoch": 3.2873057754324244, "grad_norm": 0.24622937538907255, "learning_rate": 0.00019293362585313565, "loss": 2.968585729598999, "step": 5608, "token_acc": 0.30346410948504915 }, { "epoch": 3.2878921137496335, "grad_norm": 0.30342375238963964, "learning_rate": 0.0001929300467642027, "loss": 2.969679355621338, "step": 5609, "token_acc": 0.30328407348954517 }, { "epoch": 3.2884784520668426, "grad_norm": 0.2921242190379111, "learning_rate": 0.00019292646680231723, "loss": 2.961977481842041, "step": 5610, "token_acc": 0.3045719373330626 }, { "epoch": 3.2890647903840518, "grad_norm": 0.22171135317389842, "learning_rate": 0.0001929228859675128, "loss": 2.9748570919036865, "step": 5611, "token_acc": 0.300853377828259 }, { "epoch": 3.2896511287012604, "grad_norm": 0.3267266266540225, "learning_rate": 0.00019291930425982307, "loss": 2.978346824645996, "step": 5612, "token_acc": 0.3013138367176197 }, { "epoch": 3.2902374670184695, "grad_norm": 0.35054062230463917, "learning_rate": 0.0001929157216792817, "loss": 2.972545623779297, "step": 5613, "token_acc": 0.30031920130856654 }, { "epoch": 3.2908238053356786, "grad_norm": 0.3098351010504053, "learning_rate": 0.00019291213822592232, "loss": 2.961137056350708, "step": 5614, "token_acc": 0.30347959381196277 }, { "epoch": 3.2914101436528878, "grad_norm": 0.26935602775795514, "learning_rate": 0.00019290855389977857, "loss": 2.9546191692352295, "step": 5615, "token_acc": 0.30577952418076315 }, { "epoch": 3.291996481970097, "grad_norm": 0.30172629714914667, "learning_rate": 0.0001929049687008842, "loss": 2.960115909576416, "step": 5616, "token_acc": 0.3040097262285351 }, { "epoch": 3.292582820287306, "grad_norm": 0.3059663164886852, "learning_rate": 0.00019290138262927282, "loss": 2.955659866333008, "step": 5617, "token_acc": 0.3050338389361736 }, { "epoch": 3.2931691586045146, "grad_norm": 0.29407980002025524, "learning_rate": 0.00019289779568497814, "loss": 2.986624002456665, "step": 5618, "token_acc": 0.2984275965809823 }, { "epoch": 3.2937554969217238, "grad_norm": 0.2993730763091169, "learning_rate": 0.00019289420786803386, "loss": 2.954629421234131, "step": 5619, "token_acc": 0.30345018811576435 }, { "epoch": 3.294341835238933, "grad_norm": 0.2617401469708947, "learning_rate": 0.00019289061917847366, "loss": 2.964477062225342, "step": 5620, "token_acc": 0.30397145713545165 }, { "epoch": 3.294928173556142, "grad_norm": 0.2790695180521545, "learning_rate": 0.00019288702961633126, "loss": 2.9307661056518555, "step": 5621, "token_acc": 0.3081485368024156 }, { "epoch": 3.295514511873351, "grad_norm": 0.24246693095933747, "learning_rate": 0.0001928834391816404, "loss": 2.954713821411133, "step": 5622, "token_acc": 0.3044471138321644 }, { "epoch": 3.2961008501905598, "grad_norm": 0.2835213509827598, "learning_rate": 0.00019287984787443477, "loss": 2.9860241413116455, "step": 5623, "token_acc": 0.3005023322569071 }, { "epoch": 3.296687188507769, "grad_norm": 0.26517372014441176, "learning_rate": 0.00019287625569474815, "loss": 2.9198975563049316, "step": 5624, "token_acc": 0.3090585236520724 }, { "epoch": 3.297273526824978, "grad_norm": 0.2534006383234535, "learning_rate": 0.00019287266264261425, "loss": 2.938803195953369, "step": 5625, "token_acc": 0.30819750066102414 }, { "epoch": 3.297859865142187, "grad_norm": 0.2787641537503187, "learning_rate": 0.00019286906871806685, "loss": 2.956223964691162, "step": 5626, "token_acc": 0.30441546164713706 }, { "epoch": 3.298446203459396, "grad_norm": 0.3168997539661053, "learning_rate": 0.00019286547392113965, "loss": 2.942523956298828, "step": 5627, "token_acc": 0.30676759384965113 }, { "epoch": 3.2990325417766053, "grad_norm": 0.32357189083828575, "learning_rate": 0.00019286187825186648, "loss": 2.9433374404907227, "step": 5628, "token_acc": 0.3071390098885641 }, { "epoch": 3.299618880093814, "grad_norm": 0.2944423668480439, "learning_rate": 0.0001928582817102811, "loss": 2.943603992462158, "step": 5629, "token_acc": 0.30697099270076894 }, { "epoch": 3.300205218411023, "grad_norm": 0.26484351491760716, "learning_rate": 0.0001928546842964173, "loss": 2.9256739616394043, "step": 5630, "token_acc": 0.3088785251118085 }, { "epoch": 3.300791556728232, "grad_norm": 0.2798517635077239, "learning_rate": 0.00019285108601030886, "loss": 2.976163864135742, "step": 5631, "token_acc": 0.3009628171162906 }, { "epoch": 3.3013778950454413, "grad_norm": 0.30906028537764757, "learning_rate": 0.00019284748685198958, "loss": 2.9555845260620117, "step": 5632, "token_acc": 0.3032182009856617 }, { "epoch": 3.3019642333626504, "grad_norm": 0.3416426534833998, "learning_rate": 0.00019284388682149327, "loss": 2.9616713523864746, "step": 5633, "token_acc": 0.3050974355752416 }, { "epoch": 3.302550571679859, "grad_norm": 0.26173136486004844, "learning_rate": 0.00019284028591885378, "loss": 2.919099807739258, "step": 5634, "token_acc": 0.3094568356738325 }, { "epoch": 3.303136909997068, "grad_norm": 0.32226516550980766, "learning_rate": 0.00019283668414410486, "loss": 2.9393768310546875, "step": 5635, "token_acc": 0.3071798452318613 }, { "epoch": 3.3037232483142773, "grad_norm": 0.3119222455225087, "learning_rate": 0.00019283308149728044, "loss": 2.9706835746765137, "step": 5636, "token_acc": 0.30226297570682364 }, { "epoch": 3.3043095866314864, "grad_norm": 0.2342311201566956, "learning_rate": 0.00019282947797841427, "loss": 2.9796180725097656, "step": 5637, "token_acc": 0.30093624609897457 }, { "epoch": 3.3048959249486956, "grad_norm": 0.3048411019843206, "learning_rate": 0.0001928258735875403, "loss": 2.979363441467285, "step": 5638, "token_acc": 0.30102966958518435 }, { "epoch": 3.3054822632659047, "grad_norm": 0.26769549451694696, "learning_rate": 0.0001928222683246923, "loss": 2.972560405731201, "step": 5639, "token_acc": 0.3018837698507533 }, { "epoch": 3.3060686015831133, "grad_norm": 0.30440198034641774, "learning_rate": 0.00019281866218990413, "loss": 2.968074321746826, "step": 5640, "token_acc": 0.3025897178150616 }, { "epoch": 3.3066549399003224, "grad_norm": 0.27054162135329285, "learning_rate": 0.00019281505518320974, "loss": 2.946080207824707, "step": 5641, "token_acc": 0.3049476529505286 }, { "epoch": 3.3072412782175316, "grad_norm": 0.24653796185609123, "learning_rate": 0.000192811447304643, "loss": 2.9238927364349365, "step": 5642, "token_acc": 0.3092461682498606 }, { "epoch": 3.3078276165347407, "grad_norm": 0.31189243648968873, "learning_rate": 0.00019280783855423774, "loss": 2.977013111114502, "step": 5643, "token_acc": 0.30164238902597573 }, { "epoch": 3.3084139548519493, "grad_norm": 0.25019451087710404, "learning_rate": 0.0001928042289320279, "loss": 2.986665964126587, "step": 5644, "token_acc": 0.29997862434634814 }, { "epoch": 3.3090002931691584, "grad_norm": 0.3206138942759848, "learning_rate": 0.0001928006184380474, "loss": 2.9827170372009277, "step": 5645, "token_acc": 0.30077514574341413 }, { "epoch": 3.3095866314863676, "grad_norm": 0.28398298834414215, "learning_rate": 0.00019279700707233014, "loss": 2.94273042678833, "step": 5646, "token_acc": 0.30576990831618067 }, { "epoch": 3.3101729698035767, "grad_norm": 0.27591748504945957, "learning_rate": 0.00019279339483491004, "loss": 2.9414987564086914, "step": 5647, "token_acc": 0.3056171627293948 }, { "epoch": 3.310759308120786, "grad_norm": 0.3155387188990721, "learning_rate": 0.00019278978172582102, "loss": 2.923828601837158, "step": 5648, "token_acc": 0.3106152832380272 }, { "epoch": 3.311345646437995, "grad_norm": 0.24464084938338984, "learning_rate": 0.00019278616774509705, "loss": 2.982725143432617, "step": 5649, "token_acc": 0.30034968533451006 }, { "epoch": 3.3119319847552036, "grad_norm": 0.297217869247182, "learning_rate": 0.00019278255289277208, "loss": 2.96604585647583, "step": 5650, "token_acc": 0.3025845777246981 }, { "epoch": 3.3125183230724127, "grad_norm": 0.3132693702015149, "learning_rate": 0.00019277893716888005, "loss": 2.9986701011657715, "step": 5651, "token_acc": 0.2988245872877655 }, { "epoch": 3.313104661389622, "grad_norm": 0.29853606790193493, "learning_rate": 0.00019277532057345492, "loss": 2.9537243843078613, "step": 5652, "token_acc": 0.304517963242967 }, { "epoch": 3.313690999706831, "grad_norm": 0.2529087482548198, "learning_rate": 0.00019277170310653063, "loss": 2.950713634490967, "step": 5653, "token_acc": 0.3051230624144939 }, { "epoch": 3.31427733802404, "grad_norm": 0.27861550005316726, "learning_rate": 0.00019276808476814125, "loss": 2.945302963256836, "step": 5654, "token_acc": 0.3042957610168398 }, { "epoch": 3.3148636763412487, "grad_norm": 0.28419898766177615, "learning_rate": 0.00019276446555832068, "loss": 2.987307071685791, "step": 5655, "token_acc": 0.2997105168686938 }, { "epoch": 3.315450014658458, "grad_norm": 0.2857214025897931, "learning_rate": 0.000192760845477103, "loss": 2.9232144355773926, "step": 5656, "token_acc": 0.3090301275625728 }, { "epoch": 3.316036352975667, "grad_norm": 0.3159537158519594, "learning_rate": 0.00019275722452452215, "loss": 2.994924783706665, "step": 5657, "token_acc": 0.29901491276854314 }, { "epoch": 3.316622691292876, "grad_norm": 0.30215029431133145, "learning_rate": 0.00019275360270061217, "loss": 2.9695663452148438, "step": 5658, "token_acc": 0.30233164273499274 }, { "epoch": 3.317209029610085, "grad_norm": 0.26074971198414304, "learning_rate": 0.0001927499800054071, "loss": 2.9536001682281494, "step": 5659, "token_acc": 0.3047785690729854 }, { "epoch": 3.3177953679272942, "grad_norm": 0.31576566812968443, "learning_rate": 0.00019274635643894093, "loss": 2.9794039726257324, "step": 5660, "token_acc": 0.3027099286318575 }, { "epoch": 3.318381706244503, "grad_norm": 0.2625850334677903, "learning_rate": 0.00019274273200124773, "loss": 2.9376702308654785, "step": 5661, "token_acc": 0.30692136837750145 }, { "epoch": 3.318968044561712, "grad_norm": 0.30368499839261853, "learning_rate": 0.00019273910669236153, "loss": 2.98230242729187, "step": 5662, "token_acc": 0.2988292109058776 }, { "epoch": 3.319554382878921, "grad_norm": 0.26906632448320994, "learning_rate": 0.00019273548051231638, "loss": 2.931710720062256, "step": 5663, "token_acc": 0.30789647410229215 }, { "epoch": 3.3201407211961302, "grad_norm": 0.30584429355847903, "learning_rate": 0.00019273185346114637, "loss": 2.9478423595428467, "step": 5664, "token_acc": 0.30599305419042 }, { "epoch": 3.3207270595133394, "grad_norm": 0.2770848654454532, "learning_rate": 0.00019272822553888553, "loss": 2.9587509632110596, "step": 5665, "token_acc": 0.30257670306812273 }, { "epoch": 3.321313397830548, "grad_norm": 0.2835828703361468, "learning_rate": 0.00019272459674556797, "loss": 2.9509706497192383, "step": 5666, "token_acc": 0.30482309143970876 }, { "epoch": 3.321899736147757, "grad_norm": 0.31926620541698986, "learning_rate": 0.00019272096708122777, "loss": 2.9831361770629883, "step": 5667, "token_acc": 0.30090741568735896 }, { "epoch": 3.3224860744649662, "grad_norm": 0.2715901724645933, "learning_rate": 0.00019271733654589905, "loss": 2.9557747840881348, "step": 5668, "token_acc": 0.30477269122487016 }, { "epoch": 3.3230724127821754, "grad_norm": 0.28682925621032807, "learning_rate": 0.0001927137051396159, "loss": 2.9165279865264893, "step": 5669, "token_acc": 0.30953384942198603 }, { "epoch": 3.3236587510993845, "grad_norm": 0.28960361202311696, "learning_rate": 0.0001927100728624124, "loss": 2.9681005477905273, "step": 5670, "token_acc": 0.30274776532487097 }, { "epoch": 3.3242450894165936, "grad_norm": 0.2774494656611944, "learning_rate": 0.0001927064397143227, "loss": 2.9485220909118652, "step": 5671, "token_acc": 0.3063155649991136 }, { "epoch": 3.3248314277338022, "grad_norm": 0.2772583363505963, "learning_rate": 0.0001927028056953809, "loss": 2.980656385421753, "step": 5672, "token_acc": 0.3017393848497708 }, { "epoch": 3.3254177660510114, "grad_norm": 0.2725767530220081, "learning_rate": 0.00019269917080562117, "loss": 2.9627394676208496, "step": 5673, "token_acc": 0.30403203254099403 }, { "epoch": 3.3260041043682205, "grad_norm": 0.28705112459275384, "learning_rate": 0.00019269553504507766, "loss": 2.9782228469848633, "step": 5674, "token_acc": 0.3018384561008148 }, { "epoch": 3.3265904426854296, "grad_norm": 0.3139674222373875, "learning_rate": 0.0001926918984137845, "loss": 2.983400344848633, "step": 5675, "token_acc": 0.30057985532015963 }, { "epoch": 3.3271767810026387, "grad_norm": 0.27784376111011244, "learning_rate": 0.00019268826091177585, "loss": 2.9108805656433105, "step": 5676, "token_acc": 0.3104204884716377 }, { "epoch": 3.3277631193198474, "grad_norm": 0.2770864500206743, "learning_rate": 0.00019268462253908592, "loss": 2.9188966751098633, "step": 5677, "token_acc": 0.3093889547481979 }, { "epoch": 3.3283494576370565, "grad_norm": 0.24885516142564773, "learning_rate": 0.00019268098329574878, "loss": 2.9553380012512207, "step": 5678, "token_acc": 0.30414661284816774 }, { "epoch": 3.3289357959542656, "grad_norm": 0.28378008337627975, "learning_rate": 0.00019267734318179877, "loss": 2.9638848304748535, "step": 5679, "token_acc": 0.3019457738587691 }, { "epoch": 3.3295221342714747, "grad_norm": 0.23798786202981287, "learning_rate": 0.00019267370219726998, "loss": 2.9493443965911865, "step": 5680, "token_acc": 0.3050334735891098 }, { "epoch": 3.330108472588684, "grad_norm": 0.27419168193009863, "learning_rate": 0.00019267006034219664, "loss": 2.965242624282837, "step": 5681, "token_acc": 0.3017251243004847 }, { "epoch": 3.330694810905893, "grad_norm": 0.27020308407268856, "learning_rate": 0.00019266641761661295, "loss": 2.968632221221924, "step": 5682, "token_acc": 0.3023887490052315 }, { "epoch": 3.3312811492231016, "grad_norm": 0.32882436917410596, "learning_rate": 0.00019266277402055313, "loss": 2.984539270401001, "step": 5683, "token_acc": 0.30110208429622565 }, { "epoch": 3.3318674875403107, "grad_norm": 0.3651465503270693, "learning_rate": 0.0001926591295540514, "loss": 2.952151298522949, "step": 5684, "token_acc": 0.3050717359936002 }, { "epoch": 3.33245382585752, "grad_norm": 0.3970605265907252, "learning_rate": 0.00019265548421714207, "loss": 2.983992099761963, "step": 5685, "token_acc": 0.30074702116516544 }, { "epoch": 3.333040164174729, "grad_norm": 0.29377583005913965, "learning_rate": 0.00019265183800985924, "loss": 2.9404726028442383, "step": 5686, "token_acc": 0.3084859199721697 }, { "epoch": 3.333626502491938, "grad_norm": 0.3917996169424354, "learning_rate": 0.0001926481909322373, "loss": 2.949791431427002, "step": 5687, "token_acc": 0.30525483663026204 }, { "epoch": 3.3342128408091467, "grad_norm": 0.38980390150261435, "learning_rate": 0.00019264454298431044, "loss": 2.9654550552368164, "step": 5688, "token_acc": 0.3034991380354423 }, { "epoch": 3.334799179126356, "grad_norm": 0.35401578722629806, "learning_rate": 0.0001926408941661129, "loss": 2.923727035522461, "step": 5689, "token_acc": 0.30827883769028475 }, { "epoch": 3.335385517443565, "grad_norm": 0.2937316671555973, "learning_rate": 0.00019263724447767905, "loss": 2.9253411293029785, "step": 5690, "token_acc": 0.30914679291322983 }, { "epoch": 3.335971855760774, "grad_norm": 0.3297657933079299, "learning_rate": 0.00019263359391904307, "loss": 2.914416551589966, "step": 5691, "token_acc": 0.31069873256506647 }, { "epoch": 3.336558194077983, "grad_norm": 0.29909100307888226, "learning_rate": 0.00019262994249023932, "loss": 2.9227499961853027, "step": 5692, "token_acc": 0.30859070643234354 }, { "epoch": 3.3371445323951923, "grad_norm": 0.2644057742779, "learning_rate": 0.0001926262901913021, "loss": 2.9637508392333984, "step": 5693, "token_acc": 0.30323828556009436 }, { "epoch": 3.337730870712401, "grad_norm": 0.2732700910977264, "learning_rate": 0.00019262263702226568, "loss": 2.969503402709961, "step": 5694, "token_acc": 0.30421621912500674 }, { "epoch": 3.33831720902961, "grad_norm": 0.27387498548337913, "learning_rate": 0.00019261898298316438, "loss": 2.981738328933716, "step": 5695, "token_acc": 0.3004588418109446 }, { "epoch": 3.338903547346819, "grad_norm": 0.27257578857082854, "learning_rate": 0.0001926153280740326, "loss": 3.010274887084961, "step": 5696, "token_acc": 0.29597389029881505 }, { "epoch": 3.3394898856640283, "grad_norm": 0.3091736036529012, "learning_rate": 0.00019261167229490456, "loss": 2.9579851627349854, "step": 5697, "token_acc": 0.30173400090268815 }, { "epoch": 3.340076223981237, "grad_norm": 0.27801056606265245, "learning_rate": 0.00019260801564581468, "loss": 2.948822021484375, "step": 5698, "token_acc": 0.304778498039919 }, { "epoch": 3.340662562298446, "grad_norm": 0.27984809975899383, "learning_rate": 0.00019260435812679723, "loss": 2.938413143157959, "step": 5699, "token_acc": 0.3070150335037526 }, { "epoch": 3.341248900615655, "grad_norm": 0.29096510335926856, "learning_rate": 0.00019260069973788669, "loss": 2.92311429977417, "step": 5700, "token_acc": 0.3099208204995888 }, { "epoch": 3.3418352389328643, "grad_norm": 0.2714718464622634, "learning_rate": 0.00019259704047911732, "loss": 2.9839437007904053, "step": 5701, "token_acc": 0.3000583029941827 }, { "epoch": 3.3424215772500734, "grad_norm": 0.29123218699299125, "learning_rate": 0.00019259338035052356, "loss": 2.973917007446289, "step": 5702, "token_acc": 0.30225141016178475 }, { "epoch": 3.3430079155672825, "grad_norm": 0.2787897996735095, "learning_rate": 0.0001925897193521397, "loss": 2.9662468433380127, "step": 5703, "token_acc": 0.30249452999483495 }, { "epoch": 3.343594253884491, "grad_norm": 0.31066528908859786, "learning_rate": 0.00019258605748400024, "loss": 2.9831621646881104, "step": 5704, "token_acc": 0.3000120086253257 }, { "epoch": 3.3441805922017003, "grad_norm": 0.24338094232582197, "learning_rate": 0.00019258239474613954, "loss": 2.9529409408569336, "step": 5705, "token_acc": 0.30383015884003645 }, { "epoch": 3.3447669305189094, "grad_norm": 0.2744231406061515, "learning_rate": 0.000192578731138592, "loss": 2.990333080291748, "step": 5706, "token_acc": 0.30015577665228926 }, { "epoch": 3.3453532688361185, "grad_norm": 0.2825590446945792, "learning_rate": 0.000192575066661392, "loss": 2.934597969055176, "step": 5707, "token_acc": 0.3078815262335871 }, { "epoch": 3.3459396071533276, "grad_norm": 0.30014556387197455, "learning_rate": 0.00019257140131457402, "loss": 2.9739320278167725, "step": 5708, "token_acc": 0.30023303112109817 }, { "epoch": 3.3465259454705363, "grad_norm": 0.2867699552732021, "learning_rate": 0.00019256773509817245, "loss": 2.948106288909912, "step": 5709, "token_acc": 0.3062908207049296 }, { "epoch": 3.3471122837877454, "grad_norm": 0.25611484686755054, "learning_rate": 0.00019256406801222177, "loss": 2.9990603923797607, "step": 5710, "token_acc": 0.2977181293769557 }, { "epoch": 3.3476986221049545, "grad_norm": 0.2963032919996877, "learning_rate": 0.00019256040005675637, "loss": 2.931915760040283, "step": 5711, "token_acc": 0.307949142652296 }, { "epoch": 3.3482849604221636, "grad_norm": 0.2656891019499225, "learning_rate": 0.00019255673123181078, "loss": 2.9224300384521484, "step": 5712, "token_acc": 0.3095964314380831 }, { "epoch": 3.3488712987393727, "grad_norm": 0.27327014798662047, "learning_rate": 0.00019255306153741938, "loss": 3.0212671756744385, "step": 5713, "token_acc": 0.2975475351883862 }, { "epoch": 3.349457637056582, "grad_norm": 0.28866700521472455, "learning_rate": 0.0001925493909736167, "loss": 2.9290785789489746, "step": 5714, "token_acc": 0.3085989534753317 }, { "epoch": 3.3500439753737905, "grad_norm": 0.2798457855291285, "learning_rate": 0.0001925457195404372, "loss": 2.998946189880371, "step": 5715, "token_acc": 0.2973970709002773 }, { "epoch": 3.3506303136909996, "grad_norm": 0.2555360480075001, "learning_rate": 0.0001925420472379154, "loss": 2.9664478302001953, "step": 5716, "token_acc": 0.3030438758561484 }, { "epoch": 3.3512166520082087, "grad_norm": 0.32201260277578936, "learning_rate": 0.00019253837406608572, "loss": 2.9732651710510254, "step": 5717, "token_acc": 0.3024322132907802 }, { "epoch": 3.351802990325418, "grad_norm": 0.3750234247351803, "learning_rate": 0.00019253470002498276, "loss": 2.9653706550598145, "step": 5718, "token_acc": 0.30171153081872426 }, { "epoch": 3.352389328642627, "grad_norm": 0.3045042616259551, "learning_rate": 0.00019253102511464096, "loss": 2.923147678375244, "step": 5719, "token_acc": 0.31095209885781394 }, { "epoch": 3.3529756669598356, "grad_norm": 0.31503768892191847, "learning_rate": 0.00019252734933509485, "loss": 2.9490671157836914, "step": 5720, "token_acc": 0.30544086299164463 }, { "epoch": 3.3535620052770447, "grad_norm": 0.417740447715003, "learning_rate": 0.000192523672686379, "loss": 2.97928524017334, "step": 5721, "token_acc": 0.3006874522237779 }, { "epoch": 3.354148343594254, "grad_norm": 0.4136862674080124, "learning_rate": 0.00019251999516852792, "loss": 2.99717378616333, "step": 5722, "token_acc": 0.2997853822676143 }, { "epoch": 3.354734681911463, "grad_norm": 0.3033397916968096, "learning_rate": 0.00019251631678157612, "loss": 2.9185729026794434, "step": 5723, "token_acc": 0.30994757999842154 }, { "epoch": 3.355321020228672, "grad_norm": 0.4411999003332541, "learning_rate": 0.00019251263752555824, "loss": 2.9884591102600098, "step": 5724, "token_acc": 0.30110112184884835 }, { "epoch": 3.355907358545881, "grad_norm": 0.34017388741115556, "learning_rate": 0.0001925089574005088, "loss": 2.950345993041992, "step": 5725, "token_acc": 0.3040848562138021 }, { "epoch": 3.35649369686309, "grad_norm": 0.3504643876493145, "learning_rate": 0.00019250527640646232, "loss": 2.9666855335235596, "step": 5726, "token_acc": 0.3017126753855008 }, { "epoch": 3.357080035180299, "grad_norm": 0.2962101506901891, "learning_rate": 0.00019250159454345346, "loss": 2.9271202087402344, "step": 5727, "token_acc": 0.30883555564420423 }, { "epoch": 3.357666373497508, "grad_norm": 0.3797009920046121, "learning_rate": 0.00019249791181151675, "loss": 3.001551628112793, "step": 5728, "token_acc": 0.29888918248709295 }, { "epoch": 3.358252711814717, "grad_norm": 0.2851303836353199, "learning_rate": 0.0001924942282106868, "loss": 2.9751412868499756, "step": 5729, "token_acc": 0.3013681532586585 }, { "epoch": 3.3588390501319263, "grad_norm": 0.33752683287973206, "learning_rate": 0.00019249054374099819, "loss": 2.960116386413574, "step": 5730, "token_acc": 0.30298590372129525 }, { "epoch": 3.359425388449135, "grad_norm": 0.28628521573140286, "learning_rate": 0.00019248685840248558, "loss": 2.959883213043213, "step": 5731, "token_acc": 0.30396260677313347 }, { "epoch": 3.360011726766344, "grad_norm": 0.2784896099867368, "learning_rate": 0.00019248317219518356, "loss": 2.9690403938293457, "step": 5732, "token_acc": 0.3037161590632962 }, { "epoch": 3.360598065083553, "grad_norm": 0.2910907024642491, "learning_rate": 0.00019247948511912677, "loss": 2.9849278926849365, "step": 5733, "token_acc": 0.301412987012987 }, { "epoch": 3.3611844034007623, "grad_norm": 0.28908645342418615, "learning_rate": 0.00019247579717434984, "loss": 2.9263014793395996, "step": 5734, "token_acc": 0.30818456446962866 }, { "epoch": 3.3617707417179714, "grad_norm": 0.2724714170784964, "learning_rate": 0.00019247210836088736, "loss": 2.9213266372680664, "step": 5735, "token_acc": 0.31018308640194525 }, { "epoch": 3.3623570800351805, "grad_norm": 0.3177989138583467, "learning_rate": 0.00019246841867877405, "loss": 2.96691632270813, "step": 5736, "token_acc": 0.3020921526690512 }, { "epoch": 3.362943418352389, "grad_norm": 0.24711114588005784, "learning_rate": 0.00019246472812804459, "loss": 2.9579567909240723, "step": 5737, "token_acc": 0.304283665972433 }, { "epoch": 3.3635297566695983, "grad_norm": 0.29805592520589924, "learning_rate": 0.00019246103670873357, "loss": 2.9577836990356445, "step": 5738, "token_acc": 0.3036515004027487 }, { "epoch": 3.3641160949868074, "grad_norm": 0.2952313865428316, "learning_rate": 0.0001924573444208757, "loss": 2.9634928703308105, "step": 5739, "token_acc": 0.3031465989548213 }, { "epoch": 3.3647024333040165, "grad_norm": 0.318628031202649, "learning_rate": 0.00019245365126450569, "loss": 2.961700916290283, "step": 5740, "token_acc": 0.3045488179982932 }, { "epoch": 3.3652887716212256, "grad_norm": 0.2624352806069112, "learning_rate": 0.00019244995723965817, "loss": 2.933213949203491, "step": 5741, "token_acc": 0.30751287108629566 }, { "epoch": 3.3658751099384343, "grad_norm": 0.3099060242222501, "learning_rate": 0.00019244626234636792, "loss": 2.9584293365478516, "step": 5742, "token_acc": 0.3024436744468745 }, { "epoch": 3.3664614482556434, "grad_norm": 0.28218955311983523, "learning_rate": 0.0001924425665846696, "loss": 2.940584182739258, "step": 5743, "token_acc": 0.30733539903992807 }, { "epoch": 3.3670477865728525, "grad_norm": 0.270669129816695, "learning_rate": 0.00019243886995459793, "loss": 2.911543846130371, "step": 5744, "token_acc": 0.3119433897134967 }, { "epoch": 3.3676341248900616, "grad_norm": 0.2838136507102207, "learning_rate": 0.00019243517245618765, "loss": 2.939208745956421, "step": 5745, "token_acc": 0.3045385939361843 }, { "epoch": 3.3682204632072708, "grad_norm": 0.2811213026772572, "learning_rate": 0.00019243147408947345, "loss": 2.954514980316162, "step": 5746, "token_acc": 0.303903032204919 }, { "epoch": 3.36880680152448, "grad_norm": 0.26376206148353515, "learning_rate": 0.00019242777485449012, "loss": 2.951934576034546, "step": 5747, "token_acc": 0.3051238011926733 }, { "epoch": 3.3693931398416885, "grad_norm": 0.2603075614460153, "learning_rate": 0.0001924240747512724, "loss": 2.971841335296631, "step": 5748, "token_acc": 0.30203551044604715 }, { "epoch": 3.3699794781588976, "grad_norm": 0.28425169389587956, "learning_rate": 0.00019242037377985508, "loss": 2.9374032020568848, "step": 5749, "token_acc": 0.30648413043928935 }, { "epoch": 3.3705658164761068, "grad_norm": 0.2975327074195619, "learning_rate": 0.00019241667194027281, "loss": 2.928086757659912, "step": 5750, "token_acc": 0.3076360087826299 }, { "epoch": 3.371152154793316, "grad_norm": 0.352366402367999, "learning_rate": 0.0001924129692325605, "loss": 2.9284329414367676, "step": 5751, "token_acc": 0.3086280717784176 }, { "epoch": 3.3717384931105245, "grad_norm": 0.2747248576501104, "learning_rate": 0.00019240926565675283, "loss": 2.9532876014709473, "step": 5752, "token_acc": 0.3050555873658247 }, { "epoch": 3.3723248314277336, "grad_norm": 0.2696115813869362, "learning_rate": 0.00019240556121288463, "loss": 2.990560531616211, "step": 5753, "token_acc": 0.2997854564729044 }, { "epoch": 3.3729111697449428, "grad_norm": 0.30870040806028043, "learning_rate": 0.00019240185590099076, "loss": 2.9389357566833496, "step": 5754, "token_acc": 0.30667883995147927 }, { "epoch": 3.373497508062152, "grad_norm": 0.24842278371776058, "learning_rate": 0.0001923981497211059, "loss": 2.94051194190979, "step": 5755, "token_acc": 0.3072416688460956 }, { "epoch": 3.374083846379361, "grad_norm": 0.31767891170471424, "learning_rate": 0.000192394442673265, "loss": 2.963165283203125, "step": 5756, "token_acc": 0.30041329162437075 }, { "epoch": 3.37467018469657, "grad_norm": 0.2904844746443188, "learning_rate": 0.00019239073475750274, "loss": 2.949331760406494, "step": 5757, "token_acc": 0.3057805075320999 }, { "epoch": 3.3752565230137788, "grad_norm": 0.2796993327438782, "learning_rate": 0.00019238702597385406, "loss": 2.9786510467529297, "step": 5758, "token_acc": 0.30180215383172604 }, { "epoch": 3.375842861330988, "grad_norm": 0.3004838477890508, "learning_rate": 0.00019238331632235375, "loss": 2.983963966369629, "step": 5759, "token_acc": 0.299472132932127 }, { "epoch": 3.376429199648197, "grad_norm": 0.32721998720120615, "learning_rate": 0.0001923796058030367, "loss": 2.961064338684082, "step": 5760, "token_acc": 0.30374782257721883 }, { "epoch": 3.377015537965406, "grad_norm": 0.27795233031942596, "learning_rate": 0.00019237589441593772, "loss": 2.971208333969116, "step": 5761, "token_acc": 0.3019162363740677 }, { "epoch": 3.377601876282615, "grad_norm": 0.2749830910531999, "learning_rate": 0.0001923721821610917, "loss": 2.9554057121276855, "step": 5762, "token_acc": 0.30349082477712713 }, { "epoch": 3.378188214599824, "grad_norm": 0.3333446360762304, "learning_rate": 0.0001923684690385335, "loss": 2.9824366569519043, "step": 5763, "token_acc": 0.30133124593406707 }, { "epoch": 3.378774552917033, "grad_norm": 0.34396770282222777, "learning_rate": 0.00019236475504829796, "loss": 2.964900016784668, "step": 5764, "token_acc": 0.3036453514499458 }, { "epoch": 3.379360891234242, "grad_norm": 0.2975301875118032, "learning_rate": 0.00019236104019042008, "loss": 2.9454212188720703, "step": 5765, "token_acc": 0.3052345129024124 }, { "epoch": 3.379947229551451, "grad_norm": 0.3437034142128457, "learning_rate": 0.00019235732446493464, "loss": 2.9291210174560547, "step": 5766, "token_acc": 0.3094125206630842 }, { "epoch": 3.3805335678686603, "grad_norm": 0.322747865861446, "learning_rate": 0.00019235360787187657, "loss": 2.9577784538269043, "step": 5767, "token_acc": 0.3048610260153332 }, { "epoch": 3.3811199061858694, "grad_norm": 0.2881548319795034, "learning_rate": 0.00019234989041128084, "loss": 2.9466099739074707, "step": 5768, "token_acc": 0.3054564643799472 }, { "epoch": 3.381706244503078, "grad_norm": 0.304900992340823, "learning_rate": 0.00019234617208318232, "loss": 2.944830894470215, "step": 5769, "token_acc": 0.30645207338944735 }, { "epoch": 3.382292582820287, "grad_norm": 0.2804240672635995, "learning_rate": 0.00019234245288761597, "loss": 2.94958758354187, "step": 5770, "token_acc": 0.3045614820049878 }, { "epoch": 3.3828789211374963, "grad_norm": 0.26921335215013226, "learning_rate": 0.00019233873282461668, "loss": 2.957679271697998, "step": 5771, "token_acc": 0.3034449476949062 }, { "epoch": 3.3834652594547054, "grad_norm": 0.2518484328120801, "learning_rate": 0.00019233501189421946, "loss": 2.9837541580200195, "step": 5772, "token_acc": 0.30064528744622604 }, { "epoch": 3.3840515977719146, "grad_norm": 0.2548911441272588, "learning_rate": 0.00019233129009645918, "loss": 2.93544340133667, "step": 5773, "token_acc": 0.30752351433958414 }, { "epoch": 3.3846379360891232, "grad_norm": 0.2908713318799221, "learning_rate": 0.00019232756743137088, "loss": 2.934262275695801, "step": 5774, "token_acc": 0.30866100649812644 }, { "epoch": 3.3852242744063323, "grad_norm": 0.30772870115115925, "learning_rate": 0.00019232384389898947, "loss": 2.949921131134033, "step": 5775, "token_acc": 0.3061487068403059 }, { "epoch": 3.3858106127235414, "grad_norm": 0.2562417150644748, "learning_rate": 0.00019232011949934998, "loss": 2.927000045776367, "step": 5776, "token_acc": 0.3079410927138847 }, { "epoch": 3.3863969510407506, "grad_norm": 0.3130537380528935, "learning_rate": 0.00019231639423248736, "loss": 2.9760966300964355, "step": 5777, "token_acc": 0.3021288733307403 }, { "epoch": 3.3869832893579597, "grad_norm": 0.284372304957134, "learning_rate": 0.0001923126680984366, "loss": 2.941946029663086, "step": 5778, "token_acc": 0.30766374647519845 }, { "epoch": 3.387569627675169, "grad_norm": 0.2855532747262567, "learning_rate": 0.00019230894109723275, "loss": 2.9145219326019287, "step": 5779, "token_acc": 0.3116067734233852 }, { "epoch": 3.3881559659923774, "grad_norm": 0.28712431488170337, "learning_rate": 0.00019230521322891075, "loss": 2.947572708129883, "step": 5780, "token_acc": 0.3063375718588172 }, { "epoch": 3.3887423043095866, "grad_norm": 0.31002664180415784, "learning_rate": 0.0001923014844935057, "loss": 2.9784586429595947, "step": 5781, "token_acc": 0.30071397054839233 }, { "epoch": 3.3893286426267957, "grad_norm": 0.29465181639612187, "learning_rate": 0.00019229775489105255, "loss": 2.93618106842041, "step": 5782, "token_acc": 0.30649953362357957 }, { "epoch": 3.389914980944005, "grad_norm": 0.23480621251682127, "learning_rate": 0.00019229402442158636, "loss": 2.9413909912109375, "step": 5783, "token_acc": 0.30645301201018854 }, { "epoch": 3.390501319261214, "grad_norm": 0.29320132725755377, "learning_rate": 0.0001922902930851422, "loss": 2.969813823699951, "step": 5784, "token_acc": 0.30218039141682196 }, { "epoch": 3.3910876575784226, "grad_norm": 0.2602273753746014, "learning_rate": 0.00019228656088175506, "loss": 2.9778590202331543, "step": 5785, "token_acc": 0.3013628458339997 }, { "epoch": 3.3916739958956317, "grad_norm": 0.2478350241891969, "learning_rate": 0.00019228282781146009, "loss": 2.996553897857666, "step": 5786, "token_acc": 0.29894062602690424 }, { "epoch": 3.392260334212841, "grad_norm": 0.2769157756672533, "learning_rate": 0.00019227909387429227, "loss": 2.968242645263672, "step": 5787, "token_acc": 0.3020055243823363 }, { "epoch": 3.39284667253005, "grad_norm": 0.3261816456990511, "learning_rate": 0.0001922753590702867, "loss": 2.9493539333343506, "step": 5788, "token_acc": 0.3049907240469285 }, { "epoch": 3.393433010847259, "grad_norm": 0.3323684665831343, "learning_rate": 0.0001922716233994785, "loss": 2.9691548347473145, "step": 5789, "token_acc": 0.30354636571863863 }, { "epoch": 3.394019349164468, "grad_norm": 0.2879545067866791, "learning_rate": 0.00019226788686190274, "loss": 2.9961557388305664, "step": 5790, "token_acc": 0.298993341315808 }, { "epoch": 3.394605687481677, "grad_norm": 0.2660184175729306, "learning_rate": 0.0001922641494575945, "loss": 2.9196767807006836, "step": 5791, "token_acc": 0.30901362459182524 }, { "epoch": 3.395192025798886, "grad_norm": 0.2848352009248185, "learning_rate": 0.0001922604111865889, "loss": 2.962125301361084, "step": 5792, "token_acc": 0.30390464990287924 }, { "epoch": 3.395778364116095, "grad_norm": 0.25878313032633177, "learning_rate": 0.0001922566720489211, "loss": 2.9233264923095703, "step": 5793, "token_acc": 0.3111245068542549 }, { "epoch": 3.396364702433304, "grad_norm": 0.29503334538679316, "learning_rate": 0.00019225293204462615, "loss": 2.9470572471618652, "step": 5794, "token_acc": 0.30614880406909645 }, { "epoch": 3.3969510407505132, "grad_norm": 0.3118013756149966, "learning_rate": 0.0001922491911737392, "loss": 2.9495840072631836, "step": 5795, "token_acc": 0.3060521248724235 }, { "epoch": 3.397537379067722, "grad_norm": 0.30792010718261004, "learning_rate": 0.00019224544943629543, "loss": 2.969783306121826, "step": 5796, "token_acc": 0.302569684299008 }, { "epoch": 3.398123717384931, "grad_norm": 0.27125341798012426, "learning_rate": 0.00019224170683232995, "loss": 2.9594855308532715, "step": 5797, "token_acc": 0.3033516194891068 }, { "epoch": 3.39871005570214, "grad_norm": 0.3132062802128377, "learning_rate": 0.00019223796336187795, "loss": 2.9931325912475586, "step": 5798, "token_acc": 0.29958113507568274 }, { "epoch": 3.3992963940193492, "grad_norm": 0.30425678226173, "learning_rate": 0.00019223421902497454, "loss": 2.995800018310547, "step": 5799, "token_acc": 0.29708317206661244 }, { "epoch": 3.3998827323365584, "grad_norm": 0.3202432698780432, "learning_rate": 0.00019223047382165497, "loss": 2.932878017425537, "step": 5800, "token_acc": 0.30660355557247215 }, { "epoch": 3.4004690706537675, "grad_norm": 0.3740212716289655, "learning_rate": 0.00019222672775195436, "loss": 2.957045078277588, "step": 5801, "token_acc": 0.30406507112468845 }, { "epoch": 3.401055408970976, "grad_norm": 0.3921320594737038, "learning_rate": 0.00019222298081590796, "loss": 2.984714984893799, "step": 5802, "token_acc": 0.30156723630742166 }, { "epoch": 3.4016417472881852, "grad_norm": 0.28843319638075454, "learning_rate": 0.00019221923301355088, "loss": 2.924182176589966, "step": 5803, "token_acc": 0.3099411092097278 }, { "epoch": 3.4022280856053944, "grad_norm": 0.3374818527928755, "learning_rate": 0.0001922154843449184, "loss": 2.970663070678711, "step": 5804, "token_acc": 0.3019865149536027 }, { "epoch": 3.4028144239226035, "grad_norm": 0.3342152905681232, "learning_rate": 0.00019221173481004568, "loss": 2.981661796569824, "step": 5805, "token_acc": 0.3004859724703163 }, { "epoch": 3.403400762239812, "grad_norm": 0.3224431405729731, "learning_rate": 0.00019220798440896795, "loss": 2.9281656742095947, "step": 5806, "token_acc": 0.3073039000510834 }, { "epoch": 3.4039871005570213, "grad_norm": 0.29048420364357325, "learning_rate": 0.00019220423314172052, "loss": 2.9836599826812744, "step": 5807, "token_acc": 0.30107193594214127 }, { "epoch": 3.4045734388742304, "grad_norm": 0.30278841862928607, "learning_rate": 0.00019220048100833853, "loss": 2.981579303741455, "step": 5808, "token_acc": 0.3015763518263512 }, { "epoch": 3.4051597771914395, "grad_norm": 0.3132937961294824, "learning_rate": 0.00019219672800885727, "loss": 2.940189838409424, "step": 5809, "token_acc": 0.3071596536665186 }, { "epoch": 3.4057461155086486, "grad_norm": 0.23417393552039273, "learning_rate": 0.000192192974143312, "loss": 2.9316890239715576, "step": 5810, "token_acc": 0.30723328386565235 }, { "epoch": 3.4063324538258577, "grad_norm": 0.26221674650673316, "learning_rate": 0.00019218921941173794, "loss": 2.941380023956299, "step": 5811, "token_acc": 0.30480195227934853 }, { "epoch": 3.4069187921430664, "grad_norm": 0.27577789523585744, "learning_rate": 0.00019218546381417038, "loss": 2.939434051513672, "step": 5812, "token_acc": 0.3080845653022677 }, { "epoch": 3.4075051304602755, "grad_norm": 0.28782867597397577, "learning_rate": 0.00019218170735064465, "loss": 2.9570720195770264, "step": 5813, "token_acc": 0.30452294257824786 }, { "epoch": 3.4080914687774846, "grad_norm": 0.27842571008664974, "learning_rate": 0.000192177950021196, "loss": 2.9782395362854004, "step": 5814, "token_acc": 0.30264572280592256 }, { "epoch": 3.4086778070946937, "grad_norm": 0.2566502665192402, "learning_rate": 0.00019217419182585967, "loss": 2.976616859436035, "step": 5815, "token_acc": 0.30056501588293494 }, { "epoch": 3.409264145411903, "grad_norm": 0.2576328191623782, "learning_rate": 0.00019217043276467105, "loss": 2.9728050231933594, "step": 5816, "token_acc": 0.3032339811588455 }, { "epoch": 3.4098504837291115, "grad_norm": 0.2612742936256722, "learning_rate": 0.00019216667283766543, "loss": 2.942906379699707, "step": 5817, "token_acc": 0.3060043720636889 }, { "epoch": 3.4104368220463206, "grad_norm": 0.2639981971791675, "learning_rate": 0.00019216291204487808, "loss": 2.9648547172546387, "step": 5818, "token_acc": 0.3018944188569666 }, { "epoch": 3.4110231603635297, "grad_norm": 0.24389115998631966, "learning_rate": 0.00019215915038634437, "loss": 2.9800033569335938, "step": 5819, "token_acc": 0.3015268600440512 }, { "epoch": 3.411609498680739, "grad_norm": 0.27209341880920535, "learning_rate": 0.00019215538786209962, "loss": 2.983247756958008, "step": 5820, "token_acc": 0.30063575032539425 }, { "epoch": 3.412195836997948, "grad_norm": 0.29210728793594526, "learning_rate": 0.00019215162447217923, "loss": 2.9713613986968994, "step": 5821, "token_acc": 0.30083496265905957 }, { "epoch": 3.412782175315157, "grad_norm": 0.3453638010395938, "learning_rate": 0.00019214786021661847, "loss": 2.9659583568573, "step": 5822, "token_acc": 0.3012512825979772 }, { "epoch": 3.4133685136323657, "grad_norm": 0.30386555011632155, "learning_rate": 0.00019214409509545272, "loss": 2.9835240840911865, "step": 5823, "token_acc": 0.29981537650006596 }, { "epoch": 3.413954851949575, "grad_norm": 0.24536556647429184, "learning_rate": 0.00019214032910871737, "loss": 2.965771436691284, "step": 5824, "token_acc": 0.30324765837020456 }, { "epoch": 3.414541190266784, "grad_norm": 0.28932881774061964, "learning_rate": 0.0001921365622564478, "loss": 2.914560317993164, "step": 5825, "token_acc": 0.30921828163416526 }, { "epoch": 3.415127528583993, "grad_norm": 0.2910470091151356, "learning_rate": 0.0001921327945386794, "loss": 2.975356340408325, "step": 5826, "token_acc": 0.3003853631532922 }, { "epoch": 3.415713866901202, "grad_norm": 0.25237221767907014, "learning_rate": 0.00019212902595544754, "loss": 2.9862780570983887, "step": 5827, "token_acc": 0.29866192632549493 }, { "epoch": 3.416300205218411, "grad_norm": 0.2653344306629598, "learning_rate": 0.00019212525650678762, "loss": 3.0299577713012695, "step": 5828, "token_acc": 0.2955012271295701 }, { "epoch": 3.41688654353562, "grad_norm": 0.28071154867753856, "learning_rate": 0.000192121486192735, "loss": 2.921548366546631, "step": 5829, "token_acc": 0.31006354572296635 }, { "epoch": 3.417472881852829, "grad_norm": 0.24848866927867, "learning_rate": 0.00019211771501332522, "loss": 2.9695606231689453, "step": 5830, "token_acc": 0.3027783784649639 }, { "epoch": 3.418059220170038, "grad_norm": 0.2717313738561597, "learning_rate": 0.0001921139429685936, "loss": 3.005025863647461, "step": 5831, "token_acc": 0.29865001135109737 }, { "epoch": 3.4186455584872473, "grad_norm": 0.3260257905875027, "learning_rate": 0.00019211017005857565, "loss": 2.9719338417053223, "step": 5832, "token_acc": 0.3020611739618521 }, { "epoch": 3.4192318968044564, "grad_norm": 0.26837644040684583, "learning_rate": 0.00019210639628330673, "loss": 2.9891929626464844, "step": 5833, "token_acc": 0.29983823775500373 }, { "epoch": 3.419818235121665, "grad_norm": 0.2785892155686165, "learning_rate": 0.00019210262164282238, "loss": 3.020864725112915, "step": 5834, "token_acc": 0.2962394602768927 }, { "epoch": 3.420404573438874, "grad_norm": 0.3181023193362203, "learning_rate": 0.00019209884613715796, "loss": 2.9873809814453125, "step": 5835, "token_acc": 0.30083260297984227 }, { "epoch": 3.4209909117560833, "grad_norm": 0.2937404926990121, "learning_rate": 0.000192095069766349, "loss": 2.955162525177002, "step": 5836, "token_acc": 0.3054529402197904 }, { "epoch": 3.4215772500732924, "grad_norm": 0.29893660694712293, "learning_rate": 0.00019209129253043098, "loss": 2.9809131622314453, "step": 5837, "token_acc": 0.3031430639521251 }, { "epoch": 3.4221635883905015, "grad_norm": 0.31912591219344894, "learning_rate": 0.00019208751442943936, "loss": 2.9565138816833496, "step": 5838, "token_acc": 0.30520688052068806 }, { "epoch": 3.42274992670771, "grad_norm": 0.2483414046933559, "learning_rate": 0.0001920837354634096, "loss": 2.964552879333496, "step": 5839, "token_acc": 0.30297436747120515 }, { "epoch": 3.4233362650249193, "grad_norm": 0.2792801130514059, "learning_rate": 0.00019207995563237727, "loss": 2.9554409980773926, "step": 5840, "token_acc": 0.30358711252972304 }, { "epoch": 3.4239226033421284, "grad_norm": 0.26189117801007433, "learning_rate": 0.0001920761749363778, "loss": 2.9567999839782715, "step": 5841, "token_acc": 0.3044390852731853 }, { "epoch": 3.4245089416593375, "grad_norm": 0.30009422800571467, "learning_rate": 0.00019207239337544677, "loss": 2.9635977745056152, "step": 5842, "token_acc": 0.30409488095207704 }, { "epoch": 3.4250952799765466, "grad_norm": 0.3435392422210084, "learning_rate": 0.00019206861094961966, "loss": 2.9902148246765137, "step": 5843, "token_acc": 0.2987323752891528 }, { "epoch": 3.4256816182937557, "grad_norm": 0.24571940208033008, "learning_rate": 0.00019206482765893201, "loss": 2.9179511070251465, "step": 5844, "token_acc": 0.3100790351129955 }, { "epoch": 3.4262679566109644, "grad_norm": 0.3143668913430003, "learning_rate": 0.00019206104350341936, "loss": 2.9577560424804688, "step": 5845, "token_acc": 0.30323600095520203 }, { "epoch": 3.4268542949281735, "grad_norm": 0.3531532322135951, "learning_rate": 0.0001920572584831173, "loss": 2.9534034729003906, "step": 5846, "token_acc": 0.3046614945261659 }, { "epoch": 3.4274406332453826, "grad_norm": 0.3843028960845201, "learning_rate": 0.0001920534725980613, "loss": 2.9844651222229004, "step": 5847, "token_acc": 0.30068123525729246 }, { "epoch": 3.4280269715625917, "grad_norm": 0.3755600065819227, "learning_rate": 0.00019204968584828698, "loss": 2.945474624633789, "step": 5848, "token_acc": 0.30626033846545364 }, { "epoch": 3.4286133098798004, "grad_norm": 0.25599193466339026, "learning_rate": 0.00019204589823382988, "loss": 2.9698615074157715, "step": 5849, "token_acc": 0.30182895494162143 }, { "epoch": 3.4291996481970095, "grad_norm": 0.34769139272050503, "learning_rate": 0.00019204210975472564, "loss": 2.931894302368164, "step": 5850, "token_acc": 0.3084536160894996 }, { "epoch": 3.4297859865142186, "grad_norm": 0.31159721794229345, "learning_rate": 0.00019203832041100977, "loss": 2.9453389644622803, "step": 5851, "token_acc": 0.30787213131139257 }, { "epoch": 3.4303723248314277, "grad_norm": 0.2579993036213398, "learning_rate": 0.0001920345302027179, "loss": 2.9483261108398438, "step": 5852, "token_acc": 0.3046747886292524 }, { "epoch": 3.430958663148637, "grad_norm": 0.2698694918074235, "learning_rate": 0.00019203073912988568, "loss": 2.9651904106140137, "step": 5853, "token_acc": 0.30332346131635135 }, { "epoch": 3.431545001465846, "grad_norm": 0.23754907248937054, "learning_rate": 0.00019202694719254866, "loss": 2.915735960006714, "step": 5854, "token_acc": 0.30981478338245355 }, { "epoch": 3.432131339783055, "grad_norm": 0.28564343428170835, "learning_rate": 0.00019202315439074247, "loss": 2.978209972381592, "step": 5855, "token_acc": 0.2997180403806455 }, { "epoch": 3.4327176781002637, "grad_norm": 0.2779476397410527, "learning_rate": 0.00019201936072450274, "loss": 2.941974639892578, "step": 5856, "token_acc": 0.30566578850835363 }, { "epoch": 3.433304016417473, "grad_norm": 0.26216289935941856, "learning_rate": 0.00019201556619386515, "loss": 2.9447860717773438, "step": 5857, "token_acc": 0.30583818274362057 }, { "epoch": 3.433890354734682, "grad_norm": 0.319434240545209, "learning_rate": 0.0001920117707988653, "loss": 2.9532337188720703, "step": 5858, "token_acc": 0.30565926066841576 }, { "epoch": 3.434476693051891, "grad_norm": 0.3189273311043452, "learning_rate": 0.0001920079745395388, "loss": 2.986436128616333, "step": 5859, "token_acc": 0.30093469577736165 }, { "epoch": 3.4350630313690997, "grad_norm": 0.24590686053583413, "learning_rate": 0.0001920041774159214, "loss": 2.943110227584839, "step": 5860, "token_acc": 0.30652719577934445 }, { "epoch": 3.435649369686309, "grad_norm": 0.2952660269152213, "learning_rate": 0.00019200037942804875, "loss": 2.9498395919799805, "step": 5861, "token_acc": 0.3048740687822666 }, { "epoch": 3.436235708003518, "grad_norm": 0.2289524545554295, "learning_rate": 0.00019199658057595647, "loss": 2.9807400703430176, "step": 5862, "token_acc": 0.30063000667556744 }, { "epoch": 3.436822046320727, "grad_norm": 0.28100380142177656, "learning_rate": 0.0001919927808596803, "loss": 2.934401512145996, "step": 5863, "token_acc": 0.30794818965495013 }, { "epoch": 3.437408384637936, "grad_norm": 0.23094904124217333, "learning_rate": 0.00019198898027925591, "loss": 2.9253382682800293, "step": 5864, "token_acc": 0.3080496356497714 }, { "epoch": 3.4379947229551453, "grad_norm": 0.266996533800148, "learning_rate": 0.000191985178834719, "loss": 2.948082685470581, "step": 5865, "token_acc": 0.3051358730984267 }, { "epoch": 3.438581061272354, "grad_norm": 0.2964391823174672, "learning_rate": 0.0001919813765261053, "loss": 2.9613304138183594, "step": 5866, "token_acc": 0.3037651005466537 }, { "epoch": 3.439167399589563, "grad_norm": 0.29947770849897243, "learning_rate": 0.00019197757335345051, "loss": 2.965766429901123, "step": 5867, "token_acc": 0.30272262836926805 }, { "epoch": 3.439753737906772, "grad_norm": 0.3322360800348083, "learning_rate": 0.00019197376931679035, "loss": 2.960585117340088, "step": 5868, "token_acc": 0.30543242247767505 }, { "epoch": 3.4403400762239813, "grad_norm": 0.30472365436020155, "learning_rate": 0.00019196996441616057, "loss": 2.949005603790283, "step": 5869, "token_acc": 0.30664137431263055 }, { "epoch": 3.4409264145411904, "grad_norm": 0.3266952442526886, "learning_rate": 0.00019196615865159692, "loss": 2.978978157043457, "step": 5870, "token_acc": 0.301803861985566 }, { "epoch": 3.441512752858399, "grad_norm": 0.330146266493025, "learning_rate": 0.00019196235202313512, "loss": 2.9840073585510254, "step": 5871, "token_acc": 0.2999031058230275 }, { "epoch": 3.442099091175608, "grad_norm": 0.2682976082927483, "learning_rate": 0.00019195854453081095, "loss": 2.951385498046875, "step": 5872, "token_acc": 0.3055874007252053 }, { "epoch": 3.4426854294928173, "grad_norm": 0.3721189871150996, "learning_rate": 0.00019195473617466017, "loss": 2.9825026988983154, "step": 5873, "token_acc": 0.2994223615158824 }, { "epoch": 3.4432717678100264, "grad_norm": 0.2998320128962938, "learning_rate": 0.00019195092695471855, "loss": 2.9249467849731445, "step": 5874, "token_acc": 0.3082879922426332 }, { "epoch": 3.4438581061272355, "grad_norm": 0.33987721764103, "learning_rate": 0.00019194711687102188, "loss": 2.962435483932495, "step": 5875, "token_acc": 0.3029061850402232 }, { "epoch": 3.4444444444444446, "grad_norm": 0.3808321135265712, "learning_rate": 0.00019194330592360595, "loss": 2.9305970668792725, "step": 5876, "token_acc": 0.3081130015709455 }, { "epoch": 3.4450307827616533, "grad_norm": 0.2838053016642811, "learning_rate": 0.00019193949411250655, "loss": 2.9481234550476074, "step": 5877, "token_acc": 0.30723581629513175 }, { "epoch": 3.4456171210788624, "grad_norm": 0.3145662793211528, "learning_rate": 0.00019193568143775948, "loss": 2.946491241455078, "step": 5878, "token_acc": 0.3062626314971239 }, { "epoch": 3.4462034593960715, "grad_norm": 0.2697320374695421, "learning_rate": 0.0001919318678994006, "loss": 2.966817617416382, "step": 5879, "token_acc": 0.3026582616760725 }, { "epoch": 3.4467897977132806, "grad_norm": 0.363552144063481, "learning_rate": 0.00019192805349746566, "loss": 2.9571943283081055, "step": 5880, "token_acc": 0.3042139293064968 }, { "epoch": 3.4473761360304898, "grad_norm": 0.27708434208363003, "learning_rate": 0.00019192423823199056, "loss": 2.964855194091797, "step": 5881, "token_acc": 0.30335577381029 }, { "epoch": 3.4479624743476984, "grad_norm": 0.3244346920691581, "learning_rate": 0.0001919204221030111, "loss": 2.9121336936950684, "step": 5882, "token_acc": 0.30956103582173994 }, { "epoch": 3.4485488126649075, "grad_norm": 0.2976823632115045, "learning_rate": 0.00019191660511056315, "loss": 2.9448509216308594, "step": 5883, "token_acc": 0.3066706904311585 }, { "epoch": 3.4491351509821166, "grad_norm": 0.3073529388971304, "learning_rate": 0.00019191278725468256, "loss": 2.990128755569458, "step": 5884, "token_acc": 0.2994873164081855 }, { "epoch": 3.4497214892993258, "grad_norm": 0.27464180728062915, "learning_rate": 0.00019190896853540516, "loss": 3.000974416732788, "step": 5885, "token_acc": 0.2983952753087156 }, { "epoch": 3.450307827616535, "grad_norm": 0.32274414349210445, "learning_rate": 0.00019190514895276687, "loss": 2.946810722351074, "step": 5886, "token_acc": 0.3046700788983221 }, { "epoch": 3.450894165933744, "grad_norm": 0.2500161355313248, "learning_rate": 0.00019190132850680356, "loss": 2.9476499557495117, "step": 5887, "token_acc": 0.30469603107243803 }, { "epoch": 3.4514805042509527, "grad_norm": 0.32535656992806533, "learning_rate": 0.00019189750719755106, "loss": 2.953047513961792, "step": 5888, "token_acc": 0.30638956270897527 }, { "epoch": 3.4520668425681618, "grad_norm": 0.2516392505890602, "learning_rate": 0.00019189368502504537, "loss": 3.0043559074401855, "step": 5889, "token_acc": 0.2995595420521008 }, { "epoch": 3.452653180885371, "grad_norm": 0.29238211454016416, "learning_rate": 0.0001918898619893223, "loss": 2.9752047061920166, "step": 5890, "token_acc": 0.30282363001947876 }, { "epoch": 3.45323951920258, "grad_norm": 0.2662407422602763, "learning_rate": 0.0001918860380904178, "loss": 2.956540822982788, "step": 5891, "token_acc": 0.30357039411435305 }, { "epoch": 3.453825857519789, "grad_norm": 0.2671895066997925, "learning_rate": 0.00019188221332836782, "loss": 2.9923410415649414, "step": 5892, "token_acc": 0.29959832367442735 }, { "epoch": 3.4544121958369978, "grad_norm": 0.24488016676535831, "learning_rate": 0.00019187838770320825, "loss": 2.9865169525146484, "step": 5893, "token_acc": 0.2999305994020069 }, { "epoch": 3.454998534154207, "grad_norm": 0.2798155865349904, "learning_rate": 0.000191874561214975, "loss": 2.9410288333892822, "step": 5894, "token_acc": 0.30575499385605914 }, { "epoch": 3.455584872471416, "grad_norm": 0.2265680719449728, "learning_rate": 0.00019187073386370412, "loss": 2.9648513793945312, "step": 5895, "token_acc": 0.3040691886510702 }, { "epoch": 3.456171210788625, "grad_norm": 0.28194767823713174, "learning_rate": 0.0001918669056494314, "loss": 2.98783802986145, "step": 5896, "token_acc": 0.30117085465315824 }, { "epoch": 3.456757549105834, "grad_norm": 0.25318336622534454, "learning_rate": 0.00019186307657219297, "loss": 2.9368224143981934, "step": 5897, "token_acc": 0.3067925409134528 }, { "epoch": 3.4573438874230433, "grad_norm": 0.28508462528369305, "learning_rate": 0.00019185924663202468, "loss": 2.9496960639953613, "step": 5898, "token_acc": 0.3055572795099772 }, { "epoch": 3.457930225740252, "grad_norm": 0.24727390420822423, "learning_rate": 0.00019185541582896257, "loss": 2.9856793880462646, "step": 5899, "token_acc": 0.29950375102035526 }, { "epoch": 3.458516564057461, "grad_norm": 0.30468635485115086, "learning_rate": 0.0001918515841630426, "loss": 2.961728811264038, "step": 5900, "token_acc": 0.30219640910766143 }, { "epoch": 3.45910290237467, "grad_norm": 0.3022250658130902, "learning_rate": 0.0001918477516343008, "loss": 3.014646291732788, "step": 5901, "token_acc": 0.2950308408458471 }, { "epoch": 3.4596892406918793, "grad_norm": 0.28444301797341726, "learning_rate": 0.00019184391824277308, "loss": 2.967391014099121, "step": 5902, "token_acc": 0.3036778113856859 }, { "epoch": 3.460275579009088, "grad_norm": 0.3159989168597951, "learning_rate": 0.00019184008398849555, "loss": 2.971372365951538, "step": 5903, "token_acc": 0.3022410567318604 }, { "epoch": 3.460861917326297, "grad_norm": 0.24972851251337871, "learning_rate": 0.00019183624887150416, "loss": 2.993523120880127, "step": 5904, "token_acc": 0.2995925671786597 }, { "epoch": 3.4614482556435062, "grad_norm": 0.22663752289101194, "learning_rate": 0.000191832412891835, "loss": 2.971950054168701, "step": 5905, "token_acc": 0.3025093106708384 }, { "epoch": 3.4620345939607153, "grad_norm": 0.2321702861276606, "learning_rate": 0.00019182857604952403, "loss": 2.9795827865600586, "step": 5906, "token_acc": 0.30172230437336683 }, { "epoch": 3.4626209322779244, "grad_norm": 0.2443493680222593, "learning_rate": 0.00019182473834460735, "loss": 3.0340938568115234, "step": 5907, "token_acc": 0.29362152555440846 }, { "epoch": 3.4632072705951336, "grad_norm": 0.25724197474042537, "learning_rate": 0.00019182089977712096, "loss": 3.009056806564331, "step": 5908, "token_acc": 0.29684896566791347 }, { "epoch": 3.4637936089123427, "grad_norm": 0.2406065997317154, "learning_rate": 0.00019181706034710098, "loss": 2.9492576122283936, "step": 5909, "token_acc": 0.30258969061448304 }, { "epoch": 3.4643799472295513, "grad_norm": 0.2362749359692757, "learning_rate": 0.00019181322005458343, "loss": 2.939565420150757, "step": 5910, "token_acc": 0.30689354389345275 }, { "epoch": 3.4649662855467604, "grad_norm": 0.25374939802019236, "learning_rate": 0.0001918093788996044, "loss": 2.971625328063965, "step": 5911, "token_acc": 0.30232774377284705 }, { "epoch": 3.4655526238639696, "grad_norm": 0.2439709356329249, "learning_rate": 0.00019180553688219996, "loss": 2.951507091522217, "step": 5912, "token_acc": 0.3063108774448814 }, { "epoch": 3.4661389621811787, "grad_norm": 0.268728123262901, "learning_rate": 0.00019180169400240623, "loss": 2.983198642730713, "step": 5913, "token_acc": 0.2990803356251058 }, { "epoch": 3.4667253004983873, "grad_norm": 0.2718096451312759, "learning_rate": 0.00019179785026025926, "loss": 2.978200912475586, "step": 5914, "token_acc": 0.3020013256588341 }, { "epoch": 3.4673116388155965, "grad_norm": 0.24381964506274523, "learning_rate": 0.00019179400565579524, "loss": 2.94586181640625, "step": 5915, "token_acc": 0.30540347839372567 }, { "epoch": 3.4678979771328056, "grad_norm": 0.24442445861638454, "learning_rate": 0.00019179016018905018, "loss": 3.0127201080322266, "step": 5916, "token_acc": 0.2972553067805314 }, { "epoch": 3.4684843154500147, "grad_norm": 0.28595755196948425, "learning_rate": 0.00019178631386006028, "loss": 2.966919422149658, "step": 5917, "token_acc": 0.3035462828743011 }, { "epoch": 3.469070653767224, "grad_norm": 0.28765299683926016, "learning_rate": 0.0001917824666688616, "loss": 3.0000627040863037, "step": 5918, "token_acc": 0.2962995553669129 }, { "epoch": 3.469656992084433, "grad_norm": 0.2480596497872555, "learning_rate": 0.00019177861861549038, "loss": 2.937091588973999, "step": 5919, "token_acc": 0.3074703205505898 }, { "epoch": 3.4702433304016416, "grad_norm": 0.30609569502955264, "learning_rate": 0.0001917747696999827, "loss": 2.99935245513916, "step": 5920, "token_acc": 0.29867321999301694 }, { "epoch": 3.4708296687188507, "grad_norm": 0.2981444080754157, "learning_rate": 0.0001917709199223747, "loss": 2.9981398582458496, "step": 5921, "token_acc": 0.29834112666814266 }, { "epoch": 3.47141600703606, "grad_norm": 0.3048733057883234, "learning_rate": 0.0001917670692827026, "loss": 2.9533164501190186, "step": 5922, "token_acc": 0.3048958453711592 }, { "epoch": 3.472002345353269, "grad_norm": 0.3315694913579945, "learning_rate": 0.00019176321778100253, "loss": 2.945148468017578, "step": 5923, "token_acc": 0.30529060275671244 }, { "epoch": 3.472588683670478, "grad_norm": 0.4083119145483531, "learning_rate": 0.00019175936541731065, "loss": 2.986443042755127, "step": 5924, "token_acc": 0.29981445173115784 }, { "epoch": 3.4731750219876867, "grad_norm": 0.5330171639928414, "learning_rate": 0.00019175551219166324, "loss": 2.99127197265625, "step": 5925, "token_acc": 0.29905280631321035 }, { "epoch": 3.473761360304896, "grad_norm": 0.36020582723180783, "learning_rate": 0.00019175165810409638, "loss": 2.9860711097717285, "step": 5926, "token_acc": 0.299273904057435 }, { "epoch": 3.474347698622105, "grad_norm": 0.3421320740531399, "learning_rate": 0.00019174780315464637, "loss": 2.9848809242248535, "step": 5927, "token_acc": 0.30097084811027097 }, { "epoch": 3.474934036939314, "grad_norm": 0.35924239148617054, "learning_rate": 0.00019174394734334935, "loss": 2.952925682067871, "step": 5928, "token_acc": 0.3055462229335591 }, { "epoch": 3.475520375256523, "grad_norm": 0.32831154255201883, "learning_rate": 0.00019174009067024158, "loss": 2.9695301055908203, "step": 5929, "token_acc": 0.3022690083716725 }, { "epoch": 3.4761067135737322, "grad_norm": 0.26454123890374015, "learning_rate": 0.0001917362331353593, "loss": 2.9432826042175293, "step": 5930, "token_acc": 0.306840718302589 }, { "epoch": 3.476693051890941, "grad_norm": 0.36257739541237993, "learning_rate": 0.0001917323747387387, "loss": 2.9671735763549805, "step": 5931, "token_acc": 0.302713078952146 }, { "epoch": 3.47727939020815, "grad_norm": 0.29407834638170904, "learning_rate": 0.0001917285154804161, "loss": 2.9727911949157715, "step": 5932, "token_acc": 0.30191120197640675 }, { "epoch": 3.477865728525359, "grad_norm": 0.3242559228594796, "learning_rate": 0.00019172465536042762, "loss": 2.987159013748169, "step": 5933, "token_acc": 0.30013832315651917 }, { "epoch": 3.4784520668425682, "grad_norm": 0.23961738964234222, "learning_rate": 0.00019172079437880965, "loss": 2.9654462337493896, "step": 5934, "token_acc": 0.3038325040293498 }, { "epoch": 3.4790384051597774, "grad_norm": 0.26835593058537643, "learning_rate": 0.00019171693253559842, "loss": 3.0013129711151123, "step": 5935, "token_acc": 0.29873808976422206 }, { "epoch": 3.479624743476986, "grad_norm": 0.2768649688424807, "learning_rate": 0.00019171306983083018, "loss": 2.9207587242126465, "step": 5936, "token_acc": 0.3091900190345786 }, { "epoch": 3.480211081794195, "grad_norm": 0.2579219523049475, "learning_rate": 0.00019170920626454126, "loss": 2.9746956825256348, "step": 5937, "token_acc": 0.3013603689964391 }, { "epoch": 3.4807974201114043, "grad_norm": 0.28624913096540316, "learning_rate": 0.0001917053418367679, "loss": 2.9514999389648438, "step": 5938, "token_acc": 0.30560697248870883 }, { "epoch": 3.4813837584286134, "grad_norm": 0.28017660361368973, "learning_rate": 0.00019170147654754645, "loss": 2.9392194747924805, "step": 5939, "token_acc": 0.3073414146542697 }, { "epoch": 3.4819700967458225, "grad_norm": 0.27024309058769097, "learning_rate": 0.00019169761039691317, "loss": 2.9616761207580566, "step": 5940, "token_acc": 0.30397879217728996 }, { "epoch": 3.4825564350630316, "grad_norm": 0.2906095967692996, "learning_rate": 0.0001916937433849044, "loss": 2.9783854484558105, "step": 5941, "token_acc": 0.3016020573287038 }, { "epoch": 3.4831427733802403, "grad_norm": 0.28464969221330594, "learning_rate": 0.0001916898755115565, "loss": 2.995286226272583, "step": 5942, "token_acc": 0.2989766216661179 }, { "epoch": 3.4837291116974494, "grad_norm": 0.2703042834407997, "learning_rate": 0.00019168600677690574, "loss": 2.960696220397949, "step": 5943, "token_acc": 0.3037984691860569 }, { "epoch": 3.4843154500146585, "grad_norm": 0.23950516711499512, "learning_rate": 0.00019168213718098853, "loss": 2.9468703269958496, "step": 5944, "token_acc": 0.30611996946750375 }, { "epoch": 3.4849017883318676, "grad_norm": 0.2650541051473338, "learning_rate": 0.00019167826672384118, "loss": 2.972216844558716, "step": 5945, "token_acc": 0.3019724566388194 }, { "epoch": 3.4854881266490767, "grad_norm": 0.23125605606574806, "learning_rate": 0.00019167439540550003, "loss": 3.006110668182373, "step": 5946, "token_acc": 0.29767965020489207 }, { "epoch": 3.4860744649662854, "grad_norm": 0.2648771795218388, "learning_rate": 0.00019167052322600147, "loss": 2.9724087715148926, "step": 5947, "token_acc": 0.3021266299233768 }, { "epoch": 3.4866608032834945, "grad_norm": 0.21692069966137825, "learning_rate": 0.0001916666501853819, "loss": 2.95220685005188, "step": 5948, "token_acc": 0.3044722180394519 }, { "epoch": 3.4872471416007036, "grad_norm": 0.2857513523779324, "learning_rate": 0.00019166277628367766, "loss": 2.9701128005981445, "step": 5949, "token_acc": 0.3019965204162137 }, { "epoch": 3.4878334799179127, "grad_norm": 0.24066926731927735, "learning_rate": 0.00019165890152092515, "loss": 2.9135375022888184, "step": 5950, "token_acc": 0.31098550489069027 }, { "epoch": 3.488419818235122, "grad_norm": 0.2797701130028548, "learning_rate": 0.00019165502589716077, "loss": 2.9858896732330322, "step": 5951, "token_acc": 0.2996186681093063 }, { "epoch": 3.489006156552331, "grad_norm": 0.26454483315812455, "learning_rate": 0.00019165114941242092, "loss": 2.9886817932128906, "step": 5952, "token_acc": 0.3003088406659224 }, { "epoch": 3.4895924948695396, "grad_norm": 0.2734291060499896, "learning_rate": 0.00019164727206674205, "loss": 2.9445204734802246, "step": 5953, "token_acc": 0.30565628358763675 }, { "epoch": 3.4901788331867487, "grad_norm": 0.27391631781102205, "learning_rate": 0.00019164339386016054, "loss": 2.954437732696533, "step": 5954, "token_acc": 0.30696444576448045 }, { "epoch": 3.490765171503958, "grad_norm": 0.2760405197135756, "learning_rate": 0.00019163951479271284, "loss": 2.947877883911133, "step": 5955, "token_acc": 0.30558461589619107 }, { "epoch": 3.491351509821167, "grad_norm": 0.2567047615665823, "learning_rate": 0.00019163563486443536, "loss": 2.9776127338409424, "step": 5956, "token_acc": 0.30010125640601754 }, { "epoch": 3.4919378481383756, "grad_norm": 0.2617237673759999, "learning_rate": 0.00019163175407536456, "loss": 2.944434642791748, "step": 5957, "token_acc": 0.30643962361322175 }, { "epoch": 3.4925241864555847, "grad_norm": 0.2596208956426614, "learning_rate": 0.00019162787242553696, "loss": 2.989205837249756, "step": 5958, "token_acc": 0.29953831852116103 }, { "epoch": 3.493110524772794, "grad_norm": 0.28449159274285146, "learning_rate": 0.00019162398991498896, "loss": 2.9705100059509277, "step": 5959, "token_acc": 0.3023345978121223 }, { "epoch": 3.493696863090003, "grad_norm": 0.3722119719732468, "learning_rate": 0.000191620106543757, "loss": 2.962167978286743, "step": 5960, "token_acc": 0.3022338679092662 }, { "epoch": 3.494283201407212, "grad_norm": 0.31678204837613766, "learning_rate": 0.00019161622231187762, "loss": 2.973284959793091, "step": 5961, "token_acc": 0.30227613870972697 }, { "epoch": 3.494869539724421, "grad_norm": 0.31562684752949294, "learning_rate": 0.00019161233721938728, "loss": 2.977548122406006, "step": 5962, "token_acc": 0.3009485836395382 }, { "epoch": 3.49545587804163, "grad_norm": 0.30816953952720527, "learning_rate": 0.0001916084512663225, "loss": 2.9610483646392822, "step": 5963, "token_acc": 0.3033335400324518 }, { "epoch": 3.496042216358839, "grad_norm": 0.29575441417277193, "learning_rate": 0.00019160456445271976, "loss": 2.94991397857666, "step": 5964, "token_acc": 0.3039774905924322 }, { "epoch": 3.496628554676048, "grad_norm": 0.3130404269373462, "learning_rate": 0.00019160067677861557, "loss": 2.925572156906128, "step": 5965, "token_acc": 0.30927930823231664 }, { "epoch": 3.497214892993257, "grad_norm": 0.2613943744192855, "learning_rate": 0.00019159678824404646, "loss": 2.9770150184631348, "step": 5966, "token_acc": 0.3013259851863771 }, { "epoch": 3.4978012313104663, "grad_norm": 0.28558824248420767, "learning_rate": 0.00019159289884904893, "loss": 2.9472172260284424, "step": 5967, "token_acc": 0.30584574848984625 }, { "epoch": 3.498387569627675, "grad_norm": 0.26391575478831697, "learning_rate": 0.00019158900859365957, "loss": 2.963912010192871, "step": 5968, "token_acc": 0.3035536272307017 }, { "epoch": 3.498973907944884, "grad_norm": 0.3059708096545487, "learning_rate": 0.00019158511747791488, "loss": 2.9502649307250977, "step": 5969, "token_acc": 0.3054786276535688 }, { "epoch": 3.499560246262093, "grad_norm": 0.2735953846247341, "learning_rate": 0.00019158122550185143, "loss": 2.949164867401123, "step": 5970, "token_acc": 0.3055238553378333 }, { "epoch": 3.5001465845793023, "grad_norm": 0.2966843000950639, "learning_rate": 0.00019157733266550575, "loss": 2.964095115661621, "step": 5971, "token_acc": 0.3029662707349402 }, { "epoch": 3.5007329228965114, "grad_norm": 0.2754068453937649, "learning_rate": 0.00019157343896891447, "loss": 2.966104507446289, "step": 5972, "token_acc": 0.3034383100359415 }, { "epoch": 3.5013192612137205, "grad_norm": 0.27796850215277635, "learning_rate": 0.00019156954441211407, "loss": 2.9578073024749756, "step": 5973, "token_acc": 0.3047558879884775 }, { "epoch": 3.5019055995309296, "grad_norm": 0.21251071188468645, "learning_rate": 0.00019156564899514125, "loss": 2.954092264175415, "step": 5974, "token_acc": 0.3045309407573559 }, { "epoch": 3.5024919378481383, "grad_norm": 0.27669042795923077, "learning_rate": 0.0001915617527180325, "loss": 2.9644598960876465, "step": 5975, "token_acc": 0.30210262335945426 }, { "epoch": 3.5030782761653474, "grad_norm": 0.22348088071516292, "learning_rate": 0.00019155785558082447, "loss": 2.977642297744751, "step": 5976, "token_acc": 0.30237052499113615 }, { "epoch": 3.5036646144825565, "grad_norm": 0.2345331764004061, "learning_rate": 0.00019155395758355378, "loss": 2.9618120193481445, "step": 5977, "token_acc": 0.30364657778486087 }, { "epoch": 3.5042509527997656, "grad_norm": 0.2457726618346513, "learning_rate": 0.00019155005872625703, "loss": 3.006176471710205, "step": 5978, "token_acc": 0.2977274147520153 }, { "epoch": 3.5048372911169743, "grad_norm": 0.24194895381652184, "learning_rate": 0.00019154615900897082, "loss": 3.0002450942993164, "step": 5979, "token_acc": 0.29823697119617626 }, { "epoch": 3.5054236294341834, "grad_norm": 0.2792292866424761, "learning_rate": 0.00019154225843173186, "loss": 2.9755301475524902, "step": 5980, "token_acc": 0.303581882585905 }, { "epoch": 3.5060099677513925, "grad_norm": 0.2471223514085659, "learning_rate": 0.0001915383569945767, "loss": 2.9741172790527344, "step": 5981, "token_acc": 0.3012835567072924 }, { "epoch": 3.5065963060686016, "grad_norm": 0.2707817599767387, "learning_rate": 0.00019153445469754203, "loss": 3.0109848976135254, "step": 5982, "token_acc": 0.297413759556689 }, { "epoch": 3.5071826443858107, "grad_norm": 0.28452447527035873, "learning_rate": 0.00019153055154066452, "loss": 2.976449489593506, "step": 5983, "token_acc": 0.3006461837274823 }, { "epoch": 3.50776898270302, "grad_norm": 0.23894854149430764, "learning_rate": 0.00019152664752398077, "loss": 2.9401111602783203, "step": 5984, "token_acc": 0.3053380672166478 }, { "epoch": 3.5083553210202285, "grad_norm": 0.2576243936857398, "learning_rate": 0.00019152274264752755, "loss": 2.944181203842163, "step": 5985, "token_acc": 0.30503817439895486 }, { "epoch": 3.5089416593374376, "grad_norm": 0.26725361499836425, "learning_rate": 0.00019151883691134145, "loss": 2.981978416442871, "step": 5986, "token_acc": 0.3019366355221683 }, { "epoch": 3.5095279976546467, "grad_norm": 0.2988927849403871, "learning_rate": 0.00019151493031545921, "loss": 2.9759116172790527, "step": 5987, "token_acc": 0.3009395901900414 }, { "epoch": 3.510114335971856, "grad_norm": 0.33410454060795103, "learning_rate": 0.00019151102285991752, "loss": 3.0012550354003906, "step": 5988, "token_acc": 0.296885525169378 }, { "epoch": 3.5107006742890645, "grad_norm": 0.3175448001381692, "learning_rate": 0.0001915071145447531, "loss": 2.9826645851135254, "step": 5989, "token_acc": 0.29973003505831564 }, { "epoch": 3.5112870126062736, "grad_norm": 0.3112057511099924, "learning_rate": 0.00019150320537000265, "loss": 2.92803955078125, "step": 5990, "token_acc": 0.3081505418881185 }, { "epoch": 3.5118733509234827, "grad_norm": 0.3075334359857697, "learning_rate": 0.00019149929533570286, "loss": 2.9625308513641357, "step": 5991, "token_acc": 0.3040020160512653 }, { "epoch": 3.512459689240692, "grad_norm": 0.2754022461201159, "learning_rate": 0.0001914953844418905, "loss": 2.977827548980713, "step": 5992, "token_acc": 0.3026272628492937 }, { "epoch": 3.513046027557901, "grad_norm": 0.2874550711683382, "learning_rate": 0.0001914914726886023, "loss": 2.95717191696167, "step": 5993, "token_acc": 0.3039518891412711 }, { "epoch": 3.51363236587511, "grad_norm": 0.29715397657401443, "learning_rate": 0.00019148756007587498, "loss": 2.951174259185791, "step": 5994, "token_acc": 0.3068908952351429 }, { "epoch": 3.514218704192319, "grad_norm": 0.31373214469219995, "learning_rate": 0.00019148364660374534, "loss": 2.9671683311462402, "step": 5995, "token_acc": 0.3023500025488097 }, { "epoch": 3.514805042509528, "grad_norm": 0.2800442659357902, "learning_rate": 0.0001914797322722501, "loss": 2.962876796722412, "step": 5996, "token_acc": 0.30265794677769825 }, { "epoch": 3.515391380826737, "grad_norm": 0.30863886341230845, "learning_rate": 0.00019147581708142604, "loss": 2.9880080223083496, "step": 5997, "token_acc": 0.2988402088882364 }, { "epoch": 3.515977719143946, "grad_norm": 0.2746478330876217, "learning_rate": 0.00019147190103130997, "loss": 2.992375373840332, "step": 5998, "token_acc": 0.300004164085801 }, { "epoch": 3.516564057461155, "grad_norm": 0.3000778096289024, "learning_rate": 0.00019146798412193863, "loss": 2.9545109272003174, "step": 5999, "token_acc": 0.3052149717260569 }, { "epoch": 3.517150395778364, "grad_norm": 0.2799244239958438, "learning_rate": 0.00019146406635334884, "loss": 2.93007230758667, "step": 6000, "token_acc": 0.30757477201709077 }, { "epoch": 3.517736734095573, "grad_norm": 0.2548022953685049, "learning_rate": 0.0001914601477255774, "loss": 2.9795355796813965, "step": 6001, "token_acc": 0.30046408201275904 }, { "epoch": 3.518323072412782, "grad_norm": 0.2783344742546853, "learning_rate": 0.00019145622823866113, "loss": 3.0100769996643066, "step": 6002, "token_acc": 0.29657779343089047 }, { "epoch": 3.518909410729991, "grad_norm": 0.2771245296191965, "learning_rate": 0.00019145230789263678, "loss": 2.9748661518096924, "step": 6003, "token_acc": 0.30046849246108753 }, { "epoch": 3.5194957490472003, "grad_norm": 0.32731669117424095, "learning_rate": 0.00019144838668754127, "loss": 2.959407329559326, "step": 6004, "token_acc": 0.30438826283074727 }, { "epoch": 3.5200820873644094, "grad_norm": 0.31399821279565865, "learning_rate": 0.0001914444646234114, "loss": 3.0213232040405273, "step": 6005, "token_acc": 0.2946626829274702 }, { "epoch": 3.5206684256816185, "grad_norm": 0.3044822240328191, "learning_rate": 0.000191440541700284, "loss": 2.960515022277832, "step": 6006, "token_acc": 0.30416292891507396 }, { "epoch": 3.521254763998827, "grad_norm": 0.2877993958307413, "learning_rate": 0.00019143661791819593, "loss": 2.9200243949890137, "step": 6007, "token_acc": 0.30947370600735613 }, { "epoch": 3.5218411023160363, "grad_norm": 0.3410656092491032, "learning_rate": 0.00019143269327718404, "loss": 3.0075554847717285, "step": 6008, "token_acc": 0.2958289740382674 }, { "epoch": 3.5224274406332454, "grad_norm": 0.26219905920051323, "learning_rate": 0.00019142876777728521, "loss": 2.9884064197540283, "step": 6009, "token_acc": 0.2993108186998548 }, { "epoch": 3.5230137789504545, "grad_norm": 0.3095821787519087, "learning_rate": 0.00019142484141853632, "loss": 2.9131128787994385, "step": 6010, "token_acc": 0.3106748877105509 }, { "epoch": 3.523600117267663, "grad_norm": 0.32243002260269915, "learning_rate": 0.0001914209142009742, "loss": 2.9589967727661133, "step": 6011, "token_acc": 0.30376123401313515 }, { "epoch": 3.5241864555848723, "grad_norm": 0.32067188911130157, "learning_rate": 0.0001914169861246358, "loss": 2.967515230178833, "step": 6012, "token_acc": 0.30306979216173263 }, { "epoch": 3.5247727939020814, "grad_norm": 0.2570945984495637, "learning_rate": 0.00019141305718955805, "loss": 2.975963830947876, "step": 6013, "token_acc": 0.30258381435792453 }, { "epoch": 3.5253591322192905, "grad_norm": 0.3187663465487429, "learning_rate": 0.00019140912739577773, "loss": 2.947457790374756, "step": 6014, "token_acc": 0.30632316241898344 }, { "epoch": 3.5259454705364996, "grad_norm": 0.24985528843312174, "learning_rate": 0.0001914051967433319, "loss": 3.013056993484497, "step": 6015, "token_acc": 0.29877900076414066 }, { "epoch": 3.5265318088537088, "grad_norm": 0.2663958130762031, "learning_rate": 0.0001914012652322574, "loss": 2.961904525756836, "step": 6016, "token_acc": 0.3045105082395942 }, { "epoch": 3.527118147170918, "grad_norm": 0.2587305019331022, "learning_rate": 0.00019139733286259117, "loss": 2.9744646549224854, "step": 6017, "token_acc": 0.30235633282448343 }, { "epoch": 3.5277044854881265, "grad_norm": 0.2420367072388504, "learning_rate": 0.00019139339963437015, "loss": 2.9683353900909424, "step": 6018, "token_acc": 0.30419167032346606 }, { "epoch": 3.5282908238053357, "grad_norm": 0.2531436895818353, "learning_rate": 0.0001913894655476313, "loss": 3.000499725341797, "step": 6019, "token_acc": 0.3002289928409032 }, { "epoch": 3.5288771621225448, "grad_norm": 0.23680796629105214, "learning_rate": 0.0001913855306024116, "loss": 2.92912220954895, "step": 6020, "token_acc": 0.30839090274004505 }, { "epoch": 3.529463500439754, "grad_norm": 0.24746751587969099, "learning_rate": 0.0001913815947987479, "loss": 2.9691295623779297, "step": 6021, "token_acc": 0.30369165351286925 }, { "epoch": 3.5300498387569625, "grad_norm": 0.25343285746236516, "learning_rate": 0.00019137765813667735, "loss": 2.951542854309082, "step": 6022, "token_acc": 0.3051863771262021 }, { "epoch": 3.5306361770741717, "grad_norm": 0.2395097438191023, "learning_rate": 0.00019137372061623674, "loss": 2.9944005012512207, "step": 6023, "token_acc": 0.29984529533459775 }, { "epoch": 3.5312225153913808, "grad_norm": 0.27779986217368247, "learning_rate": 0.00019136978223746324, "loss": 2.9297263622283936, "step": 6024, "token_acc": 0.3095740392719354 }, { "epoch": 3.53180885370859, "grad_norm": 0.2352336156518264, "learning_rate": 0.00019136584300039373, "loss": 2.963967800140381, "step": 6025, "token_acc": 0.3038155186456379 }, { "epoch": 3.532395192025799, "grad_norm": 0.24621379909754137, "learning_rate": 0.00019136190290506525, "loss": 2.945768356323242, "step": 6026, "token_acc": 0.30647433549777187 }, { "epoch": 3.532981530343008, "grad_norm": 0.27635081217937774, "learning_rate": 0.00019135796195151477, "loss": 2.9718778133392334, "step": 6027, "token_acc": 0.3021337747011262 }, { "epoch": 3.533567868660217, "grad_norm": 0.27390439039592546, "learning_rate": 0.00019135402013977935, "loss": 2.9786605834960938, "step": 6028, "token_acc": 0.3001400334206156 }, { "epoch": 3.534154206977426, "grad_norm": 0.26332098154671874, "learning_rate": 0.00019135007746989605, "loss": 2.932772636413574, "step": 6029, "token_acc": 0.3082523559287203 }, { "epoch": 3.534740545294635, "grad_norm": 0.3533609470335083, "learning_rate": 0.00019134613394190182, "loss": 2.961691379547119, "step": 6030, "token_acc": 0.30266378689704826 }, { "epoch": 3.535326883611844, "grad_norm": 0.30447870670353966, "learning_rate": 0.00019134218955583378, "loss": 2.9801721572875977, "step": 6031, "token_acc": 0.3017181579110844 }, { "epoch": 3.535913221929053, "grad_norm": 0.25792664374032487, "learning_rate": 0.00019133824431172896, "loss": 3.0026698112487793, "step": 6032, "token_acc": 0.29789519354906985 }, { "epoch": 3.536499560246262, "grad_norm": 0.29593564831233565, "learning_rate": 0.0001913342982096244, "loss": 2.9800641536712646, "step": 6033, "token_acc": 0.30277789390075666 }, { "epoch": 3.537085898563471, "grad_norm": 0.34209049206529407, "learning_rate": 0.0001913303512495572, "loss": 2.939664363861084, "step": 6034, "token_acc": 0.3076003974064003 }, { "epoch": 3.53767223688068, "grad_norm": 0.2896585574680046, "learning_rate": 0.0001913264034315644, "loss": 3.009415626525879, "step": 6035, "token_acc": 0.2963400360341404 }, { "epoch": 3.538258575197889, "grad_norm": 0.3105310238269969, "learning_rate": 0.00019132245475568312, "loss": 2.9269728660583496, "step": 6036, "token_acc": 0.30827511800404583 }, { "epoch": 3.5388449135150983, "grad_norm": 0.3147159700402227, "learning_rate": 0.0001913185052219504, "loss": 2.9490199089050293, "step": 6037, "token_acc": 0.3049728133470569 }, { "epoch": 3.5394312518323074, "grad_norm": 0.3072956211798509, "learning_rate": 0.0001913145548304034, "loss": 2.9548277854919434, "step": 6038, "token_acc": 0.30501826778422525 }, { "epoch": 3.540017590149516, "grad_norm": 0.2529286585513444, "learning_rate": 0.00019131060358107922, "loss": 2.991983413696289, "step": 6039, "token_acc": 0.29849948795583064 }, { "epoch": 3.5406039284667252, "grad_norm": 0.2806392070049623, "learning_rate": 0.00019130665147401495, "loss": 3.004472255706787, "step": 6040, "token_acc": 0.2967159994180587 }, { "epoch": 3.5411902667839343, "grad_norm": 0.2478941367237467, "learning_rate": 0.00019130269850924772, "loss": 2.9572386741638184, "step": 6041, "token_acc": 0.30560991093845113 }, { "epoch": 3.5417766051011434, "grad_norm": 0.2813861725821734, "learning_rate": 0.0001912987446868147, "loss": 2.97723388671875, "step": 6042, "token_acc": 0.2999278058253404 }, { "epoch": 3.542362943418352, "grad_norm": 0.26339464834253684, "learning_rate": 0.00019129479000675294, "loss": 2.9609756469726562, "step": 6043, "token_acc": 0.30324932208574645 }, { "epoch": 3.5429492817355612, "grad_norm": 0.28261798315166753, "learning_rate": 0.0001912908344690997, "loss": 2.9773683547973633, "step": 6044, "token_acc": 0.30172688375253487 }, { "epoch": 3.5435356200527703, "grad_norm": 0.296373810890885, "learning_rate": 0.00019128687807389206, "loss": 2.982858180999756, "step": 6045, "token_acc": 0.3007664228660713 }, { "epoch": 3.5441219583699795, "grad_norm": 0.2509418418327762, "learning_rate": 0.00019128292082116723, "loss": 2.9923007488250732, "step": 6046, "token_acc": 0.2995224830879427 }, { "epoch": 3.5447082966871886, "grad_norm": 0.3074253950779621, "learning_rate": 0.00019127896271096235, "loss": 2.970160961151123, "step": 6047, "token_acc": 0.302632838926574 }, { "epoch": 3.5452946350043977, "grad_norm": 0.2638720717452515, "learning_rate": 0.00019127500374331463, "loss": 2.9973959922790527, "step": 6048, "token_acc": 0.29808754960056416 }, { "epoch": 3.545880973321607, "grad_norm": 0.29083701292481673, "learning_rate": 0.00019127104391826122, "loss": 2.9632906913757324, "step": 6049, "token_acc": 0.3022998305278589 }, { "epoch": 3.5464673116388155, "grad_norm": 0.2731822796334414, "learning_rate": 0.00019126708323583937, "loss": 2.9164652824401855, "step": 6050, "token_acc": 0.30911774309410306 }, { "epoch": 3.5470536499560246, "grad_norm": 0.2826184794460728, "learning_rate": 0.00019126312169608623, "loss": 3.0000228881835938, "step": 6051, "token_acc": 0.2981384833181905 }, { "epoch": 3.5476399882732337, "grad_norm": 0.2741298787269225, "learning_rate": 0.0001912591592990391, "loss": 2.9693102836608887, "step": 6052, "token_acc": 0.30116591973744616 }, { "epoch": 3.548226326590443, "grad_norm": 0.2934656590170281, "learning_rate": 0.00019125519604473506, "loss": 2.961246967315674, "step": 6053, "token_acc": 0.3043683353639471 }, { "epoch": 3.5488126649076515, "grad_norm": 0.23547395084827352, "learning_rate": 0.0001912512319332115, "loss": 3.0001492500305176, "step": 6054, "token_acc": 0.2987342940426961 }, { "epoch": 3.5493990032248606, "grad_norm": 0.302082094485623, "learning_rate": 0.00019124726696450554, "loss": 2.9944260120391846, "step": 6055, "token_acc": 0.2987072569724506 }, { "epoch": 3.5499853415420697, "grad_norm": 0.29697082626082094, "learning_rate": 0.00019124330113865442, "loss": 2.9592125415802, "step": 6056, "token_acc": 0.30211237246730854 }, { "epoch": 3.550571679859279, "grad_norm": 0.2440005186383839, "learning_rate": 0.00019123933445569548, "loss": 2.954178810119629, "step": 6057, "token_acc": 0.3049324539886926 }, { "epoch": 3.551158018176488, "grad_norm": 0.33028213890965935, "learning_rate": 0.00019123536691566595, "loss": 2.952855110168457, "step": 6058, "token_acc": 0.30381315235098405 }, { "epoch": 3.551744356493697, "grad_norm": 0.30310146488583645, "learning_rate": 0.00019123139851860309, "loss": 2.974757671356201, "step": 6059, "token_acc": 0.30103614226414693 }, { "epoch": 3.552330694810906, "grad_norm": 0.242211841473373, "learning_rate": 0.00019122742926454416, "loss": 2.9565038681030273, "step": 6060, "token_acc": 0.30527864930377024 }, { "epoch": 3.552917033128115, "grad_norm": 0.2554172839321172, "learning_rate": 0.00019122345915352647, "loss": 2.9752566814422607, "step": 6061, "token_acc": 0.30418943533697634 }, { "epoch": 3.553503371445324, "grad_norm": 0.2513359516711458, "learning_rate": 0.0001912194881855873, "loss": 2.977457046508789, "step": 6062, "token_acc": 0.3022908379473136 }, { "epoch": 3.554089709762533, "grad_norm": 0.2221724307294777, "learning_rate": 0.00019121551636076397, "loss": 2.9891786575317383, "step": 6063, "token_acc": 0.30040666843516356 }, { "epoch": 3.554676048079742, "grad_norm": 0.27445306566602073, "learning_rate": 0.00019121154367909374, "loss": 2.951536178588867, "step": 6064, "token_acc": 0.3063825793129341 }, { "epoch": 3.555262386396951, "grad_norm": 0.23490876893988244, "learning_rate": 0.00019120757014061402, "loss": 2.9403176307678223, "step": 6065, "token_acc": 0.306334242107677 }, { "epoch": 3.55584872471416, "grad_norm": 0.2516118373617787, "learning_rate": 0.00019120359574536204, "loss": 2.9542317390441895, "step": 6066, "token_acc": 0.30294476735085724 }, { "epoch": 3.556435063031369, "grad_norm": 0.2679040148691342, "learning_rate": 0.0001911996204933752, "loss": 3.0217373371124268, "step": 6067, "token_acc": 0.29510657050626105 }, { "epoch": 3.557021401348578, "grad_norm": 0.24653047779150936, "learning_rate": 0.00019119564438469083, "loss": 2.995137929916382, "step": 6068, "token_acc": 0.29786876254734357 }, { "epoch": 3.5576077396657872, "grad_norm": 0.2484886990235106, "learning_rate": 0.00019119166741934622, "loss": 3.000593662261963, "step": 6069, "token_acc": 0.2986035738952966 }, { "epoch": 3.5581940779829964, "grad_norm": 0.24711761129930102, "learning_rate": 0.00019118768959737882, "loss": 2.954603672027588, "step": 6070, "token_acc": 0.3050378924199513 }, { "epoch": 3.5587804163002055, "grad_norm": 0.228907024447038, "learning_rate": 0.00019118371091882594, "loss": 2.9427480697631836, "step": 6071, "token_acc": 0.3055746658337221 }, { "epoch": 3.559366754617414, "grad_norm": 0.2305080698567949, "learning_rate": 0.00019117973138372497, "loss": 2.985908031463623, "step": 6072, "token_acc": 0.2990233761019049 }, { "epoch": 3.5599530929346233, "grad_norm": 0.27209814437291874, "learning_rate": 0.0001911757509921133, "loss": 2.967372417449951, "step": 6073, "token_acc": 0.30308258482089934 }, { "epoch": 3.5605394312518324, "grad_norm": 0.3554476935124714, "learning_rate": 0.00019117176974402827, "loss": 2.9692063331604004, "step": 6074, "token_acc": 0.30515848018391994 }, { "epoch": 3.5611257695690415, "grad_norm": 0.3855799269006361, "learning_rate": 0.00019116778763950736, "loss": 3.009383201599121, "step": 6075, "token_acc": 0.29771396399318156 }, { "epoch": 3.56171210788625, "grad_norm": 0.23904301525289712, "learning_rate": 0.00019116380467858792, "loss": 2.9908876419067383, "step": 6076, "token_acc": 0.3011338105439156 }, { "epoch": 3.5622984462034593, "grad_norm": 0.3575263181958856, "learning_rate": 0.00019115982086130738, "loss": 2.9455742835998535, "step": 6077, "token_acc": 0.30734467085492934 }, { "epoch": 3.5628847845206684, "grad_norm": 0.33921296331241757, "learning_rate": 0.00019115583618770318, "loss": 2.958333969116211, "step": 6078, "token_acc": 0.30518011687452984 }, { "epoch": 3.5634711228378775, "grad_norm": 0.2646770780071063, "learning_rate": 0.00019115185065781272, "loss": 2.951984405517578, "step": 6079, "token_acc": 0.3062160521807272 }, { "epoch": 3.5640574611550866, "grad_norm": 0.3218159409310935, "learning_rate": 0.00019114786427167343, "loss": 2.9511821269989014, "step": 6080, "token_acc": 0.3055288741601833 }, { "epoch": 3.5646437994722957, "grad_norm": 0.2776972995036099, "learning_rate": 0.00019114387702932282, "loss": 2.9740653038024902, "step": 6081, "token_acc": 0.30222206701659565 }, { "epoch": 3.565230137789505, "grad_norm": 0.3172573973687408, "learning_rate": 0.00019113988893079825, "loss": 2.9364614486694336, "step": 6082, "token_acc": 0.30756670628201216 }, { "epoch": 3.5658164761067135, "grad_norm": 0.3393852101247649, "learning_rate": 0.0001911358999761373, "loss": 2.982854127883911, "step": 6083, "token_acc": 0.30059094504676376 }, { "epoch": 3.5664028144239226, "grad_norm": 0.32023888074587303, "learning_rate": 0.00019113191016537732, "loss": 2.972132682800293, "step": 6084, "token_acc": 0.30243817034460185 }, { "epoch": 3.5669891527411317, "grad_norm": 0.30657874179084604, "learning_rate": 0.00019112791949855588, "loss": 2.9916868209838867, "step": 6085, "token_acc": 0.299861806279459 }, { "epoch": 3.567575491058341, "grad_norm": 0.288638301280867, "learning_rate": 0.00019112392797571043, "loss": 2.993163824081421, "step": 6086, "token_acc": 0.2995280981383094 }, { "epoch": 3.5681618293755495, "grad_norm": 0.2925492244708194, "learning_rate": 0.00019111993559687846, "loss": 3.0008814334869385, "step": 6087, "token_acc": 0.2995426849039327 }, { "epoch": 3.5687481676927586, "grad_norm": 0.2917733902157744, "learning_rate": 0.00019111594236209748, "loss": 2.942704200744629, "step": 6088, "token_acc": 0.3065314344016577 }, { "epoch": 3.5693345060099677, "grad_norm": 0.24658253107296965, "learning_rate": 0.000191111948271405, "loss": 2.9848532676696777, "step": 6089, "token_acc": 0.2996251544937048 }, { "epoch": 3.569920844327177, "grad_norm": 0.2869888638321483, "learning_rate": 0.00019110795332483854, "loss": 2.9550015926361084, "step": 6090, "token_acc": 0.30549792265408476 }, { "epoch": 3.570507182644386, "grad_norm": 0.27216799171897926, "learning_rate": 0.00019110395752243564, "loss": 2.979416847229004, "step": 6091, "token_acc": 0.3008749185545477 }, { "epoch": 3.571093520961595, "grad_norm": 0.2660240850290858, "learning_rate": 0.00019109996086423382, "loss": 2.9608914852142334, "step": 6092, "token_acc": 0.303288056206089 }, { "epoch": 3.5716798592788037, "grad_norm": 0.2926574999332826, "learning_rate": 0.00019109596335027063, "loss": 2.943258762359619, "step": 6093, "token_acc": 0.30629834533922445 }, { "epoch": 3.572266197596013, "grad_norm": 0.3341132726023575, "learning_rate": 0.00019109196498058362, "loss": 2.951904296875, "step": 6094, "token_acc": 0.3052081071394 }, { "epoch": 3.572852535913222, "grad_norm": 0.3000757445052186, "learning_rate": 0.0001910879657552103, "loss": 2.975396156311035, "step": 6095, "token_acc": 0.30034560559228973 }, { "epoch": 3.573438874230431, "grad_norm": 0.30953948851146373, "learning_rate": 0.00019108396567418833, "loss": 2.9296205043792725, "step": 6096, "token_acc": 0.3092009539756386 }, { "epoch": 3.5740252125476397, "grad_norm": 0.2546143620690671, "learning_rate": 0.00019107996473755523, "loss": 2.9269824028015137, "step": 6097, "token_acc": 0.3071438952733917 }, { "epoch": 3.574611550864849, "grad_norm": 0.26516970969182374, "learning_rate": 0.0001910759629453486, "loss": 2.9254884719848633, "step": 6098, "token_acc": 0.3090405444715539 }, { "epoch": 3.575197889182058, "grad_norm": 0.25674193175851545, "learning_rate": 0.00019107196029760602, "loss": 2.9549310207366943, "step": 6099, "token_acc": 0.3044577241734409 }, { "epoch": 3.575784227499267, "grad_norm": 0.27554819296089095, "learning_rate": 0.0001910679567943651, "loss": 2.983686685562134, "step": 6100, "token_acc": 0.2995202908809435 }, { "epoch": 3.576370565816476, "grad_norm": 0.26104134159790215, "learning_rate": 0.00019106395243566343, "loss": 2.94295334815979, "step": 6101, "token_acc": 0.30498578863143594 }, { "epoch": 3.5769569041336853, "grad_norm": 0.23322966523244412, "learning_rate": 0.00019105994722153863, "loss": 2.9615185260772705, "step": 6102, "token_acc": 0.3033024819574927 }, { "epoch": 3.5775432424508944, "grad_norm": 0.23826787140074537, "learning_rate": 0.00019105594115202833, "loss": 2.9407341480255127, "step": 6103, "token_acc": 0.3082021583284435 }, { "epoch": 3.578129580768103, "grad_norm": 0.23653677545330673, "learning_rate": 0.0001910519342271702, "loss": 2.9667861461639404, "step": 6104, "token_acc": 0.30217488813182825 }, { "epoch": 3.578715919085312, "grad_norm": 0.2659788321623413, "learning_rate": 0.0001910479264470018, "loss": 2.9569573402404785, "step": 6105, "token_acc": 0.30457583058081816 }, { "epoch": 3.5793022574025213, "grad_norm": 0.25319488649102495, "learning_rate": 0.00019104391781156084, "loss": 2.93888258934021, "step": 6106, "token_acc": 0.30696923564210765 }, { "epoch": 3.5798885957197304, "grad_norm": 0.2358698665775989, "learning_rate": 0.00019103990832088498, "loss": 3.006730556488037, "step": 6107, "token_acc": 0.2973490717525425 }, { "epoch": 3.580474934036939, "grad_norm": 0.24685626565436072, "learning_rate": 0.0001910358979750118, "loss": 3.016669273376465, "step": 6108, "token_acc": 0.2964768522411908 }, { "epoch": 3.581061272354148, "grad_norm": 0.25600265813579076, "learning_rate": 0.00019103188677397904, "loss": 2.954880714416504, "step": 6109, "token_acc": 0.3040211349756399 }, { "epoch": 3.5816476106713573, "grad_norm": 0.26135045808462115, "learning_rate": 0.00019102787471782443, "loss": 2.953213691711426, "step": 6110, "token_acc": 0.30506230849904376 }, { "epoch": 3.5822339489885664, "grad_norm": 0.2765286104288164, "learning_rate": 0.00019102386180658556, "loss": 2.936375379562378, "step": 6111, "token_acc": 0.3070043711314714 }, { "epoch": 3.5828202873057755, "grad_norm": 0.26587797202154273, "learning_rate": 0.00019101984804030016, "loss": 3.0120368003845215, "step": 6112, "token_acc": 0.29737433107710265 }, { "epoch": 3.5834066256229846, "grad_norm": 0.23921825868568486, "learning_rate": 0.00019101583341900593, "loss": 3.000037431716919, "step": 6113, "token_acc": 0.2975017080753566 }, { "epoch": 3.5839929639401937, "grad_norm": 0.25513573628948827, "learning_rate": 0.0001910118179427406, "loss": 2.968623399734497, "step": 6114, "token_acc": 0.30301765419895416 }, { "epoch": 3.5845793022574024, "grad_norm": 0.26355806188497044, "learning_rate": 0.00019100780161154188, "loss": 2.9707489013671875, "step": 6115, "token_acc": 0.30307186938041525 }, { "epoch": 3.5851656405746115, "grad_norm": 0.26008214800277046, "learning_rate": 0.00019100378442544753, "loss": 2.971597194671631, "step": 6116, "token_acc": 0.3011936886388191 }, { "epoch": 3.5857519788918206, "grad_norm": 0.2918888880575415, "learning_rate": 0.00019099976638449522, "loss": 2.97985577583313, "step": 6117, "token_acc": 0.3021927156346708 }, { "epoch": 3.5863383172090297, "grad_norm": 0.3258452177376403, "learning_rate": 0.00019099574748872273, "loss": 2.947326183319092, "step": 6118, "token_acc": 0.30692099531384065 }, { "epoch": 3.5869246555262384, "grad_norm": 0.3602005324913786, "learning_rate": 0.00019099172773816782, "loss": 2.9568371772766113, "step": 6119, "token_acc": 0.3052741370386073 }, { "epoch": 3.5875109938434475, "grad_norm": 0.31550800355097924, "learning_rate": 0.00019098770713286823, "loss": 2.994813919067383, "step": 6120, "token_acc": 0.2986731259109386 }, { "epoch": 3.5880973321606566, "grad_norm": 0.28033070210320354, "learning_rate": 0.00019098368567286173, "loss": 2.938511848449707, "step": 6121, "token_acc": 0.3075197295786934 }, { "epoch": 3.5886836704778657, "grad_norm": 0.2594408708124164, "learning_rate": 0.00019097966335818615, "loss": 2.9175825119018555, "step": 6122, "token_acc": 0.30976245131670577 }, { "epoch": 3.589270008795075, "grad_norm": 0.3366059890129282, "learning_rate": 0.0001909756401888792, "loss": 2.9433441162109375, "step": 6123, "token_acc": 0.3052279416435514 }, { "epoch": 3.589856347112284, "grad_norm": 0.30712621591065287, "learning_rate": 0.0001909716161649787, "loss": 2.9516336917877197, "step": 6124, "token_acc": 0.305059607960066 }, { "epoch": 3.590442685429493, "grad_norm": 0.26793115054346023, "learning_rate": 0.00019096759128652243, "loss": 2.967846393585205, "step": 6125, "token_acc": 0.3030857287018495 }, { "epoch": 3.5910290237467017, "grad_norm": 0.32182466652422687, "learning_rate": 0.00019096356555354827, "loss": 2.984340190887451, "step": 6126, "token_acc": 0.2993270314102532 }, { "epoch": 3.591615362063911, "grad_norm": 0.3041975856351586, "learning_rate": 0.00019095953896609396, "loss": 2.959488868713379, "step": 6127, "token_acc": 0.304567551544694 }, { "epoch": 3.59220170038112, "grad_norm": 0.2961943370722895, "learning_rate": 0.00019095551152419735, "loss": 2.9880523681640625, "step": 6128, "token_acc": 0.29803527268335966 }, { "epoch": 3.592788038698329, "grad_norm": 0.3028784154324309, "learning_rate": 0.00019095148322789628, "loss": 2.9437203407287598, "step": 6129, "token_acc": 0.3065974958638435 }, { "epoch": 3.5933743770155377, "grad_norm": 0.26249348856534693, "learning_rate": 0.00019094745407722855, "loss": 2.9660019874572754, "step": 6130, "token_acc": 0.3023608228901138 }, { "epoch": 3.593960715332747, "grad_norm": 0.2583792421501495, "learning_rate": 0.0001909434240722321, "loss": 2.9663591384887695, "step": 6131, "token_acc": 0.30201498125386467 }, { "epoch": 3.594547053649956, "grad_norm": 0.25411573785947217, "learning_rate": 0.00019093939321294468, "loss": 2.9547524452209473, "step": 6132, "token_acc": 0.3052255382385491 }, { "epoch": 3.595133391967165, "grad_norm": 0.25061990353708913, "learning_rate": 0.00019093536149940424, "loss": 2.969355583190918, "step": 6133, "token_acc": 0.3021969333791812 }, { "epoch": 3.595719730284374, "grad_norm": 0.22452123697237805, "learning_rate": 0.00019093132893164858, "loss": 2.9826812744140625, "step": 6134, "token_acc": 0.30174376165887157 }, { "epoch": 3.5963060686015833, "grad_norm": 0.2772123160710859, "learning_rate": 0.00019092729550971565, "loss": 2.9628429412841797, "step": 6135, "token_acc": 0.30360325275382805 }, { "epoch": 3.5968924069187924, "grad_norm": 0.3629147243429197, "learning_rate": 0.0001909232612336433, "loss": 3.001366138458252, "step": 6136, "token_acc": 0.2981142773950823 }, { "epoch": 3.597478745236001, "grad_norm": 0.34757445611169524, "learning_rate": 0.0001909192261034694, "loss": 2.9764175415039062, "step": 6137, "token_acc": 0.3019083597742569 }, { "epoch": 3.59806508355321, "grad_norm": 0.23084649595729798, "learning_rate": 0.0001909151901192319, "loss": 2.9358725547790527, "step": 6138, "token_acc": 0.3073871889415633 }, { "epoch": 3.5986514218704193, "grad_norm": 0.3076775667030008, "learning_rate": 0.0001909111532809687, "loss": 2.978053331375122, "step": 6139, "token_acc": 0.3014994269790495 }, { "epoch": 3.5992377601876284, "grad_norm": 0.25896592089346787, "learning_rate": 0.00019090711558871775, "loss": 2.988049030303955, "step": 6140, "token_acc": 0.2986257299901479 }, { "epoch": 3.599824098504837, "grad_norm": 0.2903077755864694, "learning_rate": 0.0001909030770425169, "loss": 2.9821600914001465, "step": 6141, "token_acc": 0.3007345066962317 }, { "epoch": 3.600410436822046, "grad_norm": 0.2999308038807419, "learning_rate": 0.00019089903764240416, "loss": 2.97176194190979, "step": 6142, "token_acc": 0.3023460100826655 }, { "epoch": 3.6009967751392553, "grad_norm": 0.28123317075303567, "learning_rate": 0.00019089499738841745, "loss": 2.9827146530151367, "step": 6143, "token_acc": 0.3005290512278958 }, { "epoch": 3.6015831134564644, "grad_norm": 0.2654358985731293, "learning_rate": 0.00019089095628059473, "loss": 2.9305708408355713, "step": 6144, "token_acc": 0.3084093706344549 }, { "epoch": 3.6021694517736735, "grad_norm": 0.23664756003134604, "learning_rate": 0.00019088691431897394, "loss": 2.964878559112549, "step": 6145, "token_acc": 0.3014300569989914 }, { "epoch": 3.6027557900908826, "grad_norm": 0.3236881071848014, "learning_rate": 0.00019088287150359305, "loss": 2.982977867126465, "step": 6146, "token_acc": 0.2999882917691137 }, { "epoch": 3.6033421284080913, "grad_norm": 0.2757575712527811, "learning_rate": 0.00019087882783449004, "loss": 2.931215286254883, "step": 6147, "token_acc": 0.3071265580689837 }, { "epoch": 3.6039284667253004, "grad_norm": 0.2550000582798702, "learning_rate": 0.00019087478331170294, "loss": 2.966728448867798, "step": 6148, "token_acc": 0.3024278262295418 }, { "epoch": 3.6045148050425095, "grad_norm": 0.26363615805449864, "learning_rate": 0.00019087073793526971, "loss": 2.9874563217163086, "step": 6149, "token_acc": 0.30007298146667927 }, { "epoch": 3.6051011433597187, "grad_norm": 0.2723891445238067, "learning_rate": 0.00019086669170522832, "loss": 3.021811008453369, "step": 6150, "token_acc": 0.29487909779308397 }, { "epoch": 3.6056874816769273, "grad_norm": 0.29245624329354564, "learning_rate": 0.0001908626446216168, "loss": 2.9864869117736816, "step": 6151, "token_acc": 0.29999313063420097 }, { "epoch": 3.6062738199941364, "grad_norm": 0.2869472107341945, "learning_rate": 0.0001908585966844732, "loss": 2.988417148590088, "step": 6152, "token_acc": 0.2997824874949941 }, { "epoch": 3.6068601583113455, "grad_norm": 0.3238962966920359, "learning_rate": 0.0001908545478938355, "loss": 2.980520248413086, "step": 6153, "token_acc": 0.30055756878825246 }, { "epoch": 3.6074464966285547, "grad_norm": 0.2584199792354374, "learning_rate": 0.00019085049824974176, "loss": 3.0198869705200195, "step": 6154, "token_acc": 0.2940510465702757 }, { "epoch": 3.6080328349457638, "grad_norm": 0.2815049814019709, "learning_rate": 0.00019084644775222998, "loss": 2.9583446979522705, "step": 6155, "token_acc": 0.30336045059536354 }, { "epoch": 3.608619173262973, "grad_norm": 0.2788215288643938, "learning_rate": 0.00019084239640133828, "loss": 2.998249053955078, "step": 6156, "token_acc": 0.2972175999380829 }, { "epoch": 3.609205511580182, "grad_norm": 0.23771260423215942, "learning_rate": 0.00019083834419710466, "loss": 2.96685791015625, "step": 6157, "token_acc": 0.30172644167802193 }, { "epoch": 3.6097918498973907, "grad_norm": 0.26435356443270297, "learning_rate": 0.0001908342911395672, "loss": 2.9485459327697754, "step": 6158, "token_acc": 0.30718357422666487 }, { "epoch": 3.6103781882145998, "grad_norm": 0.2518544107212388, "learning_rate": 0.000190830237228764, "loss": 2.939924716949463, "step": 6159, "token_acc": 0.3062974715991997 }, { "epoch": 3.610964526531809, "grad_norm": 0.2556012043150553, "learning_rate": 0.0001908261824647331, "loss": 2.9505722522735596, "step": 6160, "token_acc": 0.30520046032782805 }, { "epoch": 3.611550864849018, "grad_norm": 0.22653556991068524, "learning_rate": 0.0001908221268475126, "loss": 2.9202792644500732, "step": 6161, "token_acc": 0.3109134807363274 }, { "epoch": 3.6121372031662267, "grad_norm": 0.25990243821009557, "learning_rate": 0.0001908180703771406, "loss": 2.985656261444092, "step": 6162, "token_acc": 0.30095158783622505 }, { "epoch": 3.6127235414834358, "grad_norm": 0.29619555821171106, "learning_rate": 0.00019081401305365522, "loss": 2.97200870513916, "step": 6163, "token_acc": 0.3022150736258777 }, { "epoch": 3.613309879800645, "grad_norm": 0.2941226098620405, "learning_rate": 0.00019080995487709456, "loss": 2.9388225078582764, "step": 6164, "token_acc": 0.3075512075817793 }, { "epoch": 3.613896218117854, "grad_norm": 0.3468725655991898, "learning_rate": 0.00019080589584749672, "loss": 3.005979537963867, "step": 6165, "token_acc": 0.29630646676282035 }, { "epoch": 3.614482556435063, "grad_norm": 0.3220289794508712, "learning_rate": 0.00019080183596489986, "loss": 3.045868396759033, "step": 6166, "token_acc": 0.29053471018393784 }, { "epoch": 3.615068894752272, "grad_norm": 0.27399430655068224, "learning_rate": 0.00019079777522934213, "loss": 2.94460391998291, "step": 6167, "token_acc": 0.3046594322688484 }, { "epoch": 3.6156552330694813, "grad_norm": 0.30118627259141817, "learning_rate": 0.00019079371364086166, "loss": 2.9589529037475586, "step": 6168, "token_acc": 0.3045903235801868 }, { "epoch": 3.61624157138669, "grad_norm": 0.2930012446351192, "learning_rate": 0.00019078965119949655, "loss": 2.98988676071167, "step": 6169, "token_acc": 0.2992364991170486 }, { "epoch": 3.616827909703899, "grad_norm": 0.27856118291067317, "learning_rate": 0.000190785587905285, "loss": 2.945614814758301, "step": 6170, "token_acc": 0.3065683182858871 }, { "epoch": 3.6174142480211082, "grad_norm": 0.26361483267455377, "learning_rate": 0.00019078152375826525, "loss": 2.963853597640991, "step": 6171, "token_acc": 0.3052314062285192 }, { "epoch": 3.6180005863383173, "grad_norm": 0.26750377185294894, "learning_rate": 0.00019077745875847537, "loss": 2.9795773029327393, "step": 6172, "token_acc": 0.3012130103410026 }, { "epoch": 3.618586924655526, "grad_norm": 0.2663804956131301, "learning_rate": 0.0001907733929059536, "loss": 2.944855213165283, "step": 6173, "token_acc": 0.30556487865726195 }, { "epoch": 3.619173262972735, "grad_norm": 0.33296952111803024, "learning_rate": 0.00019076932620073816, "loss": 2.9526374340057373, "step": 6174, "token_acc": 0.30584491692972887 }, { "epoch": 3.6197596012899442, "grad_norm": 0.365944764407279, "learning_rate": 0.00019076525864286715, "loss": 3.0436816215515137, "step": 6175, "token_acc": 0.2919612503765356 }, { "epoch": 3.6203459396071533, "grad_norm": 0.3177944417820769, "learning_rate": 0.00019076119023237888, "loss": 2.9874870777130127, "step": 6176, "token_acc": 0.29948480996832805 }, { "epoch": 3.6209322779243625, "grad_norm": 0.2843870348356647, "learning_rate": 0.00019075712096931153, "loss": 2.984544515609741, "step": 6177, "token_acc": 0.30142010088487786 }, { "epoch": 3.6215186162415716, "grad_norm": 0.32361294931390544, "learning_rate": 0.00019075305085370332, "loss": 2.9647340774536133, "step": 6178, "token_acc": 0.3025995583722277 }, { "epoch": 3.6221049545587807, "grad_norm": 0.2889808386304798, "learning_rate": 0.00019074897988559248, "loss": 2.956777572631836, "step": 6179, "token_acc": 0.3052317645422657 }, { "epoch": 3.6226912928759893, "grad_norm": 0.27006920715000504, "learning_rate": 0.00019074490806501727, "loss": 2.9542698860168457, "step": 6180, "token_acc": 0.30556562654648517 }, { "epoch": 3.6232776311931985, "grad_norm": 0.2868417760748693, "learning_rate": 0.00019074083539201593, "loss": 2.9923036098480225, "step": 6181, "token_acc": 0.299751620681528 }, { "epoch": 3.6238639695104076, "grad_norm": 0.30061755622675707, "learning_rate": 0.0001907367618666267, "loss": 3.0001049041748047, "step": 6182, "token_acc": 0.29682224719160816 }, { "epoch": 3.6244503078276167, "grad_norm": 0.25275165826890705, "learning_rate": 0.00019073268748888786, "loss": 2.940582275390625, "step": 6183, "token_acc": 0.306372486131709 }, { "epoch": 3.6250366461448253, "grad_norm": 0.2962188534729802, "learning_rate": 0.0001907286122588377, "loss": 2.954368829727173, "step": 6184, "token_acc": 0.30511541993209335 }, { "epoch": 3.6256229844620345, "grad_norm": 0.2781403435619372, "learning_rate": 0.00019072453617651448, "loss": 2.9168784618377686, "step": 6185, "token_acc": 0.3099325113494031 }, { "epoch": 3.6262093227792436, "grad_norm": 0.26699117779111864, "learning_rate": 0.00019072045924195652, "loss": 2.9802467823028564, "step": 6186, "token_acc": 0.30024963029937185 }, { "epoch": 3.6267956610964527, "grad_norm": 0.3306638468825156, "learning_rate": 0.00019071638145520204, "loss": 2.9505598545074463, "step": 6187, "token_acc": 0.30659413384159256 }, { "epoch": 3.627381999413662, "grad_norm": 0.2860276604339937, "learning_rate": 0.00019071230281628946, "loss": 2.9598701000213623, "step": 6188, "token_acc": 0.3027730527730528 }, { "epoch": 3.627968337730871, "grad_norm": 0.2711790997606585, "learning_rate": 0.000190708223325257, "loss": 2.9696645736694336, "step": 6189, "token_acc": 0.3020161050772579 }, { "epoch": 3.62855467604808, "grad_norm": 0.33376570529357985, "learning_rate": 0.000190704142982143, "loss": 2.9799728393554688, "step": 6190, "token_acc": 0.30156798414972547 }, { "epoch": 3.6291410143652887, "grad_norm": 0.2705693947280847, "learning_rate": 0.0001907000617869858, "loss": 2.982224941253662, "step": 6191, "token_acc": 0.299917279388226 }, { "epoch": 3.629727352682498, "grad_norm": 0.2917094012425391, "learning_rate": 0.00019069597973982378, "loss": 2.9821066856384277, "step": 6192, "token_acc": 0.300698863369866 }, { "epoch": 3.630313690999707, "grad_norm": 0.2683510046575638, "learning_rate": 0.0001906918968406952, "loss": 2.9758899211883545, "step": 6193, "token_acc": 0.3019360667057791 }, { "epoch": 3.630900029316916, "grad_norm": 0.2586959415928914, "learning_rate": 0.0001906878130896385, "loss": 2.98081111907959, "step": 6194, "token_acc": 0.300010272741281 }, { "epoch": 3.6314863676341247, "grad_norm": 0.29747046318066234, "learning_rate": 0.00019068372848669198, "loss": 2.967271327972412, "step": 6195, "token_acc": 0.30302388583145884 }, { "epoch": 3.632072705951334, "grad_norm": 0.2655527102125555, "learning_rate": 0.000190679643031894, "loss": 2.9688143730163574, "step": 6196, "token_acc": 0.30188255708960304 }, { "epoch": 3.632659044268543, "grad_norm": 0.2539137250025021, "learning_rate": 0.00019067555672528302, "loss": 3.0024023056030273, "step": 6197, "token_acc": 0.29832192066918367 }, { "epoch": 3.633245382585752, "grad_norm": 0.25450568310885174, "learning_rate": 0.00019067146956689733, "loss": 2.974299192428589, "step": 6198, "token_acc": 0.30229438055909347 }, { "epoch": 3.633831720902961, "grad_norm": 0.2907435385167291, "learning_rate": 0.00019066738155677537, "loss": 3.0151805877685547, "step": 6199, "token_acc": 0.2955186169868517 }, { "epoch": 3.6344180592201702, "grad_norm": 0.3122053922969575, "learning_rate": 0.00019066329269495555, "loss": 2.9650015830993652, "step": 6200, "token_acc": 0.30378918611743977 }, { "epoch": 3.635004397537379, "grad_norm": 0.21337769052105093, "learning_rate": 0.00019065920298147625, "loss": 2.951798915863037, "step": 6201, "token_acc": 0.3062927930644751 }, { "epoch": 3.635590735854588, "grad_norm": 0.3442300718367786, "learning_rate": 0.00019065511241637593, "loss": 3.0073294639587402, "step": 6202, "token_acc": 0.2951863729478251 }, { "epoch": 3.636177074171797, "grad_norm": 0.2979926028903667, "learning_rate": 0.00019065102099969297, "loss": 2.9703011512756348, "step": 6203, "token_acc": 0.3023549708184979 }, { "epoch": 3.6367634124890063, "grad_norm": 0.284621434168284, "learning_rate": 0.0001906469287314658, "loss": 2.9915499687194824, "step": 6204, "token_acc": 0.29943188025783896 }, { "epoch": 3.637349750806215, "grad_norm": 0.3207872178620921, "learning_rate": 0.0001906428356117329, "loss": 2.975562572479248, "step": 6205, "token_acc": 0.3026583282799005 }, { "epoch": 3.637936089123424, "grad_norm": 0.2781573544098772, "learning_rate": 0.00019063874164053273, "loss": 2.973410129547119, "step": 6206, "token_acc": 0.30082479699573345 }, { "epoch": 3.638522427440633, "grad_norm": 0.2819400047466067, "learning_rate": 0.00019063464681790367, "loss": 2.9945993423461914, "step": 6207, "token_acc": 0.29973346190726424 }, { "epoch": 3.6391087657578423, "grad_norm": 0.29518411052627225, "learning_rate": 0.00019063055114388428, "loss": 2.9451169967651367, "step": 6208, "token_acc": 0.3058016239202031 }, { "epoch": 3.6396951040750514, "grad_norm": 0.315084499386678, "learning_rate": 0.00019062645461851297, "loss": 2.9967751502990723, "step": 6209, "token_acc": 0.2979920340471109 }, { "epoch": 3.6402814423922605, "grad_norm": 0.3202276478155651, "learning_rate": 0.00019062235724182823, "loss": 2.976663589477539, "step": 6210, "token_acc": 0.3009384899547007 }, { "epoch": 3.6408677807094696, "grad_norm": 0.3143574720813603, "learning_rate": 0.00019061825901386858, "loss": 2.9756641387939453, "step": 6211, "token_acc": 0.3021067168863779 }, { "epoch": 3.6414541190266783, "grad_norm": 0.2948584087582355, "learning_rate": 0.00019061415993467247, "loss": 2.9689507484436035, "step": 6212, "token_acc": 0.3009819320344924 }, { "epoch": 3.6420404573438874, "grad_norm": 0.2853401118200396, "learning_rate": 0.00019061006000427845, "loss": 2.9341278076171875, "step": 6213, "token_acc": 0.3084795649842232 }, { "epoch": 3.6426267956610965, "grad_norm": 0.30364684230206446, "learning_rate": 0.000190605959222725, "loss": 2.9727964401245117, "step": 6214, "token_acc": 0.3018801561240908 }, { "epoch": 3.6432131339783056, "grad_norm": 0.25438090983228817, "learning_rate": 0.0001906018575900507, "loss": 3.007850170135498, "step": 6215, "token_acc": 0.29763555839696426 }, { "epoch": 3.6437994722955143, "grad_norm": 0.30752962351742735, "learning_rate": 0.00019059775510629399, "loss": 3.0018739700317383, "step": 6216, "token_acc": 0.2978389363369506 }, { "epoch": 3.6443858106127234, "grad_norm": 0.2918374990459936, "learning_rate": 0.00019059365177149346, "loss": 2.941260814666748, "step": 6217, "token_acc": 0.30619276545142243 }, { "epoch": 3.6449721489299325, "grad_norm": 0.2812347764106457, "learning_rate": 0.00019058954758568766, "loss": 2.96766996383667, "step": 6218, "token_acc": 0.30441127038020466 }, { "epoch": 3.6455584872471416, "grad_norm": 0.27273104363229456, "learning_rate": 0.00019058544254891514, "loss": 3.00252628326416, "step": 6219, "token_acc": 0.2983004605526632 }, { "epoch": 3.6461448255643507, "grad_norm": 0.30926508385875995, "learning_rate": 0.0001905813366612144, "loss": 2.932182788848877, "step": 6220, "token_acc": 0.308145139894032 }, { "epoch": 3.64673116388156, "grad_norm": 0.33176825152152195, "learning_rate": 0.00019057722992262411, "loss": 2.997267961502075, "step": 6221, "token_acc": 0.29897303303116207 }, { "epoch": 3.647317502198769, "grad_norm": 0.31339646878857463, "learning_rate": 0.0001905731223331828, "loss": 2.9566590785980225, "step": 6222, "token_acc": 0.3038203311201907 }, { "epoch": 3.6479038405159776, "grad_norm": 0.3383958252657143, "learning_rate": 0.00019056901389292901, "loss": 2.9743757247924805, "step": 6223, "token_acc": 0.3006694976067325 }, { "epoch": 3.6484901788331867, "grad_norm": 0.29777665194718855, "learning_rate": 0.00019056490460190144, "loss": 2.971754312515259, "step": 6224, "token_acc": 0.30263735974639255 }, { "epoch": 3.649076517150396, "grad_norm": 0.2933986445680828, "learning_rate": 0.0001905607944601386, "loss": 2.9496264457702637, "step": 6225, "token_acc": 0.3050920278346798 }, { "epoch": 3.649662855467605, "grad_norm": 0.28903595672131344, "learning_rate": 0.0001905566834676791, "loss": 2.9644532203674316, "step": 6226, "token_acc": 0.3025563854485099 }, { "epoch": 3.6502491937848136, "grad_norm": 0.2844361000020329, "learning_rate": 0.00019055257162456162, "loss": 2.9940433502197266, "step": 6227, "token_acc": 0.2994151163910573 }, { "epoch": 3.6508355321020227, "grad_norm": 0.287571379422993, "learning_rate": 0.00019054845893082476, "loss": 2.975808620452881, "step": 6228, "token_acc": 0.3021555390176483 }, { "epoch": 3.651421870419232, "grad_norm": 0.292445357082647, "learning_rate": 0.00019054434538650714, "loss": 2.9213790893554688, "step": 6229, "token_acc": 0.3103839337250903 }, { "epoch": 3.652008208736441, "grad_norm": 0.3243218123384122, "learning_rate": 0.00019054023099164736, "loss": 3.005901336669922, "step": 6230, "token_acc": 0.29821217652743476 }, { "epoch": 3.65259454705365, "grad_norm": 0.3684401545528447, "learning_rate": 0.00019053611574628416, "loss": 2.9830000400543213, "step": 6231, "token_acc": 0.3002663565933397 }, { "epoch": 3.653180885370859, "grad_norm": 0.41392585147505484, "learning_rate": 0.00019053199965045613, "loss": 2.9981765747070312, "step": 6232, "token_acc": 0.2974693519906179 }, { "epoch": 3.6537672236880683, "grad_norm": 0.32182968111089044, "learning_rate": 0.00019052788270420198, "loss": 2.9905354976654053, "step": 6233, "token_acc": 0.3023598309422546 }, { "epoch": 3.654353562005277, "grad_norm": 0.3466503631600139, "learning_rate": 0.00019052376490756034, "loss": 2.979701519012451, "step": 6234, "token_acc": 0.3010690120392589 }, { "epoch": 3.654939900322486, "grad_norm": 0.41647482494672516, "learning_rate": 0.00019051964626056993, "loss": 2.991868019104004, "step": 6235, "token_acc": 0.29920602726842893 }, { "epoch": 3.655526238639695, "grad_norm": 0.2799565199133241, "learning_rate": 0.0001905155267632694, "loss": 3.036208391189575, "step": 6236, "token_acc": 0.29476869984388265 }, { "epoch": 3.6561125769569043, "grad_norm": 0.3771354109494663, "learning_rate": 0.00019051140641569746, "loss": 2.9412827491760254, "step": 6237, "token_acc": 0.3054329158892378 }, { "epoch": 3.656698915274113, "grad_norm": 0.2533824598350181, "learning_rate": 0.00019050728521789284, "loss": 2.956327199935913, "step": 6238, "token_acc": 0.30332765386204047 }, { "epoch": 3.657285253591322, "grad_norm": 0.3377729027095872, "learning_rate": 0.00019050316316989422, "loss": 2.952402114868164, "step": 6239, "token_acc": 0.30571273966233137 }, { "epoch": 3.657871591908531, "grad_norm": 0.22786917596176479, "learning_rate": 0.00019049904027174038, "loss": 2.9367804527282715, "step": 6240, "token_acc": 0.30655910679005366 }, { "epoch": 3.6584579302257403, "grad_norm": 0.3189700110669627, "learning_rate": 0.00019049491652346995, "loss": 2.9897208213806152, "step": 6241, "token_acc": 0.3009005006225492 }, { "epoch": 3.6590442685429494, "grad_norm": 0.22156996188183542, "learning_rate": 0.00019049079192512175, "loss": 2.9745535850524902, "step": 6242, "token_acc": 0.30223500300942957 }, { "epoch": 3.6596306068601585, "grad_norm": 0.2668414452185026, "learning_rate": 0.0001904866664767345, "loss": 2.995163917541504, "step": 6243, "token_acc": 0.2987265562471643 }, { "epoch": 3.660216945177367, "grad_norm": 0.22995724363482903, "learning_rate": 0.00019048254017834694, "loss": 2.9742980003356934, "step": 6244, "token_acc": 0.30336239733741666 }, { "epoch": 3.6608032834945763, "grad_norm": 0.24405452652073414, "learning_rate": 0.00019047841302999785, "loss": 2.9605932235717773, "step": 6245, "token_acc": 0.3040994121211644 }, { "epoch": 3.6613896218117854, "grad_norm": 0.24877708211516808, "learning_rate": 0.000190474285031726, "loss": 2.9449081420898438, "step": 6246, "token_acc": 0.3063109010666817 }, { "epoch": 3.6619759601289945, "grad_norm": 0.2419367217666246, "learning_rate": 0.00019047015618357013, "loss": 3.0069422721862793, "step": 6247, "token_acc": 0.2973931388347044 }, { "epoch": 3.6625622984462036, "grad_norm": 0.26894027616857596, "learning_rate": 0.00019046602648556906, "loss": 2.9843950271606445, "step": 6248, "token_acc": 0.3008007421263756 }, { "epoch": 3.6631486367634123, "grad_norm": 0.2126682485340138, "learning_rate": 0.0001904618959377616, "loss": 3.0006322860717773, "step": 6249, "token_acc": 0.29686951486064383 }, { "epoch": 3.6637349750806214, "grad_norm": 0.26636978955626106, "learning_rate": 0.0001904577645401865, "loss": 2.9362521171569824, "step": 6250, "token_acc": 0.3067324907846235 }, { "epoch": 3.6643213133978305, "grad_norm": 0.23079087184621802, "learning_rate": 0.00019045363229288262, "loss": 2.989394187927246, "step": 6251, "token_acc": 0.30040518206536226 }, { "epoch": 3.6649076517150396, "grad_norm": 0.2515445115068219, "learning_rate": 0.00019044949919588873, "loss": 2.9568722248077393, "step": 6252, "token_acc": 0.3043862412112347 }, { "epoch": 3.6654939900322487, "grad_norm": 0.25468672884628824, "learning_rate": 0.0001904453652492437, "loss": 3.000718593597412, "step": 6253, "token_acc": 0.29907100982090656 }, { "epoch": 3.666080328349458, "grad_norm": 0.25922166180503803, "learning_rate": 0.0001904412304529863, "loss": 2.9637246131896973, "step": 6254, "token_acc": 0.3036730334684558 }, { "epoch": 3.6666666666666665, "grad_norm": 0.24102015358890252, "learning_rate": 0.00019043709480715543, "loss": 2.9836018085479736, "step": 6255, "token_acc": 0.3009246035524242 }, { "epoch": 3.6672530049838756, "grad_norm": 0.22824511692856825, "learning_rate": 0.00019043295831178993, "loss": 2.927424669265747, "step": 6256, "token_acc": 0.30931591594123475 }, { "epoch": 3.6678393433010847, "grad_norm": 0.25916895475754387, "learning_rate": 0.00019042882096692866, "loss": 2.974609375, "step": 6257, "token_acc": 0.30105942329138125 }, { "epoch": 3.668425681618294, "grad_norm": 0.2382323014523667, "learning_rate": 0.00019042468277261044, "loss": 2.9911837577819824, "step": 6258, "token_acc": 0.3013075042693211 }, { "epoch": 3.6690120199355025, "grad_norm": 0.254529195370672, "learning_rate": 0.0001904205437288742, "loss": 2.969275951385498, "step": 6259, "token_acc": 0.30247830044056884 }, { "epoch": 3.6695983582527116, "grad_norm": 0.25586360118694773, "learning_rate": 0.00019041640383575875, "loss": 2.929520606994629, "step": 6260, "token_acc": 0.30856523796349683 }, { "epoch": 3.6701846965699207, "grad_norm": 0.288498697968749, "learning_rate": 0.00019041226309330308, "loss": 2.9930315017700195, "step": 6261, "token_acc": 0.2994312383030594 }, { "epoch": 3.67077103488713, "grad_norm": 0.22877322672817335, "learning_rate": 0.000190408121501546, "loss": 3.008330821990967, "step": 6262, "token_acc": 0.2982720718468046 }, { "epoch": 3.671357373204339, "grad_norm": 0.27063142259504636, "learning_rate": 0.00019040397906052646, "loss": 2.9880032539367676, "step": 6263, "token_acc": 0.298337150307002 }, { "epoch": 3.671943711521548, "grad_norm": 0.23368492651493533, "learning_rate": 0.00019039983577028336, "loss": 2.924875259399414, "step": 6264, "token_acc": 0.30932840960868047 }, { "epoch": 3.672530049838757, "grad_norm": 0.2582833283735504, "learning_rate": 0.0001903956916308556, "loss": 2.9381537437438965, "step": 6265, "token_acc": 0.3068796449636561 }, { "epoch": 3.673116388155966, "grad_norm": 0.27916407381449165, "learning_rate": 0.00019039154664228213, "loss": 2.987145185470581, "step": 6266, "token_acc": 0.2997504599465344 }, { "epoch": 3.673702726473175, "grad_norm": 0.3225457540657182, "learning_rate": 0.0001903874008046019, "loss": 2.986154317855835, "step": 6267, "token_acc": 0.2998957454469242 }, { "epoch": 3.674289064790384, "grad_norm": 0.27221288643563646, "learning_rate": 0.00019038325411785382, "loss": 2.9574015140533447, "step": 6268, "token_acc": 0.3035075450214472 }, { "epoch": 3.674875403107593, "grad_norm": 0.2452292547901702, "learning_rate": 0.00019037910658207684, "loss": 2.929011583328247, "step": 6269, "token_acc": 0.31004418463447136 }, { "epoch": 3.675461741424802, "grad_norm": 0.2717992141555569, "learning_rate": 0.00019037495819731, "loss": 2.9656410217285156, "step": 6270, "token_acc": 0.3026585443329815 }, { "epoch": 3.676048079742011, "grad_norm": 0.2559583773015556, "learning_rate": 0.0001903708089635922, "loss": 3.0026004314422607, "step": 6271, "token_acc": 0.29758137461147993 }, { "epoch": 3.67663441805922, "grad_norm": 0.23217987874185483, "learning_rate": 0.00019036665888096238, "loss": 2.9892964363098145, "step": 6272, "token_acc": 0.2996508453218524 }, { "epoch": 3.677220756376429, "grad_norm": 0.27741439275831536, "learning_rate": 0.00019036250794945958, "loss": 2.9899230003356934, "step": 6273, "token_acc": 0.2983738166600388 }, { "epoch": 3.6778070946936383, "grad_norm": 0.24622681211937247, "learning_rate": 0.0001903583561691228, "loss": 2.951610565185547, "step": 6274, "token_acc": 0.30356155001185425 }, { "epoch": 3.6783934330108474, "grad_norm": 0.2573566691372984, "learning_rate": 0.00019035420353999101, "loss": 2.962080478668213, "step": 6275, "token_acc": 0.3034800608291731 }, { "epoch": 3.6789797713280565, "grad_norm": 0.30408817406016164, "learning_rate": 0.00019035005006210324, "loss": 2.9670677185058594, "step": 6276, "token_acc": 0.3024133993945552 }, { "epoch": 3.679566109645265, "grad_norm": 0.2729999438704715, "learning_rate": 0.00019034589573549852, "loss": 2.9653844833374023, "step": 6277, "token_acc": 0.3023982672178505 }, { "epoch": 3.6801524479624743, "grad_norm": 0.2696680662045605, "learning_rate": 0.00019034174056021584, "loss": 2.952261447906494, "step": 6278, "token_acc": 0.304149359984298 }, { "epoch": 3.6807387862796834, "grad_norm": 0.2778391845593696, "learning_rate": 0.0001903375845362942, "loss": 2.941913604736328, "step": 6279, "token_acc": 0.30735129540547224 }, { "epoch": 3.6813251245968925, "grad_norm": 0.2644557967380305, "learning_rate": 0.00019033342766377274, "loss": 2.979827404022217, "step": 6280, "token_acc": 0.3015176524767281 }, { "epoch": 3.681911462914101, "grad_norm": 0.2543370120967468, "learning_rate": 0.00019032926994269045, "loss": 2.960798501968384, "step": 6281, "token_acc": 0.30410753752438796 }, { "epoch": 3.6824978012313103, "grad_norm": 0.25910844602861055, "learning_rate": 0.00019032511137308635, "loss": 2.9486069679260254, "step": 6282, "token_acc": 0.3048707411974316 }, { "epoch": 3.6830841395485194, "grad_norm": 0.257511453464599, "learning_rate": 0.00019032095195499956, "loss": 2.973449230194092, "step": 6283, "token_acc": 0.30220434374774513 }, { "epoch": 3.6836704778657285, "grad_norm": 0.2817837062344389, "learning_rate": 0.00019031679168846917, "loss": 3.031688690185547, "step": 6284, "token_acc": 0.29244978972605434 }, { "epoch": 3.6842568161829377, "grad_norm": 0.2817957970667045, "learning_rate": 0.0001903126305735342, "loss": 2.964170455932617, "step": 6285, "token_acc": 0.3031678524583648 }, { "epoch": 3.6848431545001468, "grad_norm": 0.24806487159675453, "learning_rate": 0.00019030846861023374, "loss": 2.95097017288208, "step": 6286, "token_acc": 0.3054087659309916 }, { "epoch": 3.685429492817356, "grad_norm": 0.24150324801622605, "learning_rate": 0.00019030430579860692, "loss": 2.984927177429199, "step": 6287, "token_acc": 0.2991145841297998 }, { "epoch": 3.6860158311345645, "grad_norm": 0.2544207190906043, "learning_rate": 0.00019030014213869284, "loss": 2.9702696800231934, "step": 6288, "token_acc": 0.30226896689816524 }, { "epoch": 3.6866021694517737, "grad_norm": 0.2566797661180451, "learning_rate": 0.00019029597763053061, "loss": 2.978369951248169, "step": 6289, "token_acc": 0.3022135487074114 }, { "epoch": 3.6871885077689828, "grad_norm": 0.28976969616958237, "learning_rate": 0.00019029181227415935, "loss": 3.0182032585144043, "step": 6290, "token_acc": 0.29523811995670785 }, { "epoch": 3.687774846086192, "grad_norm": 0.29241306248037163, "learning_rate": 0.00019028764606961816, "loss": 2.9459493160247803, "step": 6291, "token_acc": 0.30576064359144034 }, { "epoch": 3.6883611844034006, "grad_norm": 0.22933930935580515, "learning_rate": 0.00019028347901694622, "loss": 2.942089557647705, "step": 6292, "token_acc": 0.30683110725343626 }, { "epoch": 3.6889475227206097, "grad_norm": 0.31712030748613307, "learning_rate": 0.00019027931111618266, "loss": 2.9646778106689453, "step": 6293, "token_acc": 0.30280976342625465 }, { "epoch": 3.6895338610378188, "grad_norm": 0.4116757377235896, "learning_rate": 0.00019027514236736662, "loss": 2.9486870765686035, "step": 6294, "token_acc": 0.30627205346721803 }, { "epoch": 3.690120199355028, "grad_norm": 0.30100712642999755, "learning_rate": 0.00019027097277053728, "loss": 2.9905476570129395, "step": 6295, "token_acc": 0.3003432700065262 }, { "epoch": 3.690706537672237, "grad_norm": 0.32578474489889564, "learning_rate": 0.00019026680232573376, "loss": 2.9521169662475586, "step": 6296, "token_acc": 0.30554650329140326 }, { "epoch": 3.691292875989446, "grad_norm": 0.3326382296483218, "learning_rate": 0.00019026263103299527, "loss": 2.9799866676330566, "step": 6297, "token_acc": 0.30086036786733056 }, { "epoch": 3.6918792143066548, "grad_norm": 0.260117217522024, "learning_rate": 0.000190258458892361, "loss": 2.9289636611938477, "step": 6298, "token_acc": 0.3070492040970618 }, { "epoch": 3.692465552623864, "grad_norm": 0.3447819424850371, "learning_rate": 0.00019025428590387016, "loss": 2.992405891418457, "step": 6299, "token_acc": 0.30223858817438704 }, { "epoch": 3.693051890941073, "grad_norm": 0.22559533891626973, "learning_rate": 0.0001902501120675619, "loss": 2.9502062797546387, "step": 6300, "token_acc": 0.30567795447268514 }, { "epoch": 3.693638229258282, "grad_norm": 0.2684349205698122, "learning_rate": 0.0001902459373834754, "loss": 2.950061798095703, "step": 6301, "token_acc": 0.30570234811956287 }, { "epoch": 3.6942245675754912, "grad_norm": 0.25520331733917195, "learning_rate": 0.00019024176185165, "loss": 2.984220504760742, "step": 6302, "token_acc": 0.30230165205506504 }, { "epoch": 3.6948109058927, "grad_norm": 0.25501063768549975, "learning_rate": 0.00019023758547212483, "loss": 2.969302177429199, "step": 6303, "token_acc": 0.30262633316569687 }, { "epoch": 3.695397244209909, "grad_norm": 0.26438871768553796, "learning_rate": 0.00019023340824493915, "loss": 3.025841474533081, "step": 6304, "token_acc": 0.2960849748737193 }, { "epoch": 3.695983582527118, "grad_norm": 0.2669508649230447, "learning_rate": 0.00019022923017013218, "loss": 2.9550399780273438, "step": 6305, "token_acc": 0.30319367709045564 }, { "epoch": 3.6965699208443272, "grad_norm": 0.23609734712239372, "learning_rate": 0.00019022505124774317, "loss": 2.9517624378204346, "step": 6306, "token_acc": 0.30398864678126924 }, { "epoch": 3.6971562591615363, "grad_norm": 0.28346762018295474, "learning_rate": 0.0001902208714778114, "loss": 2.996804714202881, "step": 6307, "token_acc": 0.2986849089792562 }, { "epoch": 3.6977425974787455, "grad_norm": 0.2443855681911445, "learning_rate": 0.0001902166908603761, "loss": 3.019991397857666, "step": 6308, "token_acc": 0.29568681395949403 }, { "epoch": 3.698328935795954, "grad_norm": 0.23324056432401047, "learning_rate": 0.00019021250939547658, "loss": 2.955322742462158, "step": 6309, "token_acc": 0.3040921560658403 }, { "epoch": 3.6989152741131632, "grad_norm": 0.2493018923769049, "learning_rate": 0.00019020832708315207, "loss": 2.9597363471984863, "step": 6310, "token_acc": 0.3034445734878665 }, { "epoch": 3.6995016124303723, "grad_norm": 0.25336673496525297, "learning_rate": 0.00019020414392344187, "loss": 2.9652631282806396, "step": 6311, "token_acc": 0.3043450893173276 }, { "epoch": 3.7000879507475815, "grad_norm": 0.2930127425097025, "learning_rate": 0.00019019995991638534, "loss": 2.9235386848449707, "step": 6312, "token_acc": 0.3073756468089886 }, { "epoch": 3.70067428906479, "grad_norm": 0.25189305536060086, "learning_rate": 0.0001901957750620217, "loss": 2.9615228176116943, "step": 6313, "token_acc": 0.3040208610388837 }, { "epoch": 3.7012606273819992, "grad_norm": 0.2772555745158556, "learning_rate": 0.00019019158936039028, "loss": 2.986903429031372, "step": 6314, "token_acc": 0.2994557264525366 }, { "epoch": 3.7018469656992083, "grad_norm": 0.23084375307624813, "learning_rate": 0.00019018740281153044, "loss": 2.9422569274902344, "step": 6315, "token_acc": 0.3057512771988745 }, { "epoch": 3.7024333040164175, "grad_norm": 0.26245203025893526, "learning_rate": 0.00019018321541548148, "loss": 2.9992148876190186, "step": 6316, "token_acc": 0.29956117641630825 }, { "epoch": 3.7030196423336266, "grad_norm": 0.25214658305730453, "learning_rate": 0.00019017902717228272, "loss": 2.9766857624053955, "step": 6317, "token_acc": 0.3021283245809499 }, { "epoch": 3.7036059806508357, "grad_norm": 0.24707826165407104, "learning_rate": 0.00019017483808197354, "loss": 2.9763357639312744, "step": 6318, "token_acc": 0.3032729370672987 }, { "epoch": 3.704192318968045, "grad_norm": 0.25151762941648587, "learning_rate": 0.00019017064814459326, "loss": 2.965953826904297, "step": 6319, "token_acc": 0.3042194870570032 }, { "epoch": 3.7047786572852535, "grad_norm": 0.2381626682024101, "learning_rate": 0.00019016645736018122, "loss": 2.961422920227051, "step": 6320, "token_acc": 0.30404426863800443 }, { "epoch": 3.7053649956024626, "grad_norm": 0.2447539051919443, "learning_rate": 0.00019016226572877683, "loss": 2.9980249404907227, "step": 6321, "token_acc": 0.29899884842185703 }, { "epoch": 3.7059513339196717, "grad_norm": 0.2887434428940188, "learning_rate": 0.00019015807325041946, "loss": 2.98592209815979, "step": 6322, "token_acc": 0.2995074635632116 }, { "epoch": 3.706537672236881, "grad_norm": 0.3207331876905664, "learning_rate": 0.00019015387992514846, "loss": 2.929492473602295, "step": 6323, "token_acc": 0.30956660289666504 }, { "epoch": 3.7071240105540895, "grad_norm": 0.2956742793871334, "learning_rate": 0.00019014968575300326, "loss": 2.9600448608398438, "step": 6324, "token_acc": 0.30439749014265316 }, { "epoch": 3.7077103488712986, "grad_norm": 0.24755414157473432, "learning_rate": 0.00019014549073402326, "loss": 2.9625425338745117, "step": 6325, "token_acc": 0.30299008550595397 }, { "epoch": 3.7082966871885077, "grad_norm": 0.2950645471218155, "learning_rate": 0.0001901412948682478, "loss": 2.969426155090332, "step": 6326, "token_acc": 0.3027498252509133 }, { "epoch": 3.708883025505717, "grad_norm": 0.3099957167844313, "learning_rate": 0.00019013709815571636, "loss": 2.979088306427002, "step": 6327, "token_acc": 0.30084974355060695 }, { "epoch": 3.709469363822926, "grad_norm": 0.22653178367405352, "learning_rate": 0.00019013290059646836, "loss": 3.002145767211914, "step": 6328, "token_acc": 0.2977456204810919 }, { "epoch": 3.710055702140135, "grad_norm": 0.24421553966880818, "learning_rate": 0.00019012870219054323, "loss": 2.9862465858459473, "step": 6329, "token_acc": 0.29960457223360654 }, { "epoch": 3.710642040457344, "grad_norm": 0.24557155728156826, "learning_rate": 0.00019012450293798034, "loss": 2.96150279045105, "step": 6330, "token_acc": 0.3056087264336471 }, { "epoch": 3.711228378774553, "grad_norm": 0.22986727459819853, "learning_rate": 0.0001901203028388192, "loss": 2.9522314071655273, "step": 6331, "token_acc": 0.3056782453472256 }, { "epoch": 3.711814717091762, "grad_norm": 0.24288577944197778, "learning_rate": 0.00019011610189309928, "loss": 2.963533878326416, "step": 6332, "token_acc": 0.3037156456318133 }, { "epoch": 3.712401055408971, "grad_norm": 0.23093245480123537, "learning_rate": 0.00019011190010086, "loss": 2.990579128265381, "step": 6333, "token_acc": 0.2985225377586733 }, { "epoch": 3.71298739372618, "grad_norm": 0.27835846574597023, "learning_rate": 0.00019010769746214087, "loss": 2.9629178047180176, "step": 6334, "token_acc": 0.30335232529334744 }, { "epoch": 3.713573732043389, "grad_norm": 0.2595220382673158, "learning_rate": 0.00019010349397698134, "loss": 2.9336161613464355, "step": 6335, "token_acc": 0.30579483892587106 }, { "epoch": 3.714160070360598, "grad_norm": 0.28247727036855075, "learning_rate": 0.00019009928964542087, "loss": 2.983488082885742, "step": 6336, "token_acc": 0.3005971371448018 }, { "epoch": 3.714746408677807, "grad_norm": 0.30416202870394077, "learning_rate": 0.00019009508446749898, "loss": 2.945540428161621, "step": 6337, "token_acc": 0.3064674876448612 }, { "epoch": 3.715332746995016, "grad_norm": 0.2280390966040148, "learning_rate": 0.00019009087844325522, "loss": 2.9879589080810547, "step": 6338, "token_acc": 0.2984429929562138 }, { "epoch": 3.7159190853122253, "grad_norm": 0.36426380750709003, "learning_rate": 0.00019008667157272902, "loss": 3.0284194946289062, "step": 6339, "token_acc": 0.2947635266087478 }, { "epoch": 3.7165054236294344, "grad_norm": 0.27550992399242974, "learning_rate": 0.00019008246385595996, "loss": 2.9873099327087402, "step": 6340, "token_acc": 0.2999179434680626 }, { "epoch": 3.7170917619466435, "grad_norm": 0.27602068196707996, "learning_rate": 0.0001900782552929875, "loss": 2.959066152572632, "step": 6341, "token_acc": 0.30414989160730876 }, { "epoch": 3.717678100263852, "grad_norm": 0.24986176387896136, "learning_rate": 0.00019007404588385125, "loss": 2.9747049808502197, "step": 6342, "token_acc": 0.30231796666465843 }, { "epoch": 3.7182644385810613, "grad_norm": 0.2529792653980859, "learning_rate": 0.0001900698356285907, "loss": 3.000605821609497, "step": 6343, "token_acc": 0.2983727477331648 }, { "epoch": 3.7188507768982704, "grad_norm": 0.24879973772842484, "learning_rate": 0.0001900656245272454, "loss": 3.017080307006836, "step": 6344, "token_acc": 0.29679052696439573 }, { "epoch": 3.7194371152154795, "grad_norm": 0.2950510569138496, "learning_rate": 0.00019006141257985496, "loss": 2.984283447265625, "step": 6345, "token_acc": 0.2989423737279141 }, { "epoch": 3.720023453532688, "grad_norm": 0.31357008399101083, "learning_rate": 0.00019005719978645887, "loss": 2.958548069000244, "step": 6346, "token_acc": 0.3047582636566697 }, { "epoch": 3.7206097918498973, "grad_norm": 0.25101169288568986, "learning_rate": 0.00019005298614709678, "loss": 2.96866512298584, "step": 6347, "token_acc": 0.30325714089843614 }, { "epoch": 3.7211961301671064, "grad_norm": 0.3061100156676543, "learning_rate": 0.00019004877166180822, "loss": 2.9383187294006348, "step": 6348, "token_acc": 0.3062591864393838 }, { "epoch": 3.7217824684843155, "grad_norm": 0.23382063298811795, "learning_rate": 0.0001900445563306328, "loss": 2.9775500297546387, "step": 6349, "token_acc": 0.3015733569657971 }, { "epoch": 3.7223688068015246, "grad_norm": 0.3133245743169299, "learning_rate": 0.00019004034015361008, "loss": 2.994356155395508, "step": 6350, "token_acc": 0.29933396764985726 }, { "epoch": 3.7229551451187337, "grad_norm": 0.256536533640805, "learning_rate": 0.00019003612313077972, "loss": 2.9436895847320557, "step": 6351, "token_acc": 0.30521720341652503 }, { "epoch": 3.7235414834359424, "grad_norm": 0.3311811253093764, "learning_rate": 0.00019003190526218128, "loss": 3.029824733734131, "step": 6352, "token_acc": 0.29527110847912763 }, { "epoch": 3.7241278217531515, "grad_norm": 0.27837716692868525, "learning_rate": 0.00019002768654785443, "loss": 3.0153770446777344, "step": 6353, "token_acc": 0.2950725799524723 }, { "epoch": 3.7247141600703606, "grad_norm": 0.2688445741760561, "learning_rate": 0.00019002346698783877, "loss": 2.995077610015869, "step": 6354, "token_acc": 0.2994408817738827 }, { "epoch": 3.7253004983875697, "grad_norm": 0.2694000050206595, "learning_rate": 0.00019001924658217396, "loss": 2.9942402839660645, "step": 6355, "token_acc": 0.29896504121481976 }, { "epoch": 3.7258868367047784, "grad_norm": 0.25495557928438856, "learning_rate": 0.00019001502533089963, "loss": 2.9655890464782715, "step": 6356, "token_acc": 0.3023421060504822 }, { "epoch": 3.7264731750219875, "grad_norm": 0.2631673278622382, "learning_rate": 0.00019001080323405542, "loss": 2.9499459266662598, "step": 6357, "token_acc": 0.3062973812314915 }, { "epoch": 3.7270595133391966, "grad_norm": 0.3135742446004838, "learning_rate": 0.00019000658029168102, "loss": 2.9979248046875, "step": 6358, "token_acc": 0.2973516672146809 }, { "epoch": 3.7276458516564057, "grad_norm": 0.29819777289467625, "learning_rate": 0.0001900023565038161, "loss": 2.9760982990264893, "step": 6359, "token_acc": 0.30106373841456907 }, { "epoch": 3.728232189973615, "grad_norm": 0.26914305886366235, "learning_rate": 0.00018999813187050028, "loss": 2.9923925399780273, "step": 6360, "token_acc": 0.3007405436566006 }, { "epoch": 3.728818528290824, "grad_norm": 0.29489594525756213, "learning_rate": 0.00018999390639177328, "loss": 2.978440284729004, "step": 6361, "token_acc": 0.30089077996816455 }, { "epoch": 3.729404866608033, "grad_norm": 0.2482561434558423, "learning_rate": 0.00018998968006767484, "loss": 2.975919485092163, "step": 6362, "token_acc": 0.30286378137159087 }, { "epoch": 3.7299912049252417, "grad_norm": 0.2487304469717474, "learning_rate": 0.0001899854528982446, "loss": 2.970010280609131, "step": 6363, "token_acc": 0.30247099170900493 }, { "epoch": 3.730577543242451, "grad_norm": 0.26544232901861814, "learning_rate": 0.00018998122488352227, "loss": 3.005382537841797, "step": 6364, "token_acc": 0.29765255686792097 }, { "epoch": 3.73116388155966, "grad_norm": 0.2855807733494327, "learning_rate": 0.00018997699602354761, "loss": 2.9702000617980957, "step": 6365, "token_acc": 0.30279789984548644 }, { "epoch": 3.731750219876869, "grad_norm": 0.2773575380206403, "learning_rate": 0.0001899727663183603, "loss": 2.995187759399414, "step": 6366, "token_acc": 0.2986285533053349 }, { "epoch": 3.7323365581940777, "grad_norm": 0.27210417189701036, "learning_rate": 0.0001899685357680001, "loss": 2.962930679321289, "step": 6367, "token_acc": 0.3033415950079296 }, { "epoch": 3.732922896511287, "grad_norm": 0.252306545487768, "learning_rate": 0.00018996430437250673, "loss": 2.973048686981201, "step": 6368, "token_acc": 0.30247084445326494 }, { "epoch": 3.733509234828496, "grad_norm": 0.2785191343597969, "learning_rate": 0.00018996007213191996, "loss": 2.9834110736846924, "step": 6369, "token_acc": 0.30146891897469763 }, { "epoch": 3.734095573145705, "grad_norm": 0.2693615063767666, "learning_rate": 0.00018995583904627954, "loss": 3.0329649448394775, "step": 6370, "token_acc": 0.29366451774687324 }, { "epoch": 3.734681911462914, "grad_norm": 0.24196000562253117, "learning_rate": 0.0001899516051156252, "loss": 2.9376871585845947, "step": 6371, "token_acc": 0.3066253441694045 }, { "epoch": 3.7352682497801233, "grad_norm": 0.2751400566294294, "learning_rate": 0.00018994737033999678, "loss": 2.975109100341797, "step": 6372, "token_acc": 0.3030088402717662 }, { "epoch": 3.7358545880973324, "grad_norm": 0.23953798830569437, "learning_rate": 0.000189943134719434, "loss": 2.9184794425964355, "step": 6373, "token_acc": 0.3087879595305293 }, { "epoch": 3.736440926414541, "grad_norm": 0.28342537556308817, "learning_rate": 0.00018993889825397666, "loss": 2.9362549781799316, "step": 6374, "token_acc": 0.3070441740204276 }, { "epoch": 3.73702726473175, "grad_norm": 0.2572060120275675, "learning_rate": 0.00018993466094366457, "loss": 2.967146396636963, "step": 6375, "token_acc": 0.30267576115926564 }, { "epoch": 3.7376136030489593, "grad_norm": 0.2519424990729519, "learning_rate": 0.00018993042278853754, "loss": 2.959254026412964, "step": 6376, "token_acc": 0.30264331426127505 }, { "epoch": 3.7381999413661684, "grad_norm": 0.23522774289619797, "learning_rate": 0.0001899261837886354, "loss": 3.0247530937194824, "step": 6377, "token_acc": 0.29371664828819527 }, { "epoch": 3.738786279683377, "grad_norm": 0.25765752840441936, "learning_rate": 0.00018992194394399788, "loss": 2.9726128578186035, "step": 6378, "token_acc": 0.3022246164706007 }, { "epoch": 3.739372618000586, "grad_norm": 0.2581576400911658, "learning_rate": 0.0001899177032546649, "loss": 2.946310520172119, "step": 6379, "token_acc": 0.3055899968477461 }, { "epoch": 3.7399589563177953, "grad_norm": 0.296704095447177, "learning_rate": 0.00018991346172067626, "loss": 2.9851527214050293, "step": 6380, "token_acc": 0.3022891418137872 }, { "epoch": 3.7405452946350044, "grad_norm": 0.3897481978672039, "learning_rate": 0.0001899092193420718, "loss": 2.978149175643921, "step": 6381, "token_acc": 0.30136508490162195 }, { "epoch": 3.7411316329522135, "grad_norm": 0.39710884091290466, "learning_rate": 0.0001899049761188914, "loss": 3.0382485389709473, "step": 6382, "token_acc": 0.2937630054252156 }, { "epoch": 3.7417179712694226, "grad_norm": 0.23478985922368062, "learning_rate": 0.00018990073205117487, "loss": 2.9756479263305664, "step": 6383, "token_acc": 0.3007988004793859 }, { "epoch": 3.7423043095866317, "grad_norm": 0.292506791141633, "learning_rate": 0.00018989648713896214, "loss": 2.932119846343994, "step": 6384, "token_acc": 0.30870210694479555 }, { "epoch": 3.7428906479038404, "grad_norm": 0.3126245163646938, "learning_rate": 0.000189892241382293, "loss": 2.985936164855957, "step": 6385, "token_acc": 0.30068314764670623 }, { "epoch": 3.7434769862210495, "grad_norm": 0.2706567940782977, "learning_rate": 0.00018988799478120743, "loss": 2.994166374206543, "step": 6386, "token_acc": 0.2971665969432188 }, { "epoch": 3.7440633245382586, "grad_norm": 0.3286795833011848, "learning_rate": 0.00018988374733574522, "loss": 2.951742172241211, "step": 6387, "token_acc": 0.305133521610124 }, { "epoch": 3.7446496628554677, "grad_norm": 0.2675871955453939, "learning_rate": 0.00018987949904594636, "loss": 2.9892616271972656, "step": 6388, "token_acc": 0.30090340154373174 }, { "epoch": 3.7452360011726764, "grad_norm": 0.3078409773378117, "learning_rate": 0.00018987524991185076, "loss": 2.9753918647766113, "step": 6389, "token_acc": 0.3001743008338175 }, { "epoch": 3.7458223394898855, "grad_norm": 0.3104564901570339, "learning_rate": 0.00018987099993349822, "loss": 2.967794179916382, "step": 6390, "token_acc": 0.30155068497382914 }, { "epoch": 3.7464086778070946, "grad_norm": 0.3308259582628546, "learning_rate": 0.0001898667491109288, "loss": 2.9795639514923096, "step": 6391, "token_acc": 0.3012476546419125 }, { "epoch": 3.7469950161243037, "grad_norm": 0.3064452017340387, "learning_rate": 0.00018986249744418231, "loss": 3.0218162536621094, "step": 6392, "token_acc": 0.2959819570674865 }, { "epoch": 3.747581354441513, "grad_norm": 0.24555781944165342, "learning_rate": 0.0001898582449332988, "loss": 2.9798426628112793, "step": 6393, "token_acc": 0.3036490209116446 }, { "epoch": 3.748167692758722, "grad_norm": 0.2804345634201924, "learning_rate": 0.00018985399157831813, "loss": 2.9589853286743164, "step": 6394, "token_acc": 0.30474302861588454 }, { "epoch": 3.748754031075931, "grad_norm": 0.25378063817720764, "learning_rate": 0.00018984973737928032, "loss": 2.9575445652008057, "step": 6395, "token_acc": 0.3049566529927231 }, { "epoch": 3.7493403693931397, "grad_norm": 0.3229580742639388, "learning_rate": 0.00018984548233622528, "loss": 2.9705677032470703, "step": 6396, "token_acc": 0.3028504843205396 }, { "epoch": 3.749926707710349, "grad_norm": 0.22192204877831015, "learning_rate": 0.000189841226449193, "loss": 2.985295295715332, "step": 6397, "token_acc": 0.30107986171362977 }, { "epoch": 3.750513046027558, "grad_norm": 0.27086656075954574, "learning_rate": 0.00018983696971822348, "loss": 2.9959020614624023, "step": 6398, "token_acc": 0.3003767616365222 }, { "epoch": 3.751099384344767, "grad_norm": 0.2501935623215438, "learning_rate": 0.00018983271214335665, "loss": 2.9752509593963623, "step": 6399, "token_acc": 0.30150096392178466 }, { "epoch": 3.7516857226619758, "grad_norm": 0.2640993820224359, "learning_rate": 0.00018982845372463259, "loss": 2.9430179595947266, "step": 6400, "token_acc": 0.3080886789977699 }, { "epoch": 3.752272060979185, "grad_norm": 0.28871971653885753, "learning_rate": 0.0001898241944620912, "loss": 2.9612202644348145, "step": 6401, "token_acc": 0.3027732727016724 }, { "epoch": 3.752858399296394, "grad_norm": 0.2503679298147152, "learning_rate": 0.00018981993435577258, "loss": 2.988402843475342, "step": 6402, "token_acc": 0.3026130795622659 }, { "epoch": 3.753444737613603, "grad_norm": 0.29817020657949045, "learning_rate": 0.00018981567340571668, "loss": 2.988438606262207, "step": 6403, "token_acc": 0.29840501768550054 }, { "epoch": 3.754031075930812, "grad_norm": 0.2938869669411254, "learning_rate": 0.00018981141161196358, "loss": 2.9649300575256348, "step": 6404, "token_acc": 0.30339169256121795 }, { "epoch": 3.7546174142480213, "grad_norm": 0.26326831530161504, "learning_rate": 0.0001898071489745533, "loss": 2.9572858810424805, "step": 6405, "token_acc": 0.3050320950054461 }, { "epoch": 3.75520375256523, "grad_norm": 0.34522361980275706, "learning_rate": 0.00018980288549352587, "loss": 2.9809975624084473, "step": 6406, "token_acc": 0.30201390788991395 }, { "epoch": 3.755790090882439, "grad_norm": 0.26329738302473293, "learning_rate": 0.00018979862116892134, "loss": 2.970841407775879, "step": 6407, "token_acc": 0.30208955380247454 }, { "epoch": 3.756376429199648, "grad_norm": 0.3130808412389744, "learning_rate": 0.00018979435600077974, "loss": 2.9555368423461914, "step": 6408, "token_acc": 0.3052555489412365 }, { "epoch": 3.7569627675168573, "grad_norm": 0.29680289133861065, "learning_rate": 0.00018979008998914118, "loss": 2.9590911865234375, "step": 6409, "token_acc": 0.3046593304893298 }, { "epoch": 3.757549105834066, "grad_norm": 0.2601585311163044, "learning_rate": 0.00018978582313404575, "loss": 2.9795825481414795, "step": 6410, "token_acc": 0.3022048718100783 }, { "epoch": 3.758135444151275, "grad_norm": 0.2750601792237733, "learning_rate": 0.00018978155543553345, "loss": 2.9883341789245605, "step": 6411, "token_acc": 0.30147124844473516 }, { "epoch": 3.758721782468484, "grad_norm": 0.245527105222135, "learning_rate": 0.00018977728689364444, "loss": 2.947695016860962, "step": 6412, "token_acc": 0.3059318847805973 }, { "epoch": 3.7593081207856933, "grad_norm": 0.2585593028353297, "learning_rate": 0.00018977301750841877, "loss": 2.96058988571167, "step": 6413, "token_acc": 0.30481432930930347 }, { "epoch": 3.7598944591029024, "grad_norm": 0.2473124335993149, "learning_rate": 0.00018976874727989662, "loss": 2.96328067779541, "step": 6414, "token_acc": 0.30300524060048656 }, { "epoch": 3.7604807974201115, "grad_norm": 0.26193974612623955, "learning_rate": 0.00018976447620811803, "loss": 2.9938931465148926, "step": 6415, "token_acc": 0.30001367278295826 }, { "epoch": 3.7610671357373207, "grad_norm": 0.22710905408863505, "learning_rate": 0.00018976020429312316, "loss": 2.9814109802246094, "step": 6416, "token_acc": 0.30099770830329936 }, { "epoch": 3.7616534740545293, "grad_norm": 0.3062556298707376, "learning_rate": 0.0001897559315349521, "loss": 2.9744997024536133, "step": 6417, "token_acc": 0.3015961623238913 }, { "epoch": 3.7622398123717384, "grad_norm": 0.26644095334067547, "learning_rate": 0.00018975165793364503, "loss": 3.0003349781036377, "step": 6418, "token_acc": 0.29760157647729923 }, { "epoch": 3.7628261506889475, "grad_norm": 0.3045591780146438, "learning_rate": 0.00018974738348924206, "loss": 2.96622633934021, "step": 6419, "token_acc": 0.3031609271069775 }, { "epoch": 3.7634124890061567, "grad_norm": 0.32790874832235006, "learning_rate": 0.00018974310820178336, "loss": 3.014979839324951, "step": 6420, "token_acc": 0.297230867608195 }, { "epoch": 3.7639988273233653, "grad_norm": 0.2870746585624688, "learning_rate": 0.0001897388320713091, "loss": 2.9602577686309814, "step": 6421, "token_acc": 0.30392334550494415 }, { "epoch": 3.7645851656405744, "grad_norm": 0.26266519882543654, "learning_rate": 0.00018973455509785944, "loss": 2.984834671020508, "step": 6422, "token_acc": 0.3009348360406882 }, { "epoch": 3.7651715039577835, "grad_norm": 0.39968855288980115, "learning_rate": 0.00018973027728147454, "loss": 2.9502432346343994, "step": 6423, "token_acc": 0.3055117941749962 }, { "epoch": 3.7657578422749927, "grad_norm": 0.3037615600197763, "learning_rate": 0.0001897259986221946, "loss": 2.9382119178771973, "step": 6424, "token_acc": 0.3067722748796007 }, { "epoch": 3.7663441805922018, "grad_norm": 0.3041086263052896, "learning_rate": 0.00018972171912005981, "loss": 2.9570322036743164, "step": 6425, "token_acc": 0.3041972494140179 }, { "epoch": 3.766930518909411, "grad_norm": 0.3216288563934693, "learning_rate": 0.0001897174387751104, "loss": 2.9967215061187744, "step": 6426, "token_acc": 0.29831994347473373 }, { "epoch": 3.76751685722662, "grad_norm": 0.2681767360601132, "learning_rate": 0.0001897131575873865, "loss": 2.9416747093200684, "step": 6427, "token_acc": 0.3051988248872102 }, { "epoch": 3.7681031955438287, "grad_norm": 0.32238434255261966, "learning_rate": 0.0001897088755569284, "loss": 2.9535956382751465, "step": 6428, "token_acc": 0.30622803643778024 }, { "epoch": 3.7686895338610378, "grad_norm": 0.2597282288638976, "learning_rate": 0.00018970459268377628, "loss": 2.9602630138397217, "step": 6429, "token_acc": 0.3039485914342272 }, { "epoch": 3.769275872178247, "grad_norm": 0.3106684193517675, "learning_rate": 0.00018970030896797043, "loss": 2.9956607818603516, "step": 6430, "token_acc": 0.29824697031898956 }, { "epoch": 3.769862210495456, "grad_norm": 0.26526237475685877, "learning_rate": 0.000189696024409551, "loss": 2.9677610397338867, "step": 6431, "token_acc": 0.30389454861623766 }, { "epoch": 3.7704485488126647, "grad_norm": 0.31382890761195076, "learning_rate": 0.0001896917390085583, "loss": 3.0021324157714844, "step": 6432, "token_acc": 0.2979854548485575 }, { "epoch": 3.771034887129874, "grad_norm": 0.25340962827369945, "learning_rate": 0.00018968745276503262, "loss": 2.949416399002075, "step": 6433, "token_acc": 0.3042702034048257 }, { "epoch": 3.771621225447083, "grad_norm": 0.35338136637199524, "learning_rate": 0.00018968316567901413, "loss": 2.997436046600342, "step": 6434, "token_acc": 0.2983510323680739 }, { "epoch": 3.772207563764292, "grad_norm": 0.26437406847111955, "learning_rate": 0.00018967887775054316, "loss": 2.963125705718994, "step": 6435, "token_acc": 0.30427102931297956 }, { "epoch": 3.772793902081501, "grad_norm": 0.28555454071696934, "learning_rate": 0.00018967458897966, "loss": 2.9734902381896973, "step": 6436, "token_acc": 0.3030131674610227 }, { "epoch": 3.7733802403987102, "grad_norm": 0.2500844030724953, "learning_rate": 0.00018967029936640487, "loss": 3.0043625831604004, "step": 6437, "token_acc": 0.297483104473588 }, { "epoch": 3.7739665787159193, "grad_norm": 0.3040280860395201, "learning_rate": 0.00018966600891081817, "loss": 2.973471164703369, "step": 6438, "token_acc": 0.3003231264726502 }, { "epoch": 3.774552917033128, "grad_norm": 0.26987391042434655, "learning_rate": 0.00018966171761294009, "loss": 2.976778745651245, "step": 6439, "token_acc": 0.30086796927793685 }, { "epoch": 3.775139255350337, "grad_norm": 0.2729465586788869, "learning_rate": 0.000189657425472811, "loss": 2.9822378158569336, "step": 6440, "token_acc": 0.301755399802776 }, { "epoch": 3.7757255936675462, "grad_norm": 0.2553439874426675, "learning_rate": 0.0001896531324904712, "loss": 2.9639816284179688, "step": 6441, "token_acc": 0.30337592074719893 }, { "epoch": 3.7763119319847553, "grad_norm": 0.25992658858808765, "learning_rate": 0.00018964883866596108, "loss": 2.9855594635009766, "step": 6442, "token_acc": 0.2981479496183725 }, { "epoch": 3.776898270301964, "grad_norm": 0.23978903154784204, "learning_rate": 0.0001896445439993209, "loss": 2.9921417236328125, "step": 6443, "token_acc": 0.2989601606793278 }, { "epoch": 3.777484608619173, "grad_norm": 0.2367045496190604, "learning_rate": 0.000189640248490591, "loss": 2.972799777984619, "step": 6444, "token_acc": 0.301528904993437 }, { "epoch": 3.7780709469363822, "grad_norm": 0.2380341998977342, "learning_rate": 0.0001896359521398118, "loss": 3.000115394592285, "step": 6445, "token_acc": 0.29759096811809294 }, { "epoch": 3.7786572852535913, "grad_norm": 0.20778327035566968, "learning_rate": 0.00018963165494702356, "loss": 2.9575588703155518, "step": 6446, "token_acc": 0.3030098715038424 }, { "epoch": 3.7792436235708005, "grad_norm": 0.2616016860918608, "learning_rate": 0.00018962735691226677, "loss": 2.9937148094177246, "step": 6447, "token_acc": 0.2989232197460414 }, { "epoch": 3.7798299618880096, "grad_norm": 0.2439608996607693, "learning_rate": 0.0001896230580355817, "loss": 2.9846038818359375, "step": 6448, "token_acc": 0.30090735028462606 }, { "epoch": 3.7804163002052187, "grad_norm": 0.23812481452576162, "learning_rate": 0.00018961875831700878, "loss": 3.0058727264404297, "step": 6449, "token_acc": 0.2980010240218458 }, { "epoch": 3.7810026385224274, "grad_norm": 0.30612820256887985, "learning_rate": 0.00018961445775658836, "loss": 2.983628988265991, "step": 6450, "token_acc": 0.3002706528870084 }, { "epoch": 3.7815889768396365, "grad_norm": 0.28807737227148783, "learning_rate": 0.0001896101563543609, "loss": 2.942044496536255, "step": 6451, "token_acc": 0.3067467718067746 }, { "epoch": 3.7821753151568456, "grad_norm": 0.23091217902331396, "learning_rate": 0.00018960585411036674, "loss": 2.958925485610962, "step": 6452, "token_acc": 0.3048559344366662 }, { "epoch": 3.7827616534740547, "grad_norm": 0.3257970135816232, "learning_rate": 0.00018960155102464637, "loss": 2.9423441886901855, "step": 6453, "token_acc": 0.3065108591434528 }, { "epoch": 3.7833479917912634, "grad_norm": 0.27545340774214644, "learning_rate": 0.0001895972470972401, "loss": 2.960127115249634, "step": 6454, "token_acc": 0.30305548270498633 }, { "epoch": 3.7839343301084725, "grad_norm": 0.2465387068210143, "learning_rate": 0.0001895929423281885, "loss": 3.020472288131714, "step": 6455, "token_acc": 0.2950990207427338 }, { "epoch": 3.7845206684256816, "grad_norm": 0.2870415606059651, "learning_rate": 0.00018958863671753192, "loss": 2.9609787464141846, "step": 6456, "token_acc": 0.30344210951450784 }, { "epoch": 3.7851070067428907, "grad_norm": 0.26132612657468557, "learning_rate": 0.00018958433026531078, "loss": 2.9534366130828857, "step": 6457, "token_acc": 0.3039761161623015 }, { "epoch": 3.7856933450601, "grad_norm": 0.24280552585491177, "learning_rate": 0.00018958002297156558, "loss": 2.957118034362793, "step": 6458, "token_acc": 0.3029219110378913 }, { "epoch": 3.786279683377309, "grad_norm": 0.2607746553561123, "learning_rate": 0.0001895757148363368, "loss": 2.967437982559204, "step": 6459, "token_acc": 0.3028230361403427 }, { "epoch": 3.7868660216945176, "grad_norm": 0.21662138940769565, "learning_rate": 0.0001895714058596649, "loss": 2.9625139236450195, "step": 6460, "token_acc": 0.3039037875264817 }, { "epoch": 3.7874523600117267, "grad_norm": 0.2611983872616639, "learning_rate": 0.0001895670960415903, "loss": 2.960892915725708, "step": 6461, "token_acc": 0.3034370419478643 }, { "epoch": 3.788038698328936, "grad_norm": 0.2261393400770819, "learning_rate": 0.00018956278538215354, "loss": 2.978372097015381, "step": 6462, "token_acc": 0.2994066601054084 }, { "epoch": 3.788625036646145, "grad_norm": 0.27289552200721107, "learning_rate": 0.0001895584738813951, "loss": 3.006833791732788, "step": 6463, "token_acc": 0.2977787308114381 }, { "epoch": 3.7892113749633536, "grad_norm": 0.24511034184719233, "learning_rate": 0.0001895541615393555, "loss": 2.9368844032287598, "step": 6464, "token_acc": 0.30731900336975393 }, { "epoch": 3.7897977132805627, "grad_norm": 0.28370330594737736, "learning_rate": 0.0001895498483560752, "loss": 3.005736827850342, "step": 6465, "token_acc": 0.2987435863702144 }, { "epoch": 3.790384051597772, "grad_norm": 0.24985030356549393, "learning_rate": 0.00018954553433159473, "loss": 2.9910166263580322, "step": 6466, "token_acc": 0.3002912424788317 }, { "epoch": 3.790970389914981, "grad_norm": 0.26021952515960534, "learning_rate": 0.00018954121946595468, "loss": 2.954996347427368, "step": 6467, "token_acc": 0.30532725857267506 }, { "epoch": 3.79155672823219, "grad_norm": 0.2342053143968054, "learning_rate": 0.00018953690375919551, "loss": 2.9303388595581055, "step": 6468, "token_acc": 0.3076729335741725 }, { "epoch": 3.792143066549399, "grad_norm": 0.25630843671086245, "learning_rate": 0.00018953258721135776, "loss": 2.9153311252593994, "step": 6469, "token_acc": 0.3092236565532177 }, { "epoch": 3.7927294048666083, "grad_norm": 0.2310152075531785, "learning_rate": 0.00018952826982248202, "loss": 2.981039524078369, "step": 6470, "token_acc": 0.30252157236259636 }, { "epoch": 3.793315743183817, "grad_norm": 0.2816029721011824, "learning_rate": 0.00018952395159260884, "loss": 2.9622507095336914, "step": 6471, "token_acc": 0.3042772814124529 }, { "epoch": 3.793902081501026, "grad_norm": 0.25851978307745715, "learning_rate": 0.00018951963252177874, "loss": 2.975102663040161, "step": 6472, "token_acc": 0.3025784026481156 }, { "epoch": 3.794488419818235, "grad_norm": 0.29187641028021233, "learning_rate": 0.00018951531261003233, "loss": 2.9543423652648926, "step": 6473, "token_acc": 0.30443890325971845 }, { "epoch": 3.7950747581354443, "grad_norm": 0.3137516332363987, "learning_rate": 0.0001895109918574102, "loss": 2.984013557434082, "step": 6474, "token_acc": 0.30018195943735687 }, { "epoch": 3.795661096452653, "grad_norm": 0.4079429339667035, "learning_rate": 0.00018950667026395289, "loss": 2.9659347534179688, "step": 6475, "token_acc": 0.30197372485225377 }, { "epoch": 3.796247434769862, "grad_norm": 0.5110694433849108, "learning_rate": 0.00018950234782970105, "loss": 2.9980976581573486, "step": 6476, "token_acc": 0.2985276326288965 }, { "epoch": 3.796833773087071, "grad_norm": 0.3668991743382624, "learning_rate": 0.00018949802455469524, "loss": 2.9845781326293945, "step": 6477, "token_acc": 0.299582671162965 }, { "epoch": 3.7974201114042803, "grad_norm": 0.27596411300862606, "learning_rate": 0.0001894937004389761, "loss": 2.961146116256714, "step": 6478, "token_acc": 0.3037281218114748 }, { "epoch": 3.7980064497214894, "grad_norm": 0.31313731265377037, "learning_rate": 0.00018948937548258422, "loss": 2.973755359649658, "step": 6479, "token_acc": 0.302887353189119 }, { "epoch": 3.7985927880386985, "grad_norm": 0.24055250786731236, "learning_rate": 0.00018948504968556028, "loss": 2.991786241531372, "step": 6480, "token_acc": 0.299454233504549 }, { "epoch": 3.7991791263559076, "grad_norm": 0.2861633278163949, "learning_rate": 0.00018948072304794488, "loss": 2.9693217277526855, "step": 6481, "token_acc": 0.30120011582332806 }, { "epoch": 3.7997654646731163, "grad_norm": 0.25525817753095326, "learning_rate": 0.00018947639556977862, "loss": 2.9497933387756348, "step": 6482, "token_acc": 0.30518910861954696 }, { "epoch": 3.8003518029903254, "grad_norm": 0.2752159503143734, "learning_rate": 0.0001894720672511022, "loss": 2.9878454208374023, "step": 6483, "token_acc": 0.29990168705021825 }, { "epoch": 3.8009381413075345, "grad_norm": 0.26002350968562365, "learning_rate": 0.0001894677380919563, "loss": 2.958299160003662, "step": 6484, "token_acc": 0.3040312863765393 }, { "epoch": 3.8015244796247436, "grad_norm": 0.2818770964375362, "learning_rate": 0.00018946340809238157, "loss": 3.008742332458496, "step": 6485, "token_acc": 0.2971870042433544 }, { "epoch": 3.8021108179419523, "grad_norm": 0.27733767582467467, "learning_rate": 0.00018945907725241866, "loss": 3.0215892791748047, "step": 6486, "token_acc": 0.2938997270264858 }, { "epoch": 3.8026971562591614, "grad_norm": 0.30673458385011565, "learning_rate": 0.00018945474557210826, "loss": 2.960934638977051, "step": 6487, "token_acc": 0.3039904333584631 }, { "epoch": 3.8032834945763705, "grad_norm": 0.25723798977338125, "learning_rate": 0.00018945041305149104, "loss": 2.9898130893707275, "step": 6488, "token_acc": 0.30111156898173097 }, { "epoch": 3.8038698328935796, "grad_norm": 0.27094781658790884, "learning_rate": 0.00018944607969060778, "loss": 2.965735673904419, "step": 6489, "token_acc": 0.3030776445296858 }, { "epoch": 3.8044561712107887, "grad_norm": 0.27857574245158556, "learning_rate": 0.00018944174548949912, "loss": 2.979887008666992, "step": 6490, "token_acc": 0.301169279155887 }, { "epoch": 3.805042509527998, "grad_norm": 0.25951125385213214, "learning_rate": 0.0001894374104482058, "loss": 2.9727773666381836, "step": 6491, "token_acc": 0.3020376754173881 }, { "epoch": 3.805628847845207, "grad_norm": 0.26324424208473696, "learning_rate": 0.00018943307456676848, "loss": 2.9863975048065186, "step": 6492, "token_acc": 0.3003067614230351 }, { "epoch": 3.8062151861624156, "grad_norm": 0.26997291967709486, "learning_rate": 0.00018942873784522795, "loss": 2.9934046268463135, "step": 6493, "token_acc": 0.29805158072838056 }, { "epoch": 3.8068015244796247, "grad_norm": 0.23298918778922945, "learning_rate": 0.00018942440028362493, "loss": 2.9511237144470215, "step": 6494, "token_acc": 0.3046206307711768 }, { "epoch": 3.807387862796834, "grad_norm": 0.2575913067109522, "learning_rate": 0.0001894200618820002, "loss": 2.953953742980957, "step": 6495, "token_acc": 0.3045232908533289 }, { "epoch": 3.807974201114043, "grad_norm": 0.2522861284613284, "learning_rate": 0.00018941572264039445, "loss": 2.935549736022949, "step": 6496, "token_acc": 0.30819986644478126 }, { "epoch": 3.8085605394312516, "grad_norm": 0.2231331236893713, "learning_rate": 0.00018941138255884848, "loss": 2.9963440895080566, "step": 6497, "token_acc": 0.2985771398149729 }, { "epoch": 3.8091468777484607, "grad_norm": 0.25898359044725044, "learning_rate": 0.00018940704163740308, "loss": 2.9858288764953613, "step": 6498, "token_acc": 0.3017366932439177 }, { "epoch": 3.80973321606567, "grad_norm": 0.3227033621169905, "learning_rate": 0.00018940269987609897, "loss": 2.954547882080078, "step": 6499, "token_acc": 0.306346068916488 }, { "epoch": 3.810319554382879, "grad_norm": 0.330409234622868, "learning_rate": 0.00018939835727497698, "loss": 2.9800124168395996, "step": 6500, "token_acc": 0.29988499284399917 }, { "epoch": 3.810905892700088, "grad_norm": 0.2569895624660461, "learning_rate": 0.0001893940138340779, "loss": 2.991697311401367, "step": 6501, "token_acc": 0.29903958207755676 }, { "epoch": 3.811492231017297, "grad_norm": 0.2981652275263049, "learning_rate": 0.00018938966955344251, "loss": 2.9611032009124756, "step": 6502, "token_acc": 0.3036033248660356 }, { "epoch": 3.8120785693345063, "grad_norm": 0.3102511908111465, "learning_rate": 0.00018938532443311165, "loss": 2.98814058303833, "step": 6503, "token_acc": 0.29982911895592196 }, { "epoch": 3.812664907651715, "grad_norm": 0.23916674370112898, "learning_rate": 0.0001893809784731261, "loss": 2.9591097831726074, "step": 6504, "token_acc": 0.30400351064990144 }, { "epoch": 3.813251245968924, "grad_norm": 0.3320336812420841, "learning_rate": 0.0001893766316735267, "loss": 2.9880242347717285, "step": 6505, "token_acc": 0.30118081773636435 }, { "epoch": 3.813837584286133, "grad_norm": 0.27311381123343714, "learning_rate": 0.00018937228403435427, "loss": 2.999217987060547, "step": 6506, "token_acc": 0.29825480484294303 }, { "epoch": 3.8144239226033423, "grad_norm": 0.24613684233206592, "learning_rate": 0.00018936793555564965, "loss": 2.9370527267456055, "step": 6507, "token_acc": 0.30585877049946514 }, { "epoch": 3.815010260920551, "grad_norm": 0.2706709001681524, "learning_rate": 0.00018936358623745375, "loss": 2.950209617614746, "step": 6508, "token_acc": 0.3073964450626742 }, { "epoch": 3.81559659923776, "grad_norm": 0.22471046895308247, "learning_rate": 0.00018935923607980732, "loss": 2.9983434677124023, "step": 6509, "token_acc": 0.2981613456423738 }, { "epoch": 3.816182937554969, "grad_norm": 0.2968323557506492, "learning_rate": 0.0001893548850827513, "loss": 2.9787673950195312, "step": 6510, "token_acc": 0.3017991019728154 }, { "epoch": 3.8167692758721783, "grad_norm": 0.2679037393202564, "learning_rate": 0.00018935053324632657, "loss": 2.9783711433410645, "step": 6511, "token_acc": 0.3012353442541161 }, { "epoch": 3.8173556141893874, "grad_norm": 0.27883523188495396, "learning_rate": 0.00018934618057057394, "loss": 2.970696210861206, "step": 6512, "token_acc": 0.30121491091841357 }, { "epoch": 3.8179419525065965, "grad_norm": 0.26270425608838743, "learning_rate": 0.00018934182705553437, "loss": 2.968486785888672, "step": 6513, "token_acc": 0.3015280144837509 }, { "epoch": 3.818528290823805, "grad_norm": 0.2636634036711456, "learning_rate": 0.00018933747270124873, "loss": 2.965203285217285, "step": 6514, "token_acc": 0.3025961630719664 }, { "epoch": 3.8191146291410143, "grad_norm": 0.246383340857532, "learning_rate": 0.0001893331175077579, "loss": 2.963128089904785, "step": 6515, "token_acc": 0.3050427157749085 }, { "epoch": 3.8197009674582234, "grad_norm": 0.2822987575261475, "learning_rate": 0.00018932876147510278, "loss": 2.9310970306396484, "step": 6516, "token_acc": 0.30740909189353083 }, { "epoch": 3.8202873057754325, "grad_norm": 0.3008160734583212, "learning_rate": 0.00018932440460332436, "loss": 3.0081210136413574, "step": 6517, "token_acc": 0.2963012600547718 }, { "epoch": 3.820873644092641, "grad_norm": 0.2581158722056415, "learning_rate": 0.0001893200468924635, "loss": 2.963991641998291, "step": 6518, "token_acc": 0.3036314919331851 }, { "epoch": 3.8214599824098503, "grad_norm": 0.28318244703655115, "learning_rate": 0.00018931568834256116, "loss": 2.953887939453125, "step": 6519, "token_acc": 0.3028388813268869 }, { "epoch": 3.8220463207270594, "grad_norm": 0.2981496810028302, "learning_rate": 0.00018931132895365832, "loss": 2.9810752868652344, "step": 6520, "token_acc": 0.30094367171694586 }, { "epoch": 3.8226326590442685, "grad_norm": 0.2746602810359318, "learning_rate": 0.00018930696872579588, "loss": 2.973417282104492, "step": 6521, "token_acc": 0.3016037274839222 }, { "epoch": 3.8232189973614776, "grad_norm": 0.2931987574985435, "learning_rate": 0.0001893026076590148, "loss": 2.948890209197998, "step": 6522, "token_acc": 0.30440457024961315 }, { "epoch": 3.8238053356786867, "grad_norm": 0.22997746077521894, "learning_rate": 0.00018929824575335605, "loss": 2.9974405765533447, "step": 6523, "token_acc": 0.29686401236788 }, { "epoch": 3.824391673995896, "grad_norm": 0.29955618221972885, "learning_rate": 0.00018929388300886063, "loss": 2.976614236831665, "step": 6524, "token_acc": 0.30220428076243316 }, { "epoch": 3.8249780123131045, "grad_norm": 0.25745567293784866, "learning_rate": 0.0001892895194255695, "loss": 3.0095407962799072, "step": 6525, "token_acc": 0.29481090129204374 }, { "epoch": 3.8255643506303136, "grad_norm": 0.2713412391909302, "learning_rate": 0.00018928515500352364, "loss": 2.9895386695861816, "step": 6526, "token_acc": 0.30012694031698206 }, { "epoch": 3.8261506889475227, "grad_norm": 0.27907659983060323, "learning_rate": 0.00018928078974276405, "loss": 2.9679059982299805, "step": 6527, "token_acc": 0.3018858281800991 }, { "epoch": 3.826737027264732, "grad_norm": 0.32873247246054155, "learning_rate": 0.00018927642364333175, "loss": 3.030717611312866, "step": 6528, "token_acc": 0.29340850396728957 }, { "epoch": 3.8273233655819405, "grad_norm": 0.3217152890377281, "learning_rate": 0.0001892720567052678, "loss": 2.9552791118621826, "step": 6529, "token_acc": 0.3057435942161332 }, { "epoch": 3.8279097038991496, "grad_norm": 0.27239758691672594, "learning_rate": 0.00018926768892861312, "loss": 2.978858232498169, "step": 6530, "token_acc": 0.30147255203457995 }, { "epoch": 3.8284960422163588, "grad_norm": 0.389569836016819, "learning_rate": 0.00018926332031340883, "loss": 2.9666478633880615, "step": 6531, "token_acc": 0.300999751909505 }, { "epoch": 3.829082380533568, "grad_norm": 0.3982687088187171, "learning_rate": 0.0001892589508596959, "loss": 3.0066559314727783, "step": 6532, "token_acc": 0.2976260942708496 }, { "epoch": 3.829668718850777, "grad_norm": 0.23442871447609875, "learning_rate": 0.00018925458056751545, "loss": 2.9384467601776123, "step": 6533, "token_acc": 0.30749373063873725 }, { "epoch": 3.830255057167986, "grad_norm": 0.3720642887850602, "learning_rate": 0.00018925020943690843, "loss": 2.924694538116455, "step": 6534, "token_acc": 0.30937463864857195 }, { "epoch": 3.830841395485195, "grad_norm": 0.2238513332271215, "learning_rate": 0.00018924583746791597, "loss": 3.0158438682556152, "step": 6535, "token_acc": 0.2951865592888039 }, { "epoch": 3.831427733802404, "grad_norm": 0.3026993928634364, "learning_rate": 0.00018924146466057918, "loss": 2.9711694717407227, "step": 6536, "token_acc": 0.30340052032691245 }, { "epoch": 3.832014072119613, "grad_norm": 0.23077247873787882, "learning_rate": 0.00018923709101493903, "loss": 2.941213607788086, "step": 6537, "token_acc": 0.3052276991519675 }, { "epoch": 3.832600410436822, "grad_norm": 0.2850071633613535, "learning_rate": 0.00018923271653103666, "loss": 2.9446988105773926, "step": 6538, "token_acc": 0.3030086422015034 }, { "epoch": 3.833186748754031, "grad_norm": 0.23391634714757192, "learning_rate": 0.00018922834120891317, "loss": 3.051865816116333, "step": 6539, "token_acc": 0.2906703134840017 }, { "epoch": 3.83377308707124, "grad_norm": 0.31385137184852774, "learning_rate": 0.00018922396504860966, "loss": 3.0062785148620605, "step": 6540, "token_acc": 0.2961318779284322 }, { "epoch": 3.834359425388449, "grad_norm": 0.22039312273114434, "learning_rate": 0.00018921958805016723, "loss": 2.9394474029541016, "step": 6541, "token_acc": 0.30521860203245743 }, { "epoch": 3.834945763705658, "grad_norm": 0.2755406592956006, "learning_rate": 0.00018921521021362698, "loss": 2.989572048187256, "step": 6542, "token_acc": 0.2981256603831371 }, { "epoch": 3.835532102022867, "grad_norm": 0.21725046293351383, "learning_rate": 0.00018921083153903006, "loss": 3.013251304626465, "step": 6543, "token_acc": 0.2971545768338258 }, { "epoch": 3.8361184403400763, "grad_norm": 0.25457714487559724, "learning_rate": 0.00018920645202641758, "loss": 2.9969961643218994, "step": 6544, "token_acc": 0.2989139344262295 }, { "epoch": 3.8367047786572854, "grad_norm": 0.21417911366880765, "learning_rate": 0.0001892020716758307, "loss": 2.9853835105895996, "step": 6545, "token_acc": 0.3015421829254323 }, { "epoch": 3.8372911169744945, "grad_norm": 0.26402859028122605, "learning_rate": 0.00018919769048731058, "loss": 2.975426435470581, "step": 6546, "token_acc": 0.3027708668715547 }, { "epoch": 3.837877455291703, "grad_norm": 0.2388827238170951, "learning_rate": 0.00018919330846089833, "loss": 3.0057177543640137, "step": 6547, "token_acc": 0.29634797597123114 }, { "epoch": 3.8384637936089123, "grad_norm": 0.24731542545910584, "learning_rate": 0.00018918892559663514, "loss": 3.0001282691955566, "step": 6548, "token_acc": 0.2989893251911288 }, { "epoch": 3.8390501319261214, "grad_norm": 0.26255158036576626, "learning_rate": 0.00018918454189456216, "loss": 2.987797975540161, "step": 6549, "token_acc": 0.29973290393487456 }, { "epoch": 3.8396364702433305, "grad_norm": 0.23109189835235516, "learning_rate": 0.00018918015735472062, "loss": 2.954200267791748, "step": 6550, "token_acc": 0.3035390835439622 }, { "epoch": 3.840222808560539, "grad_norm": 0.24974193886076582, "learning_rate": 0.00018917577197715164, "loss": 2.9989688396453857, "step": 6551, "token_acc": 0.29938174795536143 }, { "epoch": 3.8408091468777483, "grad_norm": 0.24800466375299599, "learning_rate": 0.00018917138576189646, "loss": 2.956712245941162, "step": 6552, "token_acc": 0.30490720829605444 }, { "epoch": 3.8413954851949574, "grad_norm": 0.2910677173467684, "learning_rate": 0.00018916699870899628, "loss": 2.9893062114715576, "step": 6553, "token_acc": 0.30073180846986935 }, { "epoch": 3.8419818235121665, "grad_norm": 0.27300541051629335, "learning_rate": 0.0001891626108184923, "loss": 2.9990646839141846, "step": 6554, "token_acc": 0.29844519331665187 }, { "epoch": 3.8425681618293757, "grad_norm": 0.2777863842559474, "learning_rate": 0.00018915822209042573, "loss": 2.9732422828674316, "step": 6555, "token_acc": 0.3015398740609634 }, { "epoch": 3.8431545001465848, "grad_norm": 0.31920648038273947, "learning_rate": 0.00018915383252483782, "loss": 2.958232879638672, "step": 6556, "token_acc": 0.3027655832736273 }, { "epoch": 3.843740838463794, "grad_norm": 0.27735658715512135, "learning_rate": 0.00018914944212176978, "loss": 2.935795783996582, "step": 6557, "token_acc": 0.3069559171335533 }, { "epoch": 3.8443271767810026, "grad_norm": 0.27684169793614577, "learning_rate": 0.00018914505088126285, "loss": 2.9397130012512207, "step": 6558, "token_acc": 0.30711435226322736 }, { "epoch": 3.8449135150982117, "grad_norm": 0.24738124463911165, "learning_rate": 0.00018914065880335832, "loss": 3.0078787803649902, "step": 6559, "token_acc": 0.2976215159503569 }, { "epoch": 3.8454998534154208, "grad_norm": 0.28035206463663415, "learning_rate": 0.0001891362658880974, "loss": 2.9847705364227295, "step": 6560, "token_acc": 0.2998066283504324 }, { "epoch": 3.84608619173263, "grad_norm": 0.2634102832611171, "learning_rate": 0.00018913187213552134, "loss": 2.9645724296569824, "step": 6561, "token_acc": 0.30301350719490605 }, { "epoch": 3.8466725300498386, "grad_norm": 0.23484176992673733, "learning_rate": 0.0001891274775456715, "loss": 2.987703323364258, "step": 6562, "token_acc": 0.2986919689986353 }, { "epoch": 3.8472588683670477, "grad_norm": 0.25984591952909364, "learning_rate": 0.0001891230821185891, "loss": 2.9654252529144287, "step": 6563, "token_acc": 0.30525967344194654 }, { "epoch": 3.847845206684257, "grad_norm": 0.26629596386013177, "learning_rate": 0.00018911868585431543, "loss": 2.943603754043579, "step": 6564, "token_acc": 0.3052165743282676 }, { "epoch": 3.848431545001466, "grad_norm": 0.2391992298775766, "learning_rate": 0.0001891142887528918, "loss": 2.9723825454711914, "step": 6565, "token_acc": 0.2996885211914188 }, { "epoch": 3.849017883318675, "grad_norm": 0.2685897502324014, "learning_rate": 0.0001891098908143595, "loss": 3.0006003379821777, "step": 6566, "token_acc": 0.2978954978218709 }, { "epoch": 3.849604221635884, "grad_norm": 0.3312508644240777, "learning_rate": 0.00018910549203875987, "loss": 2.944103717803955, "step": 6567, "token_acc": 0.3060699601854717 }, { "epoch": 3.850190559953093, "grad_norm": 0.3269699735457189, "learning_rate": 0.00018910109242613421, "loss": 2.968503713607788, "step": 6568, "token_acc": 0.30211712342791863 }, { "epoch": 3.850776898270302, "grad_norm": 0.24664538726190666, "learning_rate": 0.00018909669197652383, "loss": 3.0190699100494385, "step": 6569, "token_acc": 0.29489689976475536 }, { "epoch": 3.851363236587511, "grad_norm": 0.2693928496394285, "learning_rate": 0.0001890922906899701, "loss": 2.9379968643188477, "step": 6570, "token_acc": 0.30698960337233383 }, { "epoch": 3.85194957490472, "grad_norm": 0.27658183039681894, "learning_rate": 0.0001890878885665144, "loss": 2.962949752807617, "step": 6571, "token_acc": 0.3037013900527682 }, { "epoch": 3.852535913221929, "grad_norm": 0.2693550361387959, "learning_rate": 0.00018908348560619796, "loss": 2.9985125064849854, "step": 6572, "token_acc": 0.2967676260299886 }, { "epoch": 3.853122251539138, "grad_norm": 0.2617317406431663, "learning_rate": 0.00018907908180906225, "loss": 2.9859962463378906, "step": 6573, "token_acc": 0.3013485742329281 }, { "epoch": 3.853708589856347, "grad_norm": 0.2340987878679894, "learning_rate": 0.0001890746771751486, "loss": 2.9504761695861816, "step": 6574, "token_acc": 0.3054859663071361 }, { "epoch": 3.854294928173556, "grad_norm": 0.23756963622205265, "learning_rate": 0.00018907027170449837, "loss": 2.9846694469451904, "step": 6575, "token_acc": 0.30020008593941533 }, { "epoch": 3.8548812664907652, "grad_norm": 0.2740573110573964, "learning_rate": 0.00018906586539715298, "loss": 2.985908031463623, "step": 6576, "token_acc": 0.3002133420178898 }, { "epoch": 3.8554676048079743, "grad_norm": 0.2351187850980175, "learning_rate": 0.0001890614582531538, "loss": 2.9653494358062744, "step": 6577, "token_acc": 0.3045679164231743 }, { "epoch": 3.8560539431251835, "grad_norm": 0.2516890280930331, "learning_rate": 0.00018905705027254222, "loss": 2.95635986328125, "step": 6578, "token_acc": 0.30444575924599593 }, { "epoch": 3.856640281442392, "grad_norm": 0.27630985197699487, "learning_rate": 0.0001890526414553597, "loss": 2.980644702911377, "step": 6579, "token_acc": 0.3010527659370787 }, { "epoch": 3.8572266197596012, "grad_norm": 0.22901415240074638, "learning_rate": 0.00018904823180164755, "loss": 2.9527535438537598, "step": 6580, "token_acc": 0.30582121990754013 }, { "epoch": 3.8578129580768104, "grad_norm": 0.22480247876338946, "learning_rate": 0.00018904382131144728, "loss": 2.9790101051330566, "step": 6581, "token_acc": 0.30085785966710726 }, { "epoch": 3.8583992963940195, "grad_norm": 0.24950726632856918, "learning_rate": 0.00018903940998480032, "loss": 2.9796957969665527, "step": 6582, "token_acc": 0.30032191233326627 }, { "epoch": 3.858985634711228, "grad_norm": 0.24375514582605803, "learning_rate": 0.00018903499782174806, "loss": 2.9565320014953613, "step": 6583, "token_acc": 0.3046423225611923 }, { "epoch": 3.8595719730284372, "grad_norm": 0.24682260971358824, "learning_rate": 0.00018903058482233197, "loss": 2.9611656665802, "step": 6584, "token_acc": 0.3038019269743901 }, { "epoch": 3.8601583113456464, "grad_norm": 0.27799446773964115, "learning_rate": 0.00018902617098659355, "loss": 2.993495464324951, "step": 6585, "token_acc": 0.29933959969746127 }, { "epoch": 3.8607446496628555, "grad_norm": 0.2763036721744753, "learning_rate": 0.00018902175631457417, "loss": 2.9699997901916504, "step": 6586, "token_acc": 0.3037248762961001 }, { "epoch": 3.8613309879800646, "grad_norm": 0.3033940454127729, "learning_rate": 0.00018901734080631536, "loss": 2.9681100845336914, "step": 6587, "token_acc": 0.303348460374843 }, { "epoch": 3.8619173262972737, "grad_norm": 0.2784206850543003, "learning_rate": 0.00018901292446185859, "loss": 2.981411933898926, "step": 6588, "token_acc": 0.3005357695800493 }, { "epoch": 3.862503664614483, "grad_norm": 0.2612354992627761, "learning_rate": 0.00018900850728124536, "loss": 3.0085861682891846, "step": 6589, "token_acc": 0.29694844130866305 }, { "epoch": 3.8630900029316915, "grad_norm": 0.3149991636738378, "learning_rate": 0.0001890040892645171, "loss": 2.9808101654052734, "step": 6590, "token_acc": 0.29982053141214526 }, { "epoch": 3.8636763412489006, "grad_norm": 0.40096297645010476, "learning_rate": 0.0001889996704117154, "loss": 2.9964959621429443, "step": 6591, "token_acc": 0.2990876897355222 }, { "epoch": 3.8642626795661097, "grad_norm": 0.3759961880757574, "learning_rate": 0.00018899525072288168, "loss": 2.9703972339630127, "step": 6592, "token_acc": 0.302446519323957 }, { "epoch": 3.864849017883319, "grad_norm": 0.30962284190031536, "learning_rate": 0.00018899083019805754, "loss": 2.9514222145080566, "step": 6593, "token_acc": 0.3035770473022216 }, { "epoch": 3.8654353562005275, "grad_norm": 0.2797327000604344, "learning_rate": 0.00018898640883728446, "loss": 2.94486665725708, "step": 6594, "token_acc": 0.3068007112204382 }, { "epoch": 3.8660216945177366, "grad_norm": 0.3165109061810221, "learning_rate": 0.00018898198664060395, "loss": 2.9469738006591797, "step": 6595, "token_acc": 0.30442420012298815 }, { "epoch": 3.8666080328349457, "grad_norm": 0.2686144491908403, "learning_rate": 0.00018897756360805763, "loss": 2.9870944023132324, "step": 6596, "token_acc": 0.3003847929015563 }, { "epoch": 3.867194371152155, "grad_norm": 0.27060693351158177, "learning_rate": 0.00018897313973968697, "loss": 2.9807047843933105, "step": 6597, "token_acc": 0.299737125732762 }, { "epoch": 3.867780709469364, "grad_norm": 0.2624501927585736, "learning_rate": 0.00018896871503553355, "loss": 2.962324380874634, "step": 6598, "token_acc": 0.30281936976349905 }, { "epoch": 3.868367047786573, "grad_norm": 0.22024054527219022, "learning_rate": 0.00018896428949563896, "loss": 2.9282186031341553, "step": 6599, "token_acc": 0.30940650583624907 }, { "epoch": 3.868953386103782, "grad_norm": 0.3003255056871707, "learning_rate": 0.00018895986312004475, "loss": 3.009282112121582, "step": 6600, "token_acc": 0.2969149529720053 }, { "epoch": 3.869539724420991, "grad_norm": 0.21971218301402018, "learning_rate": 0.00018895543590879247, "loss": 2.9734272956848145, "step": 6601, "token_acc": 0.30134884118155714 }, { "epoch": 3.8701260627382, "grad_norm": 0.29897030765720656, "learning_rate": 0.00018895100786192373, "loss": 2.9890599250793457, "step": 6602, "token_acc": 0.3013773111004432 }, { "epoch": 3.870712401055409, "grad_norm": 0.2455968303443717, "learning_rate": 0.0001889465789794802, "loss": 2.9506888389587402, "step": 6603, "token_acc": 0.30582732832007986 }, { "epoch": 3.871298739372618, "grad_norm": 0.2661505922192627, "learning_rate": 0.00018894214926150338, "loss": 2.9331350326538086, "step": 6604, "token_acc": 0.3072227088419545 }, { "epoch": 3.871885077689827, "grad_norm": 0.2764553014798297, "learning_rate": 0.00018893771870803492, "loss": 2.9683642387390137, "step": 6605, "token_acc": 0.30380053451324573 }, { "epoch": 3.872471416007036, "grad_norm": 0.24987557665629767, "learning_rate": 0.0001889332873191164, "loss": 2.952374219894409, "step": 6606, "token_acc": 0.3028951541896259 }, { "epoch": 3.873057754324245, "grad_norm": 0.3125239165562528, "learning_rate": 0.00018892885509478954, "loss": 2.9447720050811768, "step": 6607, "token_acc": 0.3066290928609769 }, { "epoch": 3.873644092641454, "grad_norm": 0.2447664245438412, "learning_rate": 0.0001889244220350959, "loss": 3.016688346862793, "step": 6608, "token_acc": 0.2944239226033421 }, { "epoch": 3.8742304309586633, "grad_norm": 0.3380046160889902, "learning_rate": 0.00018891998814007715, "loss": 2.9483423233032227, "step": 6609, "token_acc": 0.3063433998345527 }, { "epoch": 3.8748167692758724, "grad_norm": 0.29053080853390506, "learning_rate": 0.0001889155534097749, "loss": 2.9640774726867676, "step": 6610, "token_acc": 0.30336190224351495 }, { "epoch": 3.875403107593081, "grad_norm": 0.30449922224142834, "learning_rate": 0.00018891111784423087, "loss": 3.0250210762023926, "step": 6611, "token_acc": 0.2939440106646353 }, { "epoch": 3.87598944591029, "grad_norm": 0.23451655621413303, "learning_rate": 0.00018890668144348668, "loss": 3.0070528984069824, "step": 6612, "token_acc": 0.2973025568108105 }, { "epoch": 3.8765757842274993, "grad_norm": 0.2678397291227879, "learning_rate": 0.00018890224420758407, "loss": 3.0150537490844727, "step": 6613, "token_acc": 0.2967995867657425 }, { "epoch": 3.8771621225447084, "grad_norm": 0.26214380594391734, "learning_rate": 0.00018889780613656464, "loss": 2.9843482971191406, "step": 6614, "token_acc": 0.30008882087611627 }, { "epoch": 3.8777484608619175, "grad_norm": 0.32610892180228557, "learning_rate": 0.00018889336723047008, "loss": 3.0186028480529785, "step": 6615, "token_acc": 0.29446573504174245 }, { "epoch": 3.878334799179126, "grad_norm": 0.34055448049035714, "learning_rate": 0.00018888892748934218, "loss": 2.985410690307617, "step": 6616, "token_acc": 0.2982729575762252 }, { "epoch": 3.8789211374963353, "grad_norm": 0.26096876699023536, "learning_rate": 0.00018888448691322253, "loss": 2.9608302116394043, "step": 6617, "token_acc": 0.30279321940841397 }, { "epoch": 3.8795074758135444, "grad_norm": 0.2592938861485805, "learning_rate": 0.00018888004550215293, "loss": 2.9746205806732178, "step": 6618, "token_acc": 0.3006277431866898 }, { "epoch": 3.8800938141307535, "grad_norm": 0.29374199501137177, "learning_rate": 0.00018887560325617507, "loss": 2.993506669998169, "step": 6619, "token_acc": 0.2983843683473608 }, { "epoch": 3.8806801524479626, "grad_norm": 0.23343553620098012, "learning_rate": 0.00018887116017533067, "loss": 2.976466178894043, "step": 6620, "token_acc": 0.30247849061445076 }, { "epoch": 3.8812664907651717, "grad_norm": 0.28125736025367254, "learning_rate": 0.0001888667162596615, "loss": 2.96417498588562, "step": 6621, "token_acc": 0.3029697081564429 }, { "epoch": 3.8818528290823804, "grad_norm": 0.2538792065483055, "learning_rate": 0.00018886227150920922, "loss": 3.009347915649414, "step": 6622, "token_acc": 0.2963773363137403 }, { "epoch": 3.8824391673995895, "grad_norm": 0.26553775339762836, "learning_rate": 0.0001888578259240157, "loss": 2.9885189533233643, "step": 6623, "token_acc": 0.3004351655864218 }, { "epoch": 3.8830255057167986, "grad_norm": 0.23495111482500267, "learning_rate": 0.0001888533795041226, "loss": 2.9778409004211426, "step": 6624, "token_acc": 0.3005599203160312 }, { "epoch": 3.8836118440340077, "grad_norm": 0.23949151438282304, "learning_rate": 0.00018884893224957176, "loss": 2.9597413539886475, "step": 6625, "token_acc": 0.30309328520880435 }, { "epoch": 3.8841981823512164, "grad_norm": 0.23497873600284389, "learning_rate": 0.00018884448416040493, "loss": 2.998344898223877, "step": 6626, "token_acc": 0.2976496127914501 }, { "epoch": 3.8847845206684255, "grad_norm": 0.21587033977052048, "learning_rate": 0.0001888400352366639, "loss": 3.004761219024658, "step": 6627, "token_acc": 0.29682110343714446 }, { "epoch": 3.8853708589856346, "grad_norm": 0.24969744940200198, "learning_rate": 0.00018883558547839042, "loss": 3.0008902549743652, "step": 6628, "token_acc": 0.297086806206952 }, { "epoch": 3.8859571973028437, "grad_norm": 0.2412223863704887, "learning_rate": 0.00018883113488562633, "loss": 2.95741605758667, "step": 6629, "token_acc": 0.30567092028304804 }, { "epoch": 3.886543535620053, "grad_norm": 0.2660658390821338, "learning_rate": 0.00018882668345841344, "loss": 3.001864433288574, "step": 6630, "token_acc": 0.2982506174906853 }, { "epoch": 3.887129873937262, "grad_norm": 0.2463285791102486, "learning_rate": 0.00018882223119679354, "loss": 2.9841654300689697, "step": 6631, "token_acc": 0.299462497075899 }, { "epoch": 3.887716212254471, "grad_norm": 0.22612302055758363, "learning_rate": 0.00018881777810080848, "loss": 2.971144676208496, "step": 6632, "token_acc": 0.3019725294410264 }, { "epoch": 3.8883025505716797, "grad_norm": 0.2517708960294958, "learning_rate": 0.0001888133241705001, "loss": 2.954951286315918, "step": 6633, "token_acc": 0.3054076485986082 }, { "epoch": 3.888888888888889, "grad_norm": 0.2653754782302431, "learning_rate": 0.00018880886940591016, "loss": 2.9475808143615723, "step": 6634, "token_acc": 0.30563719727251354 }, { "epoch": 3.889475227206098, "grad_norm": 0.2817371743030177, "learning_rate": 0.0001888044138070806, "loss": 3.0366148948669434, "step": 6635, "token_acc": 0.2947911516898253 }, { "epoch": 3.890061565523307, "grad_norm": 0.2836071043199749, "learning_rate": 0.00018879995737405324, "loss": 2.9504294395446777, "step": 6636, "token_acc": 0.30356567036228216 }, { "epoch": 3.8906479038405157, "grad_norm": 0.26358054784821444, "learning_rate": 0.00018879550010686994, "loss": 2.984335422515869, "step": 6637, "token_acc": 0.30244030424572416 }, { "epoch": 3.891234242157725, "grad_norm": 0.24424673486908102, "learning_rate": 0.00018879104200557255, "loss": 2.9907312393188477, "step": 6638, "token_acc": 0.2999737824909844 }, { "epoch": 3.891820580474934, "grad_norm": 0.2573698541705401, "learning_rate": 0.000188786583070203, "loss": 2.9807116985321045, "step": 6639, "token_acc": 0.3016705354085011 }, { "epoch": 3.892406918792143, "grad_norm": 0.2457970213698467, "learning_rate": 0.0001887821233008031, "loss": 2.9726996421813965, "step": 6640, "token_acc": 0.30085238733521596 }, { "epoch": 3.892993257109352, "grad_norm": 0.27429160107036615, "learning_rate": 0.00018877766269741485, "loss": 2.983583927154541, "step": 6641, "token_acc": 0.3002059745677536 }, { "epoch": 3.8935795954265613, "grad_norm": 0.286163928529811, "learning_rate": 0.00018877320126008002, "loss": 2.9630837440490723, "step": 6642, "token_acc": 0.30331265357381015 }, { "epoch": 3.8941659337437704, "grad_norm": 0.2767036903622396, "learning_rate": 0.00018876873898884063, "loss": 3.0131726264953613, "step": 6643, "token_acc": 0.29637767610157717 }, { "epoch": 3.894752272060979, "grad_norm": 0.29226693828062505, "learning_rate": 0.00018876427588373855, "loss": 2.938687801361084, "step": 6644, "token_acc": 0.3063164223898699 }, { "epoch": 3.895338610378188, "grad_norm": 0.2549602347961865, "learning_rate": 0.0001887598119448157, "loss": 2.9561405181884766, "step": 6645, "token_acc": 0.3039101709813707 }, { "epoch": 3.8959249486953973, "grad_norm": 0.2684099165668575, "learning_rate": 0.000188755347172114, "loss": 2.958974838256836, "step": 6646, "token_acc": 0.3040535567844902 }, { "epoch": 3.8965112870126064, "grad_norm": 0.28050509497263837, "learning_rate": 0.00018875088156567547, "loss": 2.975895881652832, "step": 6647, "token_acc": 0.3009517259514731 }, { "epoch": 3.897097625329815, "grad_norm": 0.3307004023140116, "learning_rate": 0.00018874641512554193, "loss": 2.9834132194519043, "step": 6648, "token_acc": 0.3017969558301977 }, { "epoch": 3.897683963647024, "grad_norm": 0.2978112259235787, "learning_rate": 0.00018874194785175545, "loss": 3.007200241088867, "step": 6649, "token_acc": 0.29563803614278095 }, { "epoch": 3.8982703019642333, "grad_norm": 0.2830289180697127, "learning_rate": 0.00018873747974435795, "loss": 2.954601287841797, "step": 6650, "token_acc": 0.3065585151677867 }, { "epoch": 3.8988566402814424, "grad_norm": 0.2730273361561211, "learning_rate": 0.0001887330108033914, "loss": 3.034597873687744, "step": 6651, "token_acc": 0.2936712306103552 }, { "epoch": 3.8994429785986515, "grad_norm": 0.2489265504183231, "learning_rate": 0.00018872854102889778, "loss": 2.9668939113616943, "step": 6652, "token_acc": 0.30229206581423684 }, { "epoch": 3.9000293169158606, "grad_norm": 0.28273924084914726, "learning_rate": 0.00018872407042091907, "loss": 2.983332872390747, "step": 6653, "token_acc": 0.2992593430202974 }, { "epoch": 3.9006156552330697, "grad_norm": 0.24252612102820797, "learning_rate": 0.0001887195989794973, "loss": 2.9591875076293945, "step": 6654, "token_acc": 0.3032617946535689 }, { "epoch": 3.9012019935502784, "grad_norm": 0.21934129097145666, "learning_rate": 0.00018871512670467445, "loss": 2.950644016265869, "step": 6655, "token_acc": 0.30633798271905166 }, { "epoch": 3.9017883318674875, "grad_norm": 0.2587007173024275, "learning_rate": 0.00018871065359649252, "loss": 2.9710941314697266, "step": 6656, "token_acc": 0.3023864611225206 }, { "epoch": 3.9023746701846966, "grad_norm": 0.22998476477185303, "learning_rate": 0.0001887061796549935, "loss": 3.008894920349121, "step": 6657, "token_acc": 0.2977257352115325 }, { "epoch": 3.9029610085019057, "grad_norm": 0.2740375265682306, "learning_rate": 0.0001887017048802195, "loss": 2.9860899448394775, "step": 6658, "token_acc": 0.29987601818399967 }, { "epoch": 3.9035473468191144, "grad_norm": 0.2331201957062955, "learning_rate": 0.0001886972292722125, "loss": 2.916191816329956, "step": 6659, "token_acc": 0.3096351209365017 }, { "epoch": 3.9041336851363235, "grad_norm": 0.25891759390324937, "learning_rate": 0.00018869275283101456, "loss": 2.936060905456543, "step": 6660, "token_acc": 0.3073071549338654 }, { "epoch": 3.9047200234535326, "grad_norm": 0.24260118343385476, "learning_rate": 0.00018868827555666771, "loss": 2.9556992053985596, "step": 6661, "token_acc": 0.3028853715952322 }, { "epoch": 3.9053063617707418, "grad_norm": 0.2729552877252817, "learning_rate": 0.00018868379744921404, "loss": 2.9791150093078613, "step": 6662, "token_acc": 0.3010581472521356 }, { "epoch": 3.905892700087951, "grad_norm": 0.3124095888779568, "learning_rate": 0.00018867931850869555, "loss": 2.9738237857818604, "step": 6663, "token_acc": 0.30141635610932693 }, { "epoch": 3.90647903840516, "grad_norm": 0.28528815077524716, "learning_rate": 0.0001886748387351544, "loss": 2.9784393310546875, "step": 6664, "token_acc": 0.302279112243603 }, { "epoch": 3.9070653767223686, "grad_norm": 0.26914262399552624, "learning_rate": 0.00018867035812863262, "loss": 2.9897382259368896, "step": 6665, "token_acc": 0.30008254153121217 }, { "epoch": 3.9076517150395778, "grad_norm": 0.2620991239098661, "learning_rate": 0.00018866587668917232, "loss": 2.9942331314086914, "step": 6666, "token_acc": 0.2996946078960036 }, { "epoch": 3.908238053356787, "grad_norm": 0.382617384140643, "learning_rate": 0.00018866139441681558, "loss": 2.9819374084472656, "step": 6667, "token_acc": 0.30054543678639545 }, { "epoch": 3.908824391673996, "grad_norm": 0.40462677720926665, "learning_rate": 0.0001886569113116045, "loss": 2.9759955406188965, "step": 6668, "token_acc": 0.3001475040757705 }, { "epoch": 3.909410729991205, "grad_norm": 0.2817685339523738, "learning_rate": 0.00018865242737358122, "loss": 2.9759418964385986, "step": 6669, "token_acc": 0.30132286924130025 }, { "epoch": 3.9099970683084138, "grad_norm": 0.28204952404280176, "learning_rate": 0.00018864794260278785, "loss": 3.0075559616088867, "step": 6670, "token_acc": 0.29701503553278086 }, { "epoch": 3.910583406625623, "grad_norm": 0.3088966121298939, "learning_rate": 0.00018864345699926648, "loss": 3.008556842803955, "step": 6671, "token_acc": 0.29775123271727777 }, { "epoch": 3.911169744942832, "grad_norm": 0.24020410682945567, "learning_rate": 0.00018863897056305932, "loss": 2.972257614135742, "step": 6672, "token_acc": 0.30153817537369343 }, { "epoch": 3.911756083260041, "grad_norm": 0.3318332743695136, "learning_rate": 0.00018863448329420844, "loss": 2.9934463500976562, "step": 6673, "token_acc": 0.30060680568890535 }, { "epoch": 3.91234242157725, "grad_norm": 0.24597747425358377, "learning_rate": 0.00018862999519275606, "loss": 2.955934524536133, "step": 6674, "token_acc": 0.3042511874458761 }, { "epoch": 3.9129287598944593, "grad_norm": 0.3181839379228489, "learning_rate": 0.00018862550625874428, "loss": 2.9687986373901367, "step": 6675, "token_acc": 0.3025857144670534 }, { "epoch": 3.913515098211668, "grad_norm": 0.23861709256364186, "learning_rate": 0.00018862101649221532, "loss": 2.9907801151275635, "step": 6676, "token_acc": 0.3006237324455669 }, { "epoch": 3.914101436528877, "grad_norm": 0.2739791161942881, "learning_rate": 0.0001886165258932113, "loss": 2.941749095916748, "step": 6677, "token_acc": 0.3073049247359243 }, { "epoch": 3.914687774846086, "grad_norm": 0.23549711403840526, "learning_rate": 0.00018861203446177442, "loss": 2.9294729232788086, "step": 6678, "token_acc": 0.30900740598249493 }, { "epoch": 3.9152741131632953, "grad_norm": 0.26645897253211187, "learning_rate": 0.00018860754219794692, "loss": 2.981844902038574, "step": 6679, "token_acc": 0.30028378338843215 }, { "epoch": 3.915860451480504, "grad_norm": 0.23248579314557088, "learning_rate": 0.00018860304910177096, "loss": 2.988368511199951, "step": 6680, "token_acc": 0.29872042760393996 }, { "epoch": 3.916446789797713, "grad_norm": 0.26265769508347514, "learning_rate": 0.00018859855517328871, "loss": 2.988191604614258, "step": 6681, "token_acc": 0.2993813847303027 }, { "epoch": 3.917033128114922, "grad_norm": 0.2721945776908509, "learning_rate": 0.00018859406041254247, "loss": 3.002068519592285, "step": 6682, "token_acc": 0.2966658763635985 }, { "epoch": 3.9176194664321313, "grad_norm": 0.23651154349850323, "learning_rate": 0.0001885895648195744, "loss": 3.011507034301758, "step": 6683, "token_acc": 0.29744212177747625 }, { "epoch": 3.9182058047493404, "grad_norm": 0.2857830774184962, "learning_rate": 0.00018858506839442672, "loss": 2.9982192516326904, "step": 6684, "token_acc": 0.29824278616795313 }, { "epoch": 3.9187921430665495, "grad_norm": 0.27910292689126925, "learning_rate": 0.0001885805711371417, "loss": 2.979325771331787, "step": 6685, "token_acc": 0.30059245157397585 }, { "epoch": 3.9193784813837587, "grad_norm": 0.23210769833271055, "learning_rate": 0.00018857607304776158, "loss": 2.954697608947754, "step": 6686, "token_acc": 0.3069179363806469 }, { "epoch": 3.9199648197009673, "grad_norm": 0.2426971957680676, "learning_rate": 0.00018857157412632863, "loss": 2.9683780670166016, "step": 6687, "token_acc": 0.30241355121976565 }, { "epoch": 3.9205511580181764, "grad_norm": 0.27815189472815216, "learning_rate": 0.00018856707437288507, "loss": 2.941561222076416, "step": 6688, "token_acc": 0.3067292128617168 }, { "epoch": 3.9211374963353856, "grad_norm": 0.2838777392455147, "learning_rate": 0.0001885625737874732, "loss": 2.9664716720581055, "step": 6689, "token_acc": 0.3022922711964925 }, { "epoch": 3.9217238346525947, "grad_norm": 0.24015118604739444, "learning_rate": 0.0001885580723701353, "loss": 2.997640371322632, "step": 6690, "token_acc": 0.29694977159500663 }, { "epoch": 3.9223101729698033, "grad_norm": 0.26932663321063127, "learning_rate": 0.0001885535701209136, "loss": 3.0059115886688232, "step": 6691, "token_acc": 0.2984698069105152 }, { "epoch": 3.9228965112870124, "grad_norm": 0.2450360482989737, "learning_rate": 0.00018854906703985052, "loss": 3.0054855346679688, "step": 6692, "token_acc": 0.2972920902264788 }, { "epoch": 3.9234828496042216, "grad_norm": 0.24135419153231888, "learning_rate": 0.0001885445631269882, "loss": 2.9718894958496094, "step": 6693, "token_acc": 0.30213827005763666 }, { "epoch": 3.9240691879214307, "grad_norm": 0.20733617101589707, "learning_rate": 0.00018854005838236907, "loss": 2.9810047149658203, "step": 6694, "token_acc": 0.30001229526423046 }, { "epoch": 3.92465552623864, "grad_norm": 0.2482746054414359, "learning_rate": 0.00018853555280603536, "loss": 3.0050950050354004, "step": 6695, "token_acc": 0.2962661080429125 }, { "epoch": 3.925241864555849, "grad_norm": 0.2437572470359346, "learning_rate": 0.00018853104639802946, "loss": 2.9590678215026855, "step": 6696, "token_acc": 0.30386676625129394 }, { "epoch": 3.925828202873058, "grad_norm": 0.22526899777181897, "learning_rate": 0.0001885265391583937, "loss": 2.9736289978027344, "step": 6697, "token_acc": 0.30161480834697085 }, { "epoch": 3.9264145411902667, "grad_norm": 0.2248672416308465, "learning_rate": 0.00018852203108717035, "loss": 2.9654767513275146, "step": 6698, "token_acc": 0.3020724940435782 }, { "epoch": 3.927000879507476, "grad_norm": 0.23801344476314878, "learning_rate": 0.0001885175221844018, "loss": 2.974339485168457, "step": 6699, "token_acc": 0.3018122724374139 }, { "epoch": 3.927587217824685, "grad_norm": 0.24412606760224878, "learning_rate": 0.00018851301245013043, "loss": 2.953859567642212, "step": 6700, "token_acc": 0.3060422800456221 }, { "epoch": 3.928173556141894, "grad_norm": 0.3448065097607578, "learning_rate": 0.0001885085018843986, "loss": 3.0057570934295654, "step": 6701, "token_acc": 0.29516110301725046 }, { "epoch": 3.9287598944591027, "grad_norm": 0.5342747009782947, "learning_rate": 0.00018850399048724864, "loss": 3.0397000312805176, "step": 6702, "token_acc": 0.2924180072658666 }, { "epoch": 3.929346232776312, "grad_norm": 0.29935721550933975, "learning_rate": 0.00018849947825872295, "loss": 3.025165557861328, "step": 6703, "token_acc": 0.2957742319957909 }, { "epoch": 3.929932571093521, "grad_norm": 0.35227942705131465, "learning_rate": 0.0001884949651988639, "loss": 2.9858784675598145, "step": 6704, "token_acc": 0.30065772591602175 }, { "epoch": 3.93051890941073, "grad_norm": 0.31267133984855744, "learning_rate": 0.00018849045130771392, "loss": 2.963439702987671, "step": 6705, "token_acc": 0.3041682771000267 }, { "epoch": 3.931105247727939, "grad_norm": 0.2753930172292787, "learning_rate": 0.00018848593658531542, "loss": 2.9809625148773193, "step": 6706, "token_acc": 0.299745481656683 }, { "epoch": 3.9316915860451482, "grad_norm": 0.22391815326384154, "learning_rate": 0.00018848142103171074, "loss": 2.9947967529296875, "step": 6707, "token_acc": 0.2974418261043125 }, { "epoch": 3.9322779243623573, "grad_norm": 0.29136291934432906, "learning_rate": 0.00018847690464694235, "loss": 3.0012121200561523, "step": 6708, "token_acc": 0.29734666065536813 }, { "epoch": 3.932864262679566, "grad_norm": 0.23693068982099624, "learning_rate": 0.00018847238743105265, "loss": 2.984525203704834, "step": 6709, "token_acc": 0.2996593308376834 }, { "epoch": 3.933450600996775, "grad_norm": 0.257219104597974, "learning_rate": 0.00018846786938408412, "loss": 3.01507830619812, "step": 6710, "token_acc": 0.2973228338167252 }, { "epoch": 3.9340369393139842, "grad_norm": 0.2712616398425281, "learning_rate": 0.00018846335050607915, "loss": 2.991961717605591, "step": 6711, "token_acc": 0.29910618836793335 }, { "epoch": 3.9346232776311933, "grad_norm": 0.2175791430664531, "learning_rate": 0.0001884588307970802, "loss": 2.942702531814575, "step": 6712, "token_acc": 0.3064930947913098 }, { "epoch": 3.935209615948402, "grad_norm": 0.27374914362069525, "learning_rate": 0.00018845431025712976, "loss": 2.9535255432128906, "step": 6713, "token_acc": 0.3045037567597339 }, { "epoch": 3.935795954265611, "grad_norm": 0.2146644429450761, "learning_rate": 0.00018844978888627026, "loss": 3.003509044647217, "step": 6714, "token_acc": 0.2983390016924991 }, { "epoch": 3.9363822925828202, "grad_norm": 0.2548154634490993, "learning_rate": 0.00018844526668454416, "loss": 2.9654130935668945, "step": 6715, "token_acc": 0.3020009611603827 }, { "epoch": 3.9369686309000294, "grad_norm": 0.23083478923313672, "learning_rate": 0.00018844074365199397, "loss": 2.995351791381836, "step": 6716, "token_acc": 0.29794829762044156 }, { "epoch": 3.9375549692172385, "grad_norm": 0.28509096399661604, "learning_rate": 0.0001884362197886622, "loss": 2.967449188232422, "step": 6717, "token_acc": 0.30224573251203896 }, { "epoch": 3.9381413075344476, "grad_norm": 0.23592641383866594, "learning_rate": 0.00018843169509459129, "loss": 3.0169780254364014, "step": 6718, "token_acc": 0.29644500362274295 }, { "epoch": 3.9387276458516562, "grad_norm": 0.2729500266007118, "learning_rate": 0.00018842716956982375, "loss": 2.928776741027832, "step": 6719, "token_acc": 0.3085829506698908 }, { "epoch": 3.9393139841688654, "grad_norm": 0.24984817604975365, "learning_rate": 0.00018842264321440212, "loss": 2.991912364959717, "step": 6720, "token_acc": 0.29745179425208845 }, { "epoch": 3.9399003224860745, "grad_norm": 0.2650022539419051, "learning_rate": 0.00018841811602836894, "loss": 3.01686429977417, "step": 6721, "token_acc": 0.29551065289161355 }, { "epoch": 3.9404866608032836, "grad_norm": 0.28380291512288974, "learning_rate": 0.00018841358801176668, "loss": 3.016180992126465, "step": 6722, "token_acc": 0.29580694303870114 }, { "epoch": 3.9410729991204922, "grad_norm": 0.24804405403461538, "learning_rate": 0.0001884090591646379, "loss": 2.9789557456970215, "step": 6723, "token_acc": 0.301797160440482 }, { "epoch": 3.9416593374377014, "grad_norm": 0.2399389473635261, "learning_rate": 0.00018840452948702514, "loss": 3.006746292114258, "step": 6724, "token_acc": 0.29700139451204216 }, { "epoch": 3.9422456757549105, "grad_norm": 0.2825573584632716, "learning_rate": 0.00018839999897897093, "loss": 2.9718286991119385, "step": 6725, "token_acc": 0.3027985607859583 }, { "epoch": 3.9428320140721196, "grad_norm": 0.2378724108255232, "learning_rate": 0.00018839546764051786, "loss": 2.9617390632629395, "step": 6726, "token_acc": 0.30351177763696247 }, { "epoch": 3.9434183523893287, "grad_norm": 0.2536960931711908, "learning_rate": 0.0001883909354717085, "loss": 3.0065436363220215, "step": 6727, "token_acc": 0.29690043880731243 }, { "epoch": 3.944004690706538, "grad_norm": 0.24768087383794138, "learning_rate": 0.00018838640247258545, "loss": 2.9516966342926025, "step": 6728, "token_acc": 0.3045352611691942 }, { "epoch": 3.944591029023747, "grad_norm": 0.24610096818774838, "learning_rate": 0.0001883818686431912, "loss": 2.943861961364746, "step": 6729, "token_acc": 0.30548240116403785 }, { "epoch": 3.9451773673409556, "grad_norm": 0.2977890103816516, "learning_rate": 0.00018837733398356838, "loss": 2.9800877571105957, "step": 6730, "token_acc": 0.3013265213501212 }, { "epoch": 3.9457637056581647, "grad_norm": 0.22772336913162497, "learning_rate": 0.00018837279849375963, "loss": 2.9788689613342285, "step": 6731, "token_acc": 0.3010798768067865 }, { "epoch": 3.946350043975374, "grad_norm": 0.28514128536165245, "learning_rate": 0.00018836826217380752, "loss": 2.997581958770752, "step": 6732, "token_acc": 0.29697987419489247 }, { "epoch": 3.946936382292583, "grad_norm": 0.2408616192881167, "learning_rate": 0.00018836372502375467, "loss": 3.000856399536133, "step": 6733, "token_acc": 0.29821835833699495 }, { "epoch": 3.9475227206097916, "grad_norm": 0.2810211890300591, "learning_rate": 0.00018835918704364365, "loss": 2.9607861042022705, "step": 6734, "token_acc": 0.3031808366901115 }, { "epoch": 3.9481090589270007, "grad_norm": 0.3721457343335993, "learning_rate": 0.00018835464823351716, "loss": 2.9481794834136963, "step": 6735, "token_acc": 0.30523759604028716 }, { "epoch": 3.94869539724421, "grad_norm": 0.3226434525145338, "learning_rate": 0.0001883501085934178, "loss": 2.965211868286133, "step": 6736, "token_acc": 0.3030965252925201 }, { "epoch": 3.949281735561419, "grad_norm": 0.2849766560330644, "learning_rate": 0.00018834556812338823, "loss": 2.985299587249756, "step": 6737, "token_acc": 0.3008067415555964 }, { "epoch": 3.949868073878628, "grad_norm": 0.34291130232988004, "learning_rate": 0.00018834102682347112, "loss": 3.019843816757202, "step": 6738, "token_acc": 0.2951466191718084 }, { "epoch": 3.950454412195837, "grad_norm": 0.34828503828766616, "learning_rate": 0.00018833648469370907, "loss": 2.9998016357421875, "step": 6739, "token_acc": 0.29816482091853563 }, { "epoch": 3.9510407505130463, "grad_norm": 0.26848142731771146, "learning_rate": 0.0001883319417341448, "loss": 3.0172119140625, "step": 6740, "token_acc": 0.2944113093576305 }, { "epoch": 3.951627088830255, "grad_norm": 0.3757257089541708, "learning_rate": 0.00018832739794482096, "loss": 3.0265755653381348, "step": 6741, "token_acc": 0.293521873528629 }, { "epoch": 3.952213427147464, "grad_norm": 0.21302904121821553, "learning_rate": 0.00018832285332578023, "loss": 2.987481117248535, "step": 6742, "token_acc": 0.30050292496107067 }, { "epoch": 3.952799765464673, "grad_norm": 0.2940721433710636, "learning_rate": 0.00018831830787706535, "loss": 2.992220401763916, "step": 6743, "token_acc": 0.3006512338892902 }, { "epoch": 3.9533861037818823, "grad_norm": 0.21356996805635348, "learning_rate": 0.00018831376159871894, "loss": 3.0146684646606445, "step": 6744, "token_acc": 0.29469122426868904 }, { "epoch": 3.953972442099091, "grad_norm": 0.2782581852332759, "learning_rate": 0.00018830921449078373, "loss": 2.963684558868408, "step": 6745, "token_acc": 0.3026602225858913 }, { "epoch": 3.9545587804163, "grad_norm": 0.2618278956787487, "learning_rate": 0.00018830466655330247, "loss": 2.9898948669433594, "step": 6746, "token_acc": 0.2990431303492121 }, { "epoch": 3.955145118733509, "grad_norm": 0.26411360515532745, "learning_rate": 0.00018830011778631786, "loss": 2.986818790435791, "step": 6747, "token_acc": 0.30089953258532726 }, { "epoch": 3.9557314570507183, "grad_norm": 0.27602320964989147, "learning_rate": 0.00018829556818987265, "loss": 2.994004726409912, "step": 6748, "token_acc": 0.2989589825878234 }, { "epoch": 3.9563177953679274, "grad_norm": 0.2914662116213642, "learning_rate": 0.0001882910177640095, "loss": 2.9968347549438477, "step": 6749, "token_acc": 0.29892500584235954 }, { "epoch": 3.9569041336851365, "grad_norm": 0.2707021838356128, "learning_rate": 0.00018828646650877128, "loss": 2.997046709060669, "step": 6750, "token_acc": 0.29701084302041614 }, { "epoch": 3.9574904720023456, "grad_norm": 0.2909293188522531, "learning_rate": 0.00018828191442420063, "loss": 3.0266470909118652, "step": 6751, "token_acc": 0.29594473590095915 }, { "epoch": 3.9580768103195543, "grad_norm": 0.24077010139663063, "learning_rate": 0.00018827736151034037, "loss": 2.9700400829315186, "step": 6752, "token_acc": 0.30128910991493 }, { "epoch": 3.9586631486367634, "grad_norm": 0.2707599993823994, "learning_rate": 0.00018827280776723324, "loss": 2.9679665565490723, "step": 6753, "token_acc": 0.303233648506837 }, { "epoch": 3.9592494869539725, "grad_norm": 0.27108208792957694, "learning_rate": 0.00018826825319492204, "loss": 2.9435768127441406, "step": 6754, "token_acc": 0.3064626511542446 }, { "epoch": 3.9598358252711816, "grad_norm": 0.2625951319089346, "learning_rate": 0.00018826369779344955, "loss": 3.0129075050354004, "step": 6755, "token_acc": 0.29509989870034703 }, { "epoch": 3.9604221635883903, "grad_norm": 0.2982722701120704, "learning_rate": 0.00018825914156285855, "loss": 2.9858531951904297, "step": 6756, "token_acc": 0.3011798745849476 }, { "epoch": 3.9610085019055994, "grad_norm": 0.3671582135871774, "learning_rate": 0.00018825458450319184, "loss": 2.972569465637207, "step": 6757, "token_acc": 0.30175144353905764 }, { "epoch": 3.9615948402228085, "grad_norm": 0.31822372583403363, "learning_rate": 0.00018825002661449223, "loss": 2.9502756595611572, "step": 6758, "token_acc": 0.3049355800898979 }, { "epoch": 3.9621811785400176, "grad_norm": 0.29943348317112517, "learning_rate": 0.00018824546789680255, "loss": 2.9721460342407227, "step": 6759, "token_acc": 0.3020162776848255 }, { "epoch": 3.9627675168572267, "grad_norm": 0.33220010175026204, "learning_rate": 0.00018824090835016565, "loss": 2.9790258407592773, "step": 6760, "token_acc": 0.3010301650244124 }, { "epoch": 3.963353855174436, "grad_norm": 0.24525993244889766, "learning_rate": 0.00018823634797462426, "loss": 2.9715735912323, "step": 6761, "token_acc": 0.3028212949762525 }, { "epoch": 3.963940193491645, "grad_norm": 0.2807718891394711, "learning_rate": 0.0001882317867702213, "loss": 2.9553141593933105, "step": 6762, "token_acc": 0.3049802690324625 }, { "epoch": 3.9645265318088536, "grad_norm": 0.23330554934775408, "learning_rate": 0.00018822722473699958, "loss": 2.979844093322754, "step": 6763, "token_acc": 0.3016093044829673 }, { "epoch": 3.9651128701260627, "grad_norm": 0.31647614568306526, "learning_rate": 0.000188222661875002, "loss": 2.9736900329589844, "step": 6764, "token_acc": 0.3008516382378206 }, { "epoch": 3.965699208443272, "grad_norm": 0.26130259170910786, "learning_rate": 0.00018821809818427137, "loss": 2.939664840698242, "step": 6765, "token_acc": 0.3059006457131091 }, { "epoch": 3.966285546760481, "grad_norm": 0.27701252227024686, "learning_rate": 0.0001882135336648506, "loss": 2.9174704551696777, "step": 6766, "token_acc": 0.3099732001144849 }, { "epoch": 3.9668718850776896, "grad_norm": 0.2698149330486192, "learning_rate": 0.00018820896831678256, "loss": 3.00138258934021, "step": 6767, "token_acc": 0.29681123783614105 }, { "epoch": 3.9674582233948987, "grad_norm": 0.30534898789471326, "learning_rate": 0.00018820440214011011, "loss": 3.01220703125, "step": 6768, "token_acc": 0.2964907440051913 }, { "epoch": 3.968044561712108, "grad_norm": 0.24498760661080554, "learning_rate": 0.00018819983513487616, "loss": 3.010448455810547, "step": 6769, "token_acc": 0.29770500492359975 }, { "epoch": 3.968630900029317, "grad_norm": 0.3043936681446897, "learning_rate": 0.00018819526730112361, "loss": 2.9761781692504883, "step": 6770, "token_acc": 0.30152095922738 }, { "epoch": 3.969217238346526, "grad_norm": 0.2663148257534456, "learning_rate": 0.00018819069863889535, "loss": 2.9302144050598145, "step": 6771, "token_acc": 0.3073616654082623 }, { "epoch": 3.969803576663735, "grad_norm": 0.237462570604375, "learning_rate": 0.00018818612914823433, "loss": 2.9407782554626465, "step": 6772, "token_acc": 0.30635882146062876 }, { "epoch": 3.970389914980944, "grad_norm": 0.32117142702392415, "learning_rate": 0.0001881815588291835, "loss": 2.980024814605713, "step": 6773, "token_acc": 0.3003881088330319 }, { "epoch": 3.970976253298153, "grad_norm": 0.23111150447555812, "learning_rate": 0.0001881769876817857, "loss": 2.933063507080078, "step": 6774, "token_acc": 0.3073785239932257 }, { "epoch": 3.971562591615362, "grad_norm": 0.2610491755144798, "learning_rate": 0.00018817241570608394, "loss": 2.9211087226867676, "step": 6775, "token_acc": 0.3102465377659004 }, { "epoch": 3.972148929932571, "grad_norm": 0.26054823907501207, "learning_rate": 0.00018816784290212114, "loss": 2.9758048057556152, "step": 6776, "token_acc": 0.3019210422587806 }, { "epoch": 3.97273526824978, "grad_norm": 0.2385174360994277, "learning_rate": 0.00018816326926994026, "loss": 3.026453733444214, "step": 6777, "token_acc": 0.29289401643560825 }, { "epoch": 3.973321606566989, "grad_norm": 0.25604700494317345, "learning_rate": 0.00018815869480958428, "loss": 2.9858181476593018, "step": 6778, "token_acc": 0.30215400428628575 }, { "epoch": 3.973907944884198, "grad_norm": 0.2282562617277907, "learning_rate": 0.00018815411952109617, "loss": 2.9625072479248047, "step": 6779, "token_acc": 0.3027356734036608 }, { "epoch": 3.974494283201407, "grad_norm": 0.25919321277160245, "learning_rate": 0.00018814954340451884, "loss": 3.0089173316955566, "step": 6780, "token_acc": 0.29615116777373046 }, { "epoch": 3.9750806215186163, "grad_norm": 0.2529452818591886, "learning_rate": 0.00018814496645989536, "loss": 2.9495909214019775, "step": 6781, "token_acc": 0.30609142243912413 }, { "epoch": 3.9756669598358254, "grad_norm": 0.24065454400460834, "learning_rate": 0.00018814038868726873, "loss": 3.055450439453125, "step": 6782, "token_acc": 0.2920182962570486 }, { "epoch": 3.9762532981530345, "grad_norm": 0.23490452149378216, "learning_rate": 0.0001881358100866819, "loss": 2.9737353324890137, "step": 6783, "token_acc": 0.3017115028189002 }, { "epoch": 3.976839636470243, "grad_norm": 0.22653160540067888, "learning_rate": 0.0001881312306581779, "loss": 2.9702277183532715, "step": 6784, "token_acc": 0.30145397322468903 }, { "epoch": 3.9774259747874523, "grad_norm": 0.24136439047655175, "learning_rate": 0.00018812665040179974, "loss": 3.010288715362549, "step": 6785, "token_acc": 0.29607907374409703 }, { "epoch": 3.9780123131046614, "grad_norm": 0.22942669704587268, "learning_rate": 0.00018812206931759044, "loss": 2.9914684295654297, "step": 6786, "token_acc": 0.2985744193001695 }, { "epoch": 3.9785986514218705, "grad_norm": 0.25289077789476483, "learning_rate": 0.00018811748740559306, "loss": 2.9701802730560303, "step": 6787, "token_acc": 0.301797229205895 }, { "epoch": 3.979184989739079, "grad_norm": 0.26288878477655336, "learning_rate": 0.0001881129046658506, "loss": 2.9892754554748535, "step": 6788, "token_acc": 0.2998869067547511 }, { "epoch": 3.9797713280562883, "grad_norm": 0.2707327154319935, "learning_rate": 0.00018810832109840617, "loss": 2.9541473388671875, "step": 6789, "token_acc": 0.3046050285649786 }, { "epoch": 3.9803576663734974, "grad_norm": 0.422053307629299, "learning_rate": 0.00018810373670330278, "loss": 2.9699063301086426, "step": 6790, "token_acc": 0.30276202456080503 }, { "epoch": 3.9809440046907065, "grad_norm": 0.5311450207612103, "learning_rate": 0.00018809915148058353, "loss": 2.97745680809021, "step": 6791, "token_acc": 0.3011652602294524 }, { "epoch": 3.9815303430079156, "grad_norm": 0.2636961692842945, "learning_rate": 0.00018809456543029143, "loss": 2.9863481521606445, "step": 6792, "token_acc": 0.2992351302215003 }, { "epoch": 3.9821166813251248, "grad_norm": 0.49579220722114775, "learning_rate": 0.00018808997855246959, "loss": 2.9743685722351074, "step": 6793, "token_acc": 0.30131669740585215 }, { "epoch": 3.982703019642334, "grad_norm": 0.25582776572123345, "learning_rate": 0.0001880853908471611, "loss": 3.0076003074645996, "step": 6794, "token_acc": 0.2989739461545342 }, { "epoch": 3.9832893579595425, "grad_norm": 0.4065192974116163, "learning_rate": 0.0001880808023144091, "loss": 2.9674878120422363, "step": 6795, "token_acc": 0.30174112389182334 }, { "epoch": 3.9838756962767516, "grad_norm": 0.3240933578073701, "learning_rate": 0.00018807621295425663, "loss": 2.9737026691436768, "step": 6796, "token_acc": 0.30184735564235193 }, { "epoch": 3.9844620345939608, "grad_norm": 0.3129066599396761, "learning_rate": 0.00018807162276674683, "loss": 2.9665474891662598, "step": 6797, "token_acc": 0.3029389763992932 }, { "epoch": 3.98504837291117, "grad_norm": 0.38436543022251884, "learning_rate": 0.00018806703175192283, "loss": 2.986518383026123, "step": 6798, "token_acc": 0.3019264275886969 }, { "epoch": 3.9856347112283785, "grad_norm": 0.26632490687678073, "learning_rate": 0.0001880624399098277, "loss": 3.0117480754852295, "step": 6799, "token_acc": 0.29685834657893273 }, { "epoch": 3.9862210495455876, "grad_norm": 0.27168005805669226, "learning_rate": 0.0001880578472405046, "loss": 2.971714496612549, "step": 6800, "token_acc": 0.3028493376103423 }, { "epoch": 3.9868073878627968, "grad_norm": 0.3007347093146597, "learning_rate": 0.00018805325374399674, "loss": 3.036787509918213, "step": 6801, "token_acc": 0.2940040021153279 }, { "epoch": 3.987393726180006, "grad_norm": 0.23480167232985188, "learning_rate": 0.0001880486594203472, "loss": 3.0148262977600098, "step": 6802, "token_acc": 0.29634861810402574 }, { "epoch": 3.987980064497215, "grad_norm": 0.28844957628243073, "learning_rate": 0.00018804406426959914, "loss": 2.9950225353240967, "step": 6803, "token_acc": 0.299117812174192 }, { "epoch": 3.988566402814424, "grad_norm": 0.3267480090876118, "learning_rate": 0.00018803946829179573, "loss": 2.959831714630127, "step": 6804, "token_acc": 0.3033361715303042 }, { "epoch": 3.989152741131633, "grad_norm": 0.2551761789983474, "learning_rate": 0.00018803487148698016, "loss": 2.8965282440185547, "step": 6805, "token_acc": 0.3137999390845125 }, { "epoch": 3.989739079448842, "grad_norm": 0.279856292192039, "learning_rate": 0.0001880302738551956, "loss": 2.984565019607544, "step": 6806, "token_acc": 0.30079113278380804 }, { "epoch": 3.990325417766051, "grad_norm": 0.2663470781715964, "learning_rate": 0.00018802567539648524, "loss": 2.9823896884918213, "step": 6807, "token_acc": 0.2989748397628311 }, { "epoch": 3.99091175608326, "grad_norm": 0.2623050758104646, "learning_rate": 0.00018802107611089227, "loss": 3.036341667175293, "step": 6808, "token_acc": 0.294514719471258 }, { "epoch": 3.991498094400469, "grad_norm": 0.2764828792927526, "learning_rate": 0.0001880164759984599, "loss": 2.972909927368164, "step": 6809, "token_acc": 0.30311552649841117 }, { "epoch": 3.992084432717678, "grad_norm": 0.27648808913692197, "learning_rate": 0.00018801187505923135, "loss": 2.9660797119140625, "step": 6810, "token_acc": 0.30340494363026005 }, { "epoch": 3.992670771034887, "grad_norm": 0.27046810527472004, "learning_rate": 0.0001880072732932498, "loss": 2.9569969177246094, "step": 6811, "token_acc": 0.3039939541418543 }, { "epoch": 3.993257109352096, "grad_norm": 0.31583071405922153, "learning_rate": 0.00018800267070055856, "loss": 2.9581847190856934, "step": 6812, "token_acc": 0.3065733972193214 }, { "epoch": 3.993843447669305, "grad_norm": 0.2450234315344251, "learning_rate": 0.00018799806728120078, "loss": 2.962909698486328, "step": 6813, "token_acc": 0.30253667695060865 }, { "epoch": 3.9944297859865143, "grad_norm": 0.2929593605116672, "learning_rate": 0.00018799346303521977, "loss": 2.9641213417053223, "step": 6814, "token_acc": 0.30367895811863693 }, { "epoch": 3.9950161243037234, "grad_norm": 0.2626130861557688, "learning_rate": 0.0001879888579626587, "loss": 2.970533847808838, "step": 6815, "token_acc": 0.30462212522345666 }, { "epoch": 3.9956024626209325, "grad_norm": 0.24576273767346254, "learning_rate": 0.0001879842520635609, "loss": 2.9424028396606445, "step": 6816, "token_acc": 0.3085727670617093 }, { "epoch": 3.996188800938141, "grad_norm": 0.27148725249277733, "learning_rate": 0.00018797964533796962, "loss": 2.9336466789245605, "step": 6817, "token_acc": 0.30644212834233137 }, { "epoch": 3.9967751392553503, "grad_norm": 0.23894465883073088, "learning_rate": 0.00018797503778592812, "loss": 2.9928646087646484, "step": 6818, "token_acc": 0.2979747386174724 }, { "epoch": 3.9973614775725594, "grad_norm": 0.300943096893216, "learning_rate": 0.00018797042940747968, "loss": 3.016228675842285, "step": 6819, "token_acc": 0.29471618136302663 }, { "epoch": 3.9979478158897686, "grad_norm": 0.24899592794644393, "learning_rate": 0.0001879658202026676, "loss": 2.9966681003570557, "step": 6820, "token_acc": 0.2958041834365423 }, { "epoch": 3.998534154206977, "grad_norm": 0.2587882244025811, "learning_rate": 0.00018796121017153518, "loss": 2.980031728744507, "step": 6821, "token_acc": 0.3012523419781087 }, { "epoch": 3.9991204925241863, "grad_norm": 0.296043718760882, "learning_rate": 0.0001879565993141257, "loss": 2.973658561706543, "step": 6822, "token_acc": 0.30175447784644394 }, { "epoch": 3.9997068308413954, "grad_norm": 0.2307610500363952, "learning_rate": 0.00018795198763048253, "loss": 2.9776337146759033, "step": 6823, "token_acc": 0.30236705249099516 }, { "epoch": 4.0, "grad_norm": 0.34804182990384036, "learning_rate": 0.0001879473751206489, "loss": 2.997058391571045, "step": 6824, "token_acc": 0.29656255541201204 }, { "epoch": 4.0, "eval_loss": 3.0742709636688232, "eval_runtime": 16.6778, "eval_samples_per_second": 15.35, "eval_steps_per_second": 1.919, "eval_token_acc": 0.2890191852159429, "step": 6824 }, { "epoch": 4.000586338317209, "grad_norm": 0.36997597595215104, "learning_rate": 0.00018794276178466825, "loss": 2.809704542160034, "step": 6825, "token_acc": 0.32451676463325535 }, { "epoch": 4.001172676634418, "grad_norm": 0.33900744913162, "learning_rate": 0.00018793814762258382, "loss": 2.8247694969177246, "step": 6826, "token_acc": 0.32178274099865434 }, { "epoch": 4.001759014951627, "grad_norm": 0.31652232796080954, "learning_rate": 0.00018793353263443901, "loss": 2.7757062911987305, "step": 6827, "token_acc": 0.3302428767299934 }, { "epoch": 4.0023453532688364, "grad_norm": 0.3389172728475794, "learning_rate": 0.00018792891682027713, "loss": 2.8247079849243164, "step": 6828, "token_acc": 0.32160448064345776 }, { "epoch": 4.002931691586046, "grad_norm": 0.3544336107182365, "learning_rate": 0.00018792430018014158, "loss": 2.797395706176758, "step": 6829, "token_acc": 0.3270148923388244 }, { "epoch": 4.003518029903254, "grad_norm": 0.3383968223869068, "learning_rate": 0.00018791968271407572, "loss": 2.732421398162842, "step": 6830, "token_acc": 0.33748829666064717 }, { "epoch": 4.004104368220463, "grad_norm": 0.333691439209077, "learning_rate": 0.0001879150644221229, "loss": 2.7615413665771484, "step": 6831, "token_acc": 0.3321200475941273 }, { "epoch": 4.004690706537672, "grad_norm": 0.3381708553043849, "learning_rate": 0.00018791044530432652, "loss": 2.7796247005462646, "step": 6832, "token_acc": 0.3298306647345111 }, { "epoch": 4.005277044854881, "grad_norm": 0.3070489547109363, "learning_rate": 0.00018790582536072994, "loss": 2.7613229751586914, "step": 6833, "token_acc": 0.3317651491183794 }, { "epoch": 4.00586338317209, "grad_norm": 0.2667117412931318, "learning_rate": 0.0001879012045913766, "loss": 2.8310656547546387, "step": 6834, "token_acc": 0.3219197205906989 }, { "epoch": 4.006449721489299, "grad_norm": 0.30541554795689796, "learning_rate": 0.00018789658299630992, "loss": 2.844264507293701, "step": 6835, "token_acc": 0.32203951085799587 }, { "epoch": 4.0070360598065085, "grad_norm": 0.27519184963179677, "learning_rate": 0.00018789196057557325, "loss": 2.7469067573547363, "step": 6836, "token_acc": 0.33306382891346087 }, { "epoch": 4.007622398123718, "grad_norm": 0.3345346497857896, "learning_rate": 0.00018788733732921008, "loss": 2.790536403656006, "step": 6837, "token_acc": 0.3262795125278708 }, { "epoch": 4.008208736440927, "grad_norm": 0.29805289471656893, "learning_rate": 0.0001878827132572638, "loss": 2.7803330421447754, "step": 6838, "token_acc": 0.32960974379301644 }, { "epoch": 4.008795074758136, "grad_norm": 0.3529577082254483, "learning_rate": 0.00018787808835977782, "loss": 2.7568695545196533, "step": 6839, "token_acc": 0.3329231032656097 }, { "epoch": 4.009381413075345, "grad_norm": 0.31774521769599595, "learning_rate": 0.00018787346263679565, "loss": 2.805112600326538, "step": 6840, "token_acc": 0.32637249413845415 }, { "epoch": 4.009967751392553, "grad_norm": 0.31368219394733554, "learning_rate": 0.0001878688360883607, "loss": 2.780792236328125, "step": 6841, "token_acc": 0.32936063789550063 }, { "epoch": 4.010554089709762, "grad_norm": 0.28134436042772193, "learning_rate": 0.00018786420871451642, "loss": 2.787625312805176, "step": 6842, "token_acc": 0.32785053193411345 }, { "epoch": 4.011140428026971, "grad_norm": 0.3148655213326251, "learning_rate": 0.0001878595805153063, "loss": 2.8253822326660156, "step": 6843, "token_acc": 0.3229981791706366 }, { "epoch": 4.0117267663441805, "grad_norm": 0.3182534964493563, "learning_rate": 0.00018785495149077383, "loss": 2.7305078506469727, "step": 6844, "token_acc": 0.33757875365708057 }, { "epoch": 4.01231310466139, "grad_norm": 0.2958896345586758, "learning_rate": 0.00018785032164096247, "loss": 2.8016417026519775, "step": 6845, "token_acc": 0.3270356449674975 }, { "epoch": 4.012899442978599, "grad_norm": 0.3573213669477871, "learning_rate": 0.00018784569096591574, "loss": 2.7652981281280518, "step": 6846, "token_acc": 0.33177658142664873 }, { "epoch": 4.013485781295808, "grad_norm": 0.32646934065996, "learning_rate": 0.00018784105946567713, "loss": 2.782241106033325, "step": 6847, "token_acc": 0.3289520871735069 }, { "epoch": 4.014072119613017, "grad_norm": 0.3093142605955748, "learning_rate": 0.00018783642714029005, "loss": 2.8232805728912354, "step": 6848, "token_acc": 0.32506771520613736 }, { "epoch": 4.014658457930226, "grad_norm": 0.33437466731138826, "learning_rate": 0.00018783179398979818, "loss": 2.776484489440918, "step": 6849, "token_acc": 0.32847820399163774 }, { "epoch": 4.015244796247435, "grad_norm": 0.2737094633540436, "learning_rate": 0.0001878271600142449, "loss": 2.7839882373809814, "step": 6850, "token_acc": 0.32758526032339447 }, { "epoch": 4.015831134564644, "grad_norm": 0.29259197162976963, "learning_rate": 0.00018782252521367388, "loss": 2.774160861968994, "step": 6851, "token_acc": 0.32935152712477855 }, { "epoch": 4.0164174728818525, "grad_norm": 0.2718931648698179, "learning_rate": 0.00018781788958812848, "loss": 2.7623963356018066, "step": 6852, "token_acc": 0.33307464581970925 }, { "epoch": 4.017003811199062, "grad_norm": 0.3241439474375975, "learning_rate": 0.0001878132531376524, "loss": 2.8113932609558105, "step": 6853, "token_acc": 0.3252248794772176 }, { "epoch": 4.017590149516271, "grad_norm": 0.30259265680455233, "learning_rate": 0.00018780861586228915, "loss": 2.8035244941711426, "step": 6854, "token_acc": 0.32413013950772435 }, { "epoch": 4.01817648783348, "grad_norm": 0.2852199936990498, "learning_rate": 0.00018780397776208224, "loss": 2.7789130210876465, "step": 6855, "token_acc": 0.3299833948145579 }, { "epoch": 4.018762826150689, "grad_norm": 0.3158733558305754, "learning_rate": 0.0001877993388370753, "loss": 2.7633376121520996, "step": 6856, "token_acc": 0.3312243693459922 }, { "epoch": 4.019349164467898, "grad_norm": 0.25732623428031365, "learning_rate": 0.00018779469908731188, "loss": 2.788844347000122, "step": 6857, "token_acc": 0.32929831927082626 }, { "epoch": 4.019935502785107, "grad_norm": 0.31042140665164614, "learning_rate": 0.00018779005851283554, "loss": 2.7620437145233154, "step": 6858, "token_acc": 0.33377648859882364 }, { "epoch": 4.020521841102316, "grad_norm": 0.3008021889394991, "learning_rate": 0.00018778541711368996, "loss": 2.7931137084960938, "step": 6859, "token_acc": 0.3269916180569226 }, { "epoch": 4.021108179419525, "grad_norm": 0.28329654320018144, "learning_rate": 0.0001877807748899186, "loss": 2.7533888816833496, "step": 6860, "token_acc": 0.3354698925701905 }, { "epoch": 4.0216945177367345, "grad_norm": 0.2710676671682988, "learning_rate": 0.0001877761318415652, "loss": 2.782712697982788, "step": 6861, "token_acc": 0.32884026724160875 }, { "epoch": 4.022280856053943, "grad_norm": 0.28894613102882455, "learning_rate": 0.00018777148796867332, "loss": 2.7825112342834473, "step": 6862, "token_acc": 0.3281718801255604 }, { "epoch": 4.022867194371152, "grad_norm": 0.2863772677554496, "learning_rate": 0.00018776684327128658, "loss": 2.7521257400512695, "step": 6863, "token_acc": 0.33326026166785183 }, { "epoch": 4.023453532688361, "grad_norm": 0.2616965983665461, "learning_rate": 0.00018776219774944858, "loss": 2.7520627975463867, "step": 6864, "token_acc": 0.33269347640427177 }, { "epoch": 4.02403987100557, "grad_norm": 0.2745740760072004, "learning_rate": 0.00018775755140320303, "loss": 2.788012742996216, "step": 6865, "token_acc": 0.3275942636995192 }, { "epoch": 4.024626209322779, "grad_norm": 0.2866265639456835, "learning_rate": 0.00018775290423259352, "loss": 2.7775301933288574, "step": 6866, "token_acc": 0.3302292526604533 }, { "epoch": 4.025212547639988, "grad_norm": 0.2739144262284417, "learning_rate": 0.00018774825623766374, "loss": 2.8072848320007324, "step": 6867, "token_acc": 0.3249855449551894 }, { "epoch": 4.025798885957197, "grad_norm": 0.27953365424063953, "learning_rate": 0.00018774360741845734, "loss": 2.776688575744629, "step": 6868, "token_acc": 0.32906569904241434 }, { "epoch": 4.0263852242744065, "grad_norm": 0.2689891638017352, "learning_rate": 0.00018773895777501794, "loss": 2.7590856552124023, "step": 6869, "token_acc": 0.3310644620202492 }, { "epoch": 4.026971562591616, "grad_norm": 0.3166968437748137, "learning_rate": 0.0001877343073073893, "loss": 2.7364020347595215, "step": 6870, "token_acc": 0.33491128391295943 }, { "epoch": 4.027557900908825, "grad_norm": 0.3354108839453728, "learning_rate": 0.00018772965601561507, "loss": 2.75793194770813, "step": 6871, "token_acc": 0.33087312679159825 }, { "epoch": 4.028144239226034, "grad_norm": 0.28742281669057507, "learning_rate": 0.00018772500389973893, "loss": 2.7706661224365234, "step": 6872, "token_acc": 0.33081962238436086 }, { "epoch": 4.028730577543242, "grad_norm": 0.3782253887071229, "learning_rate": 0.0001877203509598046, "loss": 2.7687172889709473, "step": 6873, "token_acc": 0.33154544598679875 }, { "epoch": 4.029316915860451, "grad_norm": 0.373791524176523, "learning_rate": 0.00018771569719585576, "loss": 2.8063082695007324, "step": 6874, "token_acc": 0.32528751156289576 }, { "epoch": 4.02990325417766, "grad_norm": 0.2724946527080424, "learning_rate": 0.00018771104260793613, "loss": 2.7572009563446045, "step": 6875, "token_acc": 0.3323743177932232 }, { "epoch": 4.030489592494869, "grad_norm": 0.40798013950818385, "learning_rate": 0.00018770638719608945, "loss": 2.7910990715026855, "step": 6876, "token_acc": 0.32817945332618725 }, { "epoch": 4.0310759308120785, "grad_norm": 0.3344593076500049, "learning_rate": 0.00018770173096035949, "loss": 2.7497029304504395, "step": 6877, "token_acc": 0.33301280203742617 }, { "epoch": 4.031662269129288, "grad_norm": 0.2839827085573838, "learning_rate": 0.0001876970739007899, "loss": 2.772012710571289, "step": 6878, "token_acc": 0.3324896152800963 }, { "epoch": 4.032248607446497, "grad_norm": 0.3336546844688524, "learning_rate": 0.0001876924160174245, "loss": 2.7675232887268066, "step": 6879, "token_acc": 0.33169038455488326 }, { "epoch": 4.032834945763706, "grad_norm": 0.271889986164176, "learning_rate": 0.00018768775731030704, "loss": 2.7831106185913086, "step": 6880, "token_acc": 0.3283844546681325 }, { "epoch": 4.033421284080915, "grad_norm": 0.3064084182953931, "learning_rate": 0.00018768309777948122, "loss": 2.7696099281311035, "step": 6881, "token_acc": 0.33226359543236933 }, { "epoch": 4.034007622398124, "grad_norm": 0.2777333778623159, "learning_rate": 0.00018767843742499088, "loss": 2.799945831298828, "step": 6882, "token_acc": 0.3258161867032082 }, { "epoch": 4.034593960715333, "grad_norm": 0.2955154347868756, "learning_rate": 0.00018767377624687975, "loss": 2.760730266571045, "step": 6883, "token_acc": 0.33243573284904654 }, { "epoch": 4.035180299032541, "grad_norm": 0.2592371792402468, "learning_rate": 0.00018766911424519163, "loss": 2.7805352210998535, "step": 6884, "token_acc": 0.32892527343677463 }, { "epoch": 4.0357666373497505, "grad_norm": 0.2794037223135334, "learning_rate": 0.00018766445141997032, "loss": 2.718114137649536, "step": 6885, "token_acc": 0.3398542559185279 }, { "epoch": 4.03635297566696, "grad_norm": 0.2689090575269661, "learning_rate": 0.00018765978777125962, "loss": 2.778414249420166, "step": 6886, "token_acc": 0.33069485604990234 }, { "epoch": 4.036939313984169, "grad_norm": 0.30376376240548936, "learning_rate": 0.00018765512329910333, "loss": 2.797227621078491, "step": 6887, "token_acc": 0.32605230132618795 }, { "epoch": 4.037525652301378, "grad_norm": 0.2692298278942264, "learning_rate": 0.00018765045800354528, "loss": 2.7535789012908936, "step": 6888, "token_acc": 0.33440590795199787 }, { "epoch": 4.038111990618587, "grad_norm": 0.29456685096097757, "learning_rate": 0.00018764579188462928, "loss": 2.771760940551758, "step": 6889, "token_acc": 0.3302968926263973 }, { "epoch": 4.038698328935796, "grad_norm": 0.28142022945830475, "learning_rate": 0.00018764112494239917, "loss": 2.778557777404785, "step": 6890, "token_acc": 0.33037032015144485 }, { "epoch": 4.039284667253005, "grad_norm": 0.2974819980768383, "learning_rate": 0.00018763645717689883, "loss": 2.7787203788757324, "step": 6891, "token_acc": 0.3297246204265891 }, { "epoch": 4.039871005570214, "grad_norm": 0.27747217849801065, "learning_rate": 0.00018763178858817204, "loss": 2.759206771850586, "step": 6892, "token_acc": 0.33202810830875834 }, { "epoch": 4.040457343887423, "grad_norm": 0.2760489793461656, "learning_rate": 0.00018762711917626266, "loss": 2.7717132568359375, "step": 6893, "token_acc": 0.33075960523036363 }, { "epoch": 4.0410436822046325, "grad_norm": 0.28495269287229136, "learning_rate": 0.00018762244894121458, "loss": 2.781449556350708, "step": 6894, "token_acc": 0.3279262517347046 }, { "epoch": 4.041630020521841, "grad_norm": 0.2773741551301759, "learning_rate": 0.00018761777788307168, "loss": 2.7549259662628174, "step": 6895, "token_acc": 0.33305276378594745 }, { "epoch": 4.04221635883905, "grad_norm": 0.28396673775215203, "learning_rate": 0.00018761310600187782, "loss": 2.8119614124298096, "step": 6896, "token_acc": 0.3237608581907918 }, { "epoch": 4.042802697156259, "grad_norm": 0.2780904414635531, "learning_rate": 0.0001876084332976769, "loss": 2.788419723510742, "step": 6897, "token_acc": 0.32691259313101945 }, { "epoch": 4.043389035473468, "grad_norm": 0.3045459288620365, "learning_rate": 0.00018760375977051278, "loss": 2.7625670433044434, "step": 6898, "token_acc": 0.3312883435582822 }, { "epoch": 4.043975373790677, "grad_norm": 0.2592457588450355, "learning_rate": 0.0001875990854204294, "loss": 2.7478976249694824, "step": 6899, "token_acc": 0.33196353646480586 }, { "epoch": 4.044561712107886, "grad_norm": 0.28383236375360427, "learning_rate": 0.00018759441024747064, "loss": 2.7483723163604736, "step": 6900, "token_acc": 0.3351584893377991 }, { "epoch": 4.045148050425095, "grad_norm": 0.2723310043807201, "learning_rate": 0.00018758973425168045, "loss": 2.807433843612671, "step": 6901, "token_acc": 0.32599103866222284 }, { "epoch": 4.0457343887423045, "grad_norm": 0.28602153419177223, "learning_rate": 0.0001875850574331027, "loss": 2.739790439605713, "step": 6902, "token_acc": 0.33592011381105946 }, { "epoch": 4.046320727059514, "grad_norm": 0.3158398124093764, "learning_rate": 0.0001875803797917814, "loss": 2.7709250450134277, "step": 6903, "token_acc": 0.3304399245501452 }, { "epoch": 4.046907065376723, "grad_norm": 0.3339713907102065, "learning_rate": 0.00018757570132776043, "loss": 2.766171455383301, "step": 6904, "token_acc": 0.3309611355929176 }, { "epoch": 4.047493403693931, "grad_norm": 0.42387878975002297, "learning_rate": 0.00018757102204108373, "loss": 2.735865592956543, "step": 6905, "token_acc": 0.3346720284791351 }, { "epoch": 4.04807974201114, "grad_norm": 0.4886871588915225, "learning_rate": 0.0001875663419317953, "loss": 2.7668159008026123, "step": 6906, "token_acc": 0.3309696511326894 }, { "epoch": 4.048666080328349, "grad_norm": 0.37874510041362275, "learning_rate": 0.00018756166099993913, "loss": 2.796940565109253, "step": 6907, "token_acc": 0.32617878307580594 }, { "epoch": 4.049252418645558, "grad_norm": 0.314298446354948, "learning_rate": 0.00018755697924555912, "loss": 2.7579712867736816, "step": 6908, "token_acc": 0.3329552784915876 }, { "epoch": 4.049838756962767, "grad_norm": 0.3832672785050187, "learning_rate": 0.00018755229666869925, "loss": 2.752161741256714, "step": 6909, "token_acc": 0.33266478086927803 }, { "epoch": 4.0504250952799765, "grad_norm": 0.2936386376660296, "learning_rate": 0.00018754761326940353, "loss": 2.7634830474853516, "step": 6910, "token_acc": 0.33242755247083533 }, { "epoch": 4.051011433597186, "grad_norm": 0.32265135429292935, "learning_rate": 0.00018754292904771597, "loss": 2.7729527950286865, "step": 6911, "token_acc": 0.32992748762987056 }, { "epoch": 4.051597771914395, "grad_norm": 0.27939899591944534, "learning_rate": 0.00018753824400368057, "loss": 2.7908830642700195, "step": 6912, "token_acc": 0.32677308012748457 }, { "epoch": 4.052184110231604, "grad_norm": 0.3676753953235489, "learning_rate": 0.0001875335581373413, "loss": 2.792667865753174, "step": 6913, "token_acc": 0.32852630747769324 }, { "epoch": 4.052770448548813, "grad_norm": 0.2637688120432847, "learning_rate": 0.00018752887144874223, "loss": 2.7679219245910645, "step": 6914, "token_acc": 0.33158920024770533 }, { "epoch": 4.053356786866022, "grad_norm": 0.33093191401428684, "learning_rate": 0.00018752418393792734, "loss": 2.7562692165374756, "step": 6915, "token_acc": 0.33231292427476816 }, { "epoch": 4.05394312518323, "grad_norm": 0.2521829343560796, "learning_rate": 0.0001875194956049407, "loss": 2.7012040615081787, "step": 6916, "token_acc": 0.34112499763246684 }, { "epoch": 4.054529463500439, "grad_norm": 0.3000176500552398, "learning_rate": 0.0001875148064498263, "loss": 2.7751495838165283, "step": 6917, "token_acc": 0.3308576658285902 }, { "epoch": 4.0551158018176485, "grad_norm": 0.2792806399142487, "learning_rate": 0.00018751011647262823, "loss": 2.774353504180908, "step": 6918, "token_acc": 0.3294375727212765 }, { "epoch": 4.055702140134858, "grad_norm": 0.3258240461061615, "learning_rate": 0.00018750542567339058, "loss": 2.7712550163269043, "step": 6919, "token_acc": 0.33067063755266185 }, { "epoch": 4.056288478452067, "grad_norm": 0.26955602050407346, "learning_rate": 0.00018750073405215733, "loss": 2.7577781677246094, "step": 6920, "token_acc": 0.3331525659454999 }, { "epoch": 4.056874816769276, "grad_norm": 0.29942012602178675, "learning_rate": 0.0001874960416089726, "loss": 2.7662367820739746, "step": 6921, "token_acc": 0.33172079272909577 }, { "epoch": 4.057461155086485, "grad_norm": 0.2575745026519664, "learning_rate": 0.00018749134834388049, "loss": 2.789682388305664, "step": 6922, "token_acc": 0.3279884769216919 }, { "epoch": 4.058047493403694, "grad_norm": 0.3052660068945356, "learning_rate": 0.00018748665425692503, "loss": 2.7984015941619873, "step": 6923, "token_acc": 0.32599318023088614 }, { "epoch": 4.058633831720903, "grad_norm": 0.2699691755048279, "learning_rate": 0.00018748195934815035, "loss": 2.8098926544189453, "step": 6924, "token_acc": 0.32484791166095806 }, { "epoch": 4.059220170038112, "grad_norm": 0.3370475519299112, "learning_rate": 0.0001874772636176005, "loss": 2.802156925201416, "step": 6925, "token_acc": 0.3251754456723401 }, { "epoch": 4.059806508355321, "grad_norm": 0.26608284016538064, "learning_rate": 0.0001874725670653197, "loss": 2.753708839416504, "step": 6926, "token_acc": 0.3331864829763936 }, { "epoch": 4.06039284667253, "grad_norm": 0.31024061405733094, "learning_rate": 0.00018746786969135197, "loss": 2.733388900756836, "step": 6927, "token_acc": 0.3366816330235066 }, { "epoch": 4.060979184989739, "grad_norm": 0.2825549507571758, "learning_rate": 0.00018746317149574148, "loss": 2.7752132415771484, "step": 6928, "token_acc": 0.33001740168553534 }, { "epoch": 4.061565523306948, "grad_norm": 0.26218068159211266, "learning_rate": 0.00018745847247853237, "loss": 2.792555809020996, "step": 6929, "token_acc": 0.32729730804405743 }, { "epoch": 4.062151861624157, "grad_norm": 0.28565571409610485, "learning_rate": 0.00018745377263976873, "loss": 2.7336134910583496, "step": 6930, "token_acc": 0.33658813010791955 }, { "epoch": 4.062738199941366, "grad_norm": 0.25917494324516965, "learning_rate": 0.00018744907197949475, "loss": 2.78607177734375, "step": 6931, "token_acc": 0.32839207787848274 }, { "epoch": 4.063324538258575, "grad_norm": 0.275856600393436, "learning_rate": 0.0001874443704977546, "loss": 2.7626137733459473, "step": 6932, "token_acc": 0.3318768629764896 }, { "epoch": 4.063910876575784, "grad_norm": 0.24019919760464942, "learning_rate": 0.00018743966819459237, "loss": 2.732649326324463, "step": 6933, "token_acc": 0.33588962625890506 }, { "epoch": 4.064497214892993, "grad_norm": 0.27187415472866, "learning_rate": 0.00018743496507005235, "loss": 2.8132128715515137, "step": 6934, "token_acc": 0.3224202450757547 }, { "epoch": 4.0650835532102025, "grad_norm": 0.24414871822487383, "learning_rate": 0.0001874302611241786, "loss": 2.7322864532470703, "step": 6935, "token_acc": 0.335468319382995 }, { "epoch": 4.065669891527412, "grad_norm": 0.28580262242350185, "learning_rate": 0.0001874255563570154, "loss": 2.772169589996338, "step": 6936, "token_acc": 0.3297258726288874 }, { "epoch": 4.066256229844621, "grad_norm": 0.24595331072833043, "learning_rate": 0.00018742085076860687, "loss": 2.785900115966797, "step": 6937, "token_acc": 0.32830294757475303 }, { "epoch": 4.066842568161829, "grad_norm": 0.2994795502731689, "learning_rate": 0.00018741614435899729, "loss": 2.777104377746582, "step": 6938, "token_acc": 0.3310012743868956 }, { "epoch": 4.067428906479038, "grad_norm": 0.2541971012890222, "learning_rate": 0.0001874114371282308, "loss": 2.7946391105651855, "step": 6939, "token_acc": 0.3265326393046111 }, { "epoch": 4.068015244796247, "grad_norm": 0.2866739259589239, "learning_rate": 0.00018740672907635163, "loss": 2.78688645362854, "step": 6940, "token_acc": 0.3291741824170653 }, { "epoch": 4.068601583113456, "grad_norm": 0.318026047304951, "learning_rate": 0.00018740202020340406, "loss": 2.777865409851074, "step": 6941, "token_acc": 0.32845302694035317 }, { "epoch": 4.069187921430665, "grad_norm": 0.2636248194563254, "learning_rate": 0.00018739731050943225, "loss": 2.779660224914551, "step": 6942, "token_acc": 0.32937057545943443 }, { "epoch": 4.0697742597478745, "grad_norm": 0.34685037688241127, "learning_rate": 0.00018739259999448052, "loss": 2.770552158355713, "step": 6943, "token_acc": 0.3302495266536336 }, { "epoch": 4.070360598065084, "grad_norm": 0.28457131153826515, "learning_rate": 0.00018738788865859304, "loss": 2.735234260559082, "step": 6944, "token_acc": 0.3351677232327737 }, { "epoch": 4.070946936382293, "grad_norm": 0.2946063281682729, "learning_rate": 0.00018738317650181412, "loss": 2.737989902496338, "step": 6945, "token_acc": 0.336115510013973 }, { "epoch": 4.071533274699502, "grad_norm": 0.3107838949698725, "learning_rate": 0.000187378463524188, "loss": 2.735621929168701, "step": 6946, "token_acc": 0.33495310265864653 }, { "epoch": 4.072119613016711, "grad_norm": 0.2639847972785085, "learning_rate": 0.00018737374972575897, "loss": 2.7650980949401855, "step": 6947, "token_acc": 0.33240904191336135 }, { "epoch": 4.07270595133392, "grad_norm": 0.2953356426811721, "learning_rate": 0.0001873690351065713, "loss": 2.7613000869750977, "step": 6948, "token_acc": 0.3319508468947194 }, { "epoch": 4.073292289651128, "grad_norm": 0.2448617502391117, "learning_rate": 0.00018736431966666925, "loss": 2.796015739440918, "step": 6949, "token_acc": 0.32686305914351205 }, { "epoch": 4.073878627968337, "grad_norm": 0.3349701806695207, "learning_rate": 0.00018735960340609715, "loss": 2.750030994415283, "step": 6950, "token_acc": 0.3334959310585645 }, { "epoch": 4.0744649662855466, "grad_norm": 0.2972968835392099, "learning_rate": 0.0001873548863248993, "loss": 2.78411602973938, "step": 6951, "token_acc": 0.32904960520448046 }, { "epoch": 4.075051304602756, "grad_norm": 0.2862035605160103, "learning_rate": 0.00018735016842312001, "loss": 2.767188310623169, "step": 6952, "token_acc": 0.32981540192677405 }, { "epoch": 4.075637642919965, "grad_norm": 0.28264675652244914, "learning_rate": 0.0001873454497008036, "loss": 2.8102285861968994, "step": 6953, "token_acc": 0.32528572060388317 }, { "epoch": 4.076223981237174, "grad_norm": 0.2677781927569503, "learning_rate": 0.00018734073015799435, "loss": 2.7505927085876465, "step": 6954, "token_acc": 0.3341662497696095 }, { "epoch": 4.076810319554383, "grad_norm": 0.2866412242122134, "learning_rate": 0.0001873360097947367, "loss": 2.7501988410949707, "step": 6955, "token_acc": 0.3330945817123129 }, { "epoch": 4.077396657871592, "grad_norm": 0.2572213606705688, "learning_rate": 0.00018733128861107487, "loss": 2.7422337532043457, "step": 6956, "token_acc": 0.3351038158698162 }, { "epoch": 4.077982996188801, "grad_norm": 0.2781125837809364, "learning_rate": 0.0001873265666070533, "loss": 2.7785842418670654, "step": 6957, "token_acc": 0.3300830435500397 }, { "epoch": 4.07856933450601, "grad_norm": 0.28590266231449196, "learning_rate": 0.00018732184378271626, "loss": 2.801027774810791, "step": 6958, "token_acc": 0.32767509404876755 }, { "epoch": 4.0791556728232194, "grad_norm": 0.27665116877733775, "learning_rate": 0.00018731712013810822, "loss": 2.7490527629852295, "step": 6959, "token_acc": 0.33425858139552933 }, { "epoch": 4.079742011140428, "grad_norm": 0.2881567320608162, "learning_rate": 0.00018731239567327347, "loss": 2.750871181488037, "step": 6960, "token_acc": 0.3347949084223901 }, { "epoch": 4.080328349457637, "grad_norm": 0.29411699909789923, "learning_rate": 0.00018730767038825644, "loss": 2.7507994174957275, "step": 6961, "token_acc": 0.33386738969682034 }, { "epoch": 4.080914687774846, "grad_norm": 0.30938966460423356, "learning_rate": 0.00018730294428310148, "loss": 2.778989315032959, "step": 6962, "token_acc": 0.3300888019386679 }, { "epoch": 4.081501026092055, "grad_norm": 0.2716111641880414, "learning_rate": 0.00018729821735785298, "loss": 2.7616281509399414, "step": 6963, "token_acc": 0.33293458252337077 }, { "epoch": 4.082087364409264, "grad_norm": 0.286731706949621, "learning_rate": 0.00018729348961255538, "loss": 2.7621986865997314, "step": 6964, "token_acc": 0.33139352165042907 }, { "epoch": 4.082673702726473, "grad_norm": 0.2800254923304573, "learning_rate": 0.00018728876104725308, "loss": 2.776677131652832, "step": 6965, "token_acc": 0.32902117496713507 }, { "epoch": 4.083260041043682, "grad_norm": 0.31442177367564694, "learning_rate": 0.0001872840316619905, "loss": 2.73514986038208, "step": 6966, "token_acc": 0.3359620082410142 }, { "epoch": 4.0838463793608915, "grad_norm": 0.29508438486586264, "learning_rate": 0.00018727930145681206, "loss": 2.772282123565674, "step": 6967, "token_acc": 0.33024621876456084 }, { "epoch": 4.084432717678101, "grad_norm": 0.2886810982556486, "learning_rate": 0.00018727457043176218, "loss": 2.758009910583496, "step": 6968, "token_acc": 0.33227547467740054 }, { "epoch": 4.08501905599531, "grad_norm": 0.32037873471429956, "learning_rate": 0.00018726983858688532, "loss": 2.765078067779541, "step": 6969, "token_acc": 0.32996333889780516 }, { "epoch": 4.085605394312518, "grad_norm": 0.37409292450447285, "learning_rate": 0.0001872651059222259, "loss": 2.8005900382995605, "step": 6970, "token_acc": 0.3271375464684015 }, { "epoch": 4.086191732629727, "grad_norm": 0.2938886905099938, "learning_rate": 0.00018726037243782843, "loss": 2.7676639556884766, "step": 6971, "token_acc": 0.33005086735359335 }, { "epoch": 4.086778070946936, "grad_norm": 0.2893332732306068, "learning_rate": 0.00018725563813373733, "loss": 2.76513671875, "step": 6972, "token_acc": 0.33118179900902706 }, { "epoch": 4.087364409264145, "grad_norm": 0.33046676274236186, "learning_rate": 0.0001872509030099971, "loss": 2.7479000091552734, "step": 6973, "token_acc": 0.3332185105076237 }, { "epoch": 4.087950747581354, "grad_norm": 0.26955341360066104, "learning_rate": 0.00018724616706665222, "loss": 2.7698910236358643, "step": 6974, "token_acc": 0.33168206763410774 }, { "epoch": 4.0885370858985635, "grad_norm": 0.2604521758871385, "learning_rate": 0.00018724143030374713, "loss": 2.7822229862213135, "step": 6975, "token_acc": 0.3290214994272534 }, { "epoch": 4.089123424215773, "grad_norm": 0.25591471146964134, "learning_rate": 0.0001872366927213264, "loss": 2.774961471557617, "step": 6976, "token_acc": 0.3283634468471819 }, { "epoch": 4.089709762532982, "grad_norm": 0.2669908508235807, "learning_rate": 0.00018723195431943448, "loss": 2.7355546951293945, "step": 6977, "token_acc": 0.33567018884599886 }, { "epoch": 4.090296100850191, "grad_norm": 0.25196534934350184, "learning_rate": 0.0001872272150981159, "loss": 2.7441518306732178, "step": 6978, "token_acc": 0.33401965323600696 }, { "epoch": 4.0908824391674, "grad_norm": 0.29341116461069644, "learning_rate": 0.00018722247505741514, "loss": 2.806053638458252, "step": 6979, "token_acc": 0.3239189841276395 }, { "epoch": 4.091468777484609, "grad_norm": 0.269197395566141, "learning_rate": 0.0001872177341973768, "loss": 2.7758655548095703, "step": 6980, "token_acc": 0.3302535503534142 }, { "epoch": 4.092055115801817, "grad_norm": 0.26560612438553594, "learning_rate": 0.0001872129925180454, "loss": 2.784797430038452, "step": 6981, "token_acc": 0.32876715945786805 }, { "epoch": 4.092641454119026, "grad_norm": 0.3366844470860098, "learning_rate": 0.0001872082500194654, "loss": 2.7610507011413574, "step": 6982, "token_acc": 0.3336090752459284 }, { "epoch": 4.0932277924362355, "grad_norm": 0.34951875845322655, "learning_rate": 0.00018720350670168144, "loss": 2.770429849624634, "step": 6983, "token_acc": 0.33193197264900937 }, { "epoch": 4.093814130753445, "grad_norm": 0.2637241933805337, "learning_rate": 0.00018719876256473802, "loss": 2.766040325164795, "step": 6984, "token_acc": 0.3314911507425239 }, { "epoch": 4.094400469070654, "grad_norm": 0.3162434060030911, "learning_rate": 0.00018719401760867972, "loss": 2.773322582244873, "step": 6985, "token_acc": 0.3310106870393112 }, { "epoch": 4.094986807387863, "grad_norm": 0.28569884990697536, "learning_rate": 0.00018718927183355115, "loss": 2.77522611618042, "step": 6986, "token_acc": 0.329738195528433 }, { "epoch": 4.095573145705072, "grad_norm": 0.25913497284786324, "learning_rate": 0.00018718452523939683, "loss": 2.724515914916992, "step": 6987, "token_acc": 0.3359277507638022 }, { "epoch": 4.096159484022281, "grad_norm": 0.27935033919243435, "learning_rate": 0.0001871797778262614, "loss": 2.7765698432922363, "step": 6988, "token_acc": 0.328036729317973 }, { "epoch": 4.09674582233949, "grad_norm": 0.2719017625940981, "learning_rate": 0.0001871750295941894, "loss": 2.7635622024536133, "step": 6989, "token_acc": 0.3317244464566759 }, { "epoch": 4.097332160656699, "grad_norm": 0.25469946765134416, "learning_rate": 0.00018717028054322552, "loss": 2.7829761505126953, "step": 6990, "token_acc": 0.32703086983748936 }, { "epoch": 4.097918498973908, "grad_norm": 0.24891389017139048, "learning_rate": 0.00018716553067341427, "loss": 2.7539639472961426, "step": 6991, "token_acc": 0.3325885245150684 }, { "epoch": 4.098504837291117, "grad_norm": 0.2585031128140516, "learning_rate": 0.00018716077998480034, "loss": 2.7908847332000732, "step": 6992, "token_acc": 0.32657609541367644 }, { "epoch": 4.099091175608326, "grad_norm": 0.27687698176399467, "learning_rate": 0.00018715602847742835, "loss": 2.7853105068206787, "step": 6993, "token_acc": 0.32809481168263027 }, { "epoch": 4.099677513925535, "grad_norm": 0.2571919110684222, "learning_rate": 0.00018715127615134288, "loss": 2.7836523056030273, "step": 6994, "token_acc": 0.32807189730150194 }, { "epoch": 4.100263852242744, "grad_norm": 0.2656065161465735, "learning_rate": 0.00018714652300658863, "loss": 2.7827649116516113, "step": 6995, "token_acc": 0.32866491766204053 }, { "epoch": 4.100850190559953, "grad_norm": 0.2628347208721259, "learning_rate": 0.00018714176904321023, "loss": 2.765716075897217, "step": 6996, "token_acc": 0.3299386874732345 }, { "epoch": 4.101436528877162, "grad_norm": 0.2710803609148174, "learning_rate": 0.00018713701426125234, "loss": 2.760533332824707, "step": 6997, "token_acc": 0.3320192728865528 }, { "epoch": 4.102022867194371, "grad_norm": 0.27170442463488476, "learning_rate": 0.0001871322586607596, "loss": 2.7863659858703613, "step": 6998, "token_acc": 0.3277534069062211 }, { "epoch": 4.10260920551158, "grad_norm": 0.3079328478046814, "learning_rate": 0.00018712750224177672, "loss": 2.7903757095336914, "step": 6999, "token_acc": 0.3267958857935452 }, { "epoch": 4.1031955438287895, "grad_norm": 0.3578986661606624, "learning_rate": 0.00018712274500434835, "loss": 2.7802817821502686, "step": 7000, "token_acc": 0.3291383989145183 }, { "epoch": 4.103781882145999, "grad_norm": 0.3120518322693373, "learning_rate": 0.00018711798694851916, "loss": 2.7869412899017334, "step": 7001, "token_acc": 0.3285645192752186 }, { "epoch": 4.104368220463208, "grad_norm": 0.27673372665174956, "learning_rate": 0.00018711322807433392, "loss": 2.7826361656188965, "step": 7002, "token_acc": 0.32803046296916116 }, { "epoch": 4.104954558780416, "grad_norm": 0.2701067293435275, "learning_rate": 0.0001871084683818373, "loss": 2.7830686569213867, "step": 7003, "token_acc": 0.3277646773680463 }, { "epoch": 4.105540897097625, "grad_norm": 0.3061785059713317, "learning_rate": 0.00018710370787107397, "loss": 2.790168285369873, "step": 7004, "token_acc": 0.32784299851509324 }, { "epoch": 4.106127235414834, "grad_norm": 0.26051961441749666, "learning_rate": 0.00018709894654208868, "loss": 2.827219247817993, "step": 7005, "token_acc": 0.32449667320106185 }, { "epoch": 4.106713573732043, "grad_norm": 0.3090480869904867, "learning_rate": 0.00018709418439492615, "loss": 2.7633821964263916, "step": 7006, "token_acc": 0.33233672449013 }, { "epoch": 4.107299912049252, "grad_norm": 0.2593030729150849, "learning_rate": 0.00018708942142963114, "loss": 2.7726902961730957, "step": 7007, "token_acc": 0.3313726369432128 }, { "epoch": 4.1078862503664615, "grad_norm": 0.27487392902951996, "learning_rate": 0.00018708465764624834, "loss": 2.807840347290039, "step": 7008, "token_acc": 0.3253213105705568 }, { "epoch": 4.108472588683671, "grad_norm": 0.2801469133155097, "learning_rate": 0.00018707989304482254, "loss": 2.759533405303955, "step": 7009, "token_acc": 0.33206257968987785 }, { "epoch": 4.10905892700088, "grad_norm": 0.25645991403593255, "learning_rate": 0.0001870751276253985, "loss": 2.8053030967712402, "step": 7010, "token_acc": 0.32579624767881155 }, { "epoch": 4.109645265318089, "grad_norm": 0.2746323135299995, "learning_rate": 0.00018707036138802097, "loss": 2.771796941757202, "step": 7011, "token_acc": 0.3321467927526204 }, { "epoch": 4.110231603635298, "grad_norm": 0.265106029438647, "learning_rate": 0.0001870655943327347, "loss": 2.7741711139678955, "step": 7012, "token_acc": 0.32933837902995683 }, { "epoch": 4.110817941952506, "grad_norm": 0.26188098833628304, "learning_rate": 0.0001870608264595845, "loss": 2.744208812713623, "step": 7013, "token_acc": 0.33597084879555544 }, { "epoch": 4.111404280269715, "grad_norm": 0.29865889116801675, "learning_rate": 0.0001870560577686152, "loss": 2.811337471008301, "step": 7014, "token_acc": 0.32440194285966034 }, { "epoch": 4.111990618586924, "grad_norm": 0.2820099505103101, "learning_rate": 0.0001870512882598715, "loss": 2.795687675476074, "step": 7015, "token_acc": 0.32662213688556246 }, { "epoch": 4.1125769569041335, "grad_norm": 0.2822595527425229, "learning_rate": 0.00018704651793339828, "loss": 2.7699217796325684, "step": 7016, "token_acc": 0.3312767175277295 }, { "epoch": 4.113163295221343, "grad_norm": 0.3725045013551885, "learning_rate": 0.0001870417467892403, "loss": 2.792863368988037, "step": 7017, "token_acc": 0.3268953378775965 }, { "epoch": 4.113749633538552, "grad_norm": 0.3206722410831035, "learning_rate": 0.0001870369748274424, "loss": 2.7744576930999756, "step": 7018, "token_acc": 0.327900455660708 }, { "epoch": 4.114335971855761, "grad_norm": 0.2653342546786799, "learning_rate": 0.0001870322020480494, "loss": 2.773062229156494, "step": 7019, "token_acc": 0.33105562902044666 }, { "epoch": 4.11492231017297, "grad_norm": 0.29228210617629496, "learning_rate": 0.00018702742845110612, "loss": 2.748831033706665, "step": 7020, "token_acc": 0.33241421331329857 }, { "epoch": 4.115508648490179, "grad_norm": 0.2707063354424606, "learning_rate": 0.00018702265403665745, "loss": 2.7499032020568848, "step": 7021, "token_acc": 0.3344534779669922 }, { "epoch": 4.116094986807388, "grad_norm": 0.28093681581677316, "learning_rate": 0.00018701787880474823, "loss": 2.767742156982422, "step": 7022, "token_acc": 0.3306166629524689 }, { "epoch": 4.116681325124597, "grad_norm": 0.24431384247063698, "learning_rate": 0.00018701310275542326, "loss": 2.745388984680176, "step": 7023, "token_acc": 0.3357383016779703 }, { "epoch": 4.1172676634418055, "grad_norm": 0.27872708923673, "learning_rate": 0.00018700832588872744, "loss": 2.790595769882202, "step": 7024, "token_acc": 0.3283110022521354 }, { "epoch": 4.117854001759015, "grad_norm": 0.2726547496097954, "learning_rate": 0.00018700354820470568, "loss": 2.706279754638672, "step": 7025, "token_acc": 0.3406320619504428 }, { "epoch": 4.118440340076224, "grad_norm": 0.3107360314112489, "learning_rate": 0.00018699876970340278, "loss": 2.786653518676758, "step": 7026, "token_acc": 0.32710481381325457 }, { "epoch": 4.119026678393433, "grad_norm": 0.3335487111517807, "learning_rate": 0.00018699399038486368, "loss": 2.801717758178711, "step": 7027, "token_acc": 0.3264239103098651 }, { "epoch": 4.119613016710642, "grad_norm": 0.27672355674194155, "learning_rate": 0.0001869892102491333, "loss": 2.7825257778167725, "step": 7028, "token_acc": 0.32844359496710895 }, { "epoch": 4.120199355027851, "grad_norm": 0.2765096560489484, "learning_rate": 0.00018698442929625646, "loss": 2.8233089447021484, "step": 7029, "token_acc": 0.32240019752171967 }, { "epoch": 4.12078569334506, "grad_norm": 0.3059329729677058, "learning_rate": 0.00018697964752627816, "loss": 2.7617125511169434, "step": 7030, "token_acc": 0.3316954522333451 }, { "epoch": 4.121372031662269, "grad_norm": 0.2710815583715179, "learning_rate": 0.00018697486493924326, "loss": 2.760650634765625, "step": 7031, "token_acc": 0.33190080739989997 }, { "epoch": 4.121958369979478, "grad_norm": 0.31203436338182305, "learning_rate": 0.00018697008153519673, "loss": 2.757140636444092, "step": 7032, "token_acc": 0.33304438915804635 }, { "epoch": 4.1225447082966875, "grad_norm": 0.33578580087262255, "learning_rate": 0.00018696529731418343, "loss": 2.7520766258239746, "step": 7033, "token_acc": 0.3345742932724167 }, { "epoch": 4.123131046613897, "grad_norm": 0.289357131677349, "learning_rate": 0.00018696051227624837, "loss": 2.7894434928894043, "step": 7034, "token_acc": 0.3264634008401748 }, { "epoch": 4.123717384931105, "grad_norm": 0.27819199448952175, "learning_rate": 0.00018695572642143647, "loss": 2.777121067047119, "step": 7035, "token_acc": 0.3277851542335577 }, { "epoch": 4.124303723248314, "grad_norm": 0.3009842204012059, "learning_rate": 0.00018695093974979273, "loss": 2.8255021572113037, "step": 7036, "token_acc": 0.32206532372479424 }, { "epoch": 4.124890061565523, "grad_norm": 0.30974712396712883, "learning_rate": 0.00018694615226136206, "loss": 2.759551525115967, "step": 7037, "token_acc": 0.33152108818030795 }, { "epoch": 4.125476399882732, "grad_norm": 0.2953854844520762, "learning_rate": 0.00018694136395618944, "loss": 2.7843942642211914, "step": 7038, "token_acc": 0.32956415072844664 }, { "epoch": 4.126062738199941, "grad_norm": 0.28009635260859733, "learning_rate": 0.0001869365748343199, "loss": 2.77339243888855, "step": 7039, "token_acc": 0.3305213550539104 }, { "epoch": 4.12664907651715, "grad_norm": 0.32887105735005084, "learning_rate": 0.00018693178489579833, "loss": 2.757084369659424, "step": 7040, "token_acc": 0.33373386423311424 }, { "epoch": 4.1272354148343595, "grad_norm": 0.3053424996870601, "learning_rate": 0.00018692699414066982, "loss": 2.768772840499878, "step": 7041, "token_acc": 0.33037440812785485 }, { "epoch": 4.127821753151569, "grad_norm": 0.25962555383164393, "learning_rate": 0.0001869222025689793, "loss": 2.825148105621338, "step": 7042, "token_acc": 0.321526552242924 }, { "epoch": 4.128408091468778, "grad_norm": 0.3101808558394585, "learning_rate": 0.00018691741018077185, "loss": 2.7873921394348145, "step": 7043, "token_acc": 0.32956636344841844 }, { "epoch": 4.128994429785987, "grad_norm": 0.35146631458379474, "learning_rate": 0.0001869126169760924, "loss": 2.7699458599090576, "step": 7044, "token_acc": 0.3312729667155207 }, { "epoch": 4.129580768103196, "grad_norm": 0.32041795317132327, "learning_rate": 0.0001869078229549861, "loss": 2.78788423538208, "step": 7045, "token_acc": 0.32715159249750037 }, { "epoch": 4.130167106420404, "grad_norm": 0.2951197907994588, "learning_rate": 0.0001869030281174979, "loss": 2.7604000568389893, "step": 7046, "token_acc": 0.3330716538058618 }, { "epoch": 4.130753444737613, "grad_norm": 0.32871960030887004, "learning_rate": 0.0001868982324636728, "loss": 2.7620162963867188, "step": 7047, "token_acc": 0.3314547276534344 }, { "epoch": 4.131339783054822, "grad_norm": 0.3301648556863799, "learning_rate": 0.00018689343599355597, "loss": 2.786294460296631, "step": 7048, "token_acc": 0.3275438922680549 }, { "epoch": 4.1319261213720315, "grad_norm": 0.2906726232084152, "learning_rate": 0.00018688863870719238, "loss": 2.8451647758483887, "step": 7049, "token_acc": 0.3210807841701215 }, { "epoch": 4.132512459689241, "grad_norm": 0.4210967496802527, "learning_rate": 0.0001868838406046271, "loss": 2.7999956607818604, "step": 7050, "token_acc": 0.32510103395106016 }, { "epoch": 4.13309879800645, "grad_norm": 0.2935659433205372, "learning_rate": 0.00018687904168590523, "loss": 2.8028855323791504, "step": 7051, "token_acc": 0.3267167102507736 }, { "epoch": 4.133685136323659, "grad_norm": 0.33054437937100584, "learning_rate": 0.00018687424195107182, "loss": 2.783092498779297, "step": 7052, "token_acc": 0.3278107338763298 }, { "epoch": 4.134271474640868, "grad_norm": 0.2634381035047187, "learning_rate": 0.00018686944140017196, "loss": 2.8198819160461426, "step": 7053, "token_acc": 0.3240459873422462 }, { "epoch": 4.134857812958077, "grad_norm": 0.36315860437658876, "learning_rate": 0.00018686464003325078, "loss": 2.7515907287597656, "step": 7054, "token_acc": 0.3351889375094307 }, { "epoch": 4.135444151275286, "grad_norm": 0.27387714814099007, "learning_rate": 0.00018685983785035335, "loss": 2.7970261573791504, "step": 7055, "token_acc": 0.3256825478294115 }, { "epoch": 4.136030489592494, "grad_norm": 0.3545461519717396, "learning_rate": 0.00018685503485152478, "loss": 2.843562602996826, "step": 7056, "token_acc": 0.31898898459566655 }, { "epoch": 4.1366168279097035, "grad_norm": 0.2874434061880429, "learning_rate": 0.00018685023103681022, "loss": 2.745453357696533, "step": 7057, "token_acc": 0.3346944932283193 }, { "epoch": 4.137203166226913, "grad_norm": 0.33855619322856323, "learning_rate": 0.00018684542640625475, "loss": 2.795011043548584, "step": 7058, "token_acc": 0.32832476815405065 }, { "epoch": 4.137789504544122, "grad_norm": 0.2669482462179286, "learning_rate": 0.00018684062095990353, "loss": 2.8409528732299805, "step": 7059, "token_acc": 0.32114618553623014 }, { "epoch": 4.138375842861331, "grad_norm": 0.2861106352799615, "learning_rate": 0.0001868358146978017, "loss": 2.8036770820617676, "step": 7060, "token_acc": 0.3253860287853887 }, { "epoch": 4.13896218117854, "grad_norm": 0.2449597642082394, "learning_rate": 0.00018683100761999438, "loss": 2.733694076538086, "step": 7061, "token_acc": 0.33732241219902853 }, { "epoch": 4.139548519495749, "grad_norm": 0.29058488972340873, "learning_rate": 0.00018682619972652678, "loss": 2.77097749710083, "step": 7062, "token_acc": 0.33256993774977767 }, { "epoch": 4.140134857812958, "grad_norm": 0.2662685747523196, "learning_rate": 0.00018682139101744403, "loss": 2.7678170204162598, "step": 7063, "token_acc": 0.3309265884012697 }, { "epoch": 4.140721196130167, "grad_norm": 0.29599596641628567, "learning_rate": 0.0001868165814927913, "loss": 2.8359062671661377, "step": 7064, "token_acc": 0.32174010260110836 }, { "epoch": 4.141307534447376, "grad_norm": 0.25726594615956855, "learning_rate": 0.0001868117711526138, "loss": 2.80977725982666, "step": 7065, "token_acc": 0.32590732536672784 }, { "epoch": 4.1418938727645855, "grad_norm": 0.28490856373810053, "learning_rate": 0.00018680695999695663, "loss": 2.783968687057495, "step": 7066, "token_acc": 0.3279909294688005 }, { "epoch": 4.142480211081795, "grad_norm": 0.2520894605068028, "learning_rate": 0.00018680214802586508, "loss": 2.7627320289611816, "step": 7067, "token_acc": 0.33140180161577576 }, { "epoch": 4.143066549399003, "grad_norm": 0.2783301064219893, "learning_rate": 0.00018679733523938432, "loss": 2.7848260402679443, "step": 7068, "token_acc": 0.328845948180744 }, { "epoch": 4.143652887716212, "grad_norm": 0.2658360803771178, "learning_rate": 0.00018679252163755953, "loss": 2.7919535636901855, "step": 7069, "token_acc": 0.3271269463371918 }, { "epoch": 4.144239226033421, "grad_norm": 0.24183446199370118, "learning_rate": 0.000186787707220436, "loss": 2.79420804977417, "step": 7070, "token_acc": 0.32717455206054796 }, { "epoch": 4.14482556435063, "grad_norm": 0.26054339657663267, "learning_rate": 0.00018678289198805888, "loss": 2.8057875633239746, "step": 7071, "token_acc": 0.32339594019566253 }, { "epoch": 4.145411902667839, "grad_norm": 0.2630474990463091, "learning_rate": 0.00018677807594047344, "loss": 2.773744583129883, "step": 7072, "token_acc": 0.33090469261454625 }, { "epoch": 4.145998240985048, "grad_norm": 0.2595537313094888, "learning_rate": 0.0001867732590777249, "loss": 2.764838695526123, "step": 7073, "token_acc": 0.33055181480056434 }, { "epoch": 4.1465845793022575, "grad_norm": 0.26794172093144475, "learning_rate": 0.00018676844139985853, "loss": 2.7939529418945312, "step": 7074, "token_acc": 0.32792273957955637 }, { "epoch": 4.147170917619467, "grad_norm": 0.24037160431563642, "learning_rate": 0.00018676362290691957, "loss": 2.7875704765319824, "step": 7075, "token_acc": 0.32778470299563056 }, { "epoch": 4.147757255936676, "grad_norm": 0.2729734541177915, "learning_rate": 0.00018675880359895328, "loss": 2.8028979301452637, "step": 7076, "token_acc": 0.32618341656263006 }, { "epoch": 4.148343594253885, "grad_norm": 0.2630554088208072, "learning_rate": 0.00018675398347600496, "loss": 2.809323787689209, "step": 7077, "token_acc": 0.32371711534055364 }, { "epoch": 4.148929932571093, "grad_norm": 0.2730631031382186, "learning_rate": 0.00018674916253811981, "loss": 2.768653392791748, "step": 7078, "token_acc": 0.33009128337353405 }, { "epoch": 4.149516270888302, "grad_norm": 0.26453828104729843, "learning_rate": 0.00018674434078534325, "loss": 2.7943623065948486, "step": 7079, "token_acc": 0.3277035462876876 }, { "epoch": 4.150102609205511, "grad_norm": 0.3340931310258282, "learning_rate": 0.00018673951821772047, "loss": 2.79005765914917, "step": 7080, "token_acc": 0.3275406506731599 }, { "epoch": 4.15068894752272, "grad_norm": 0.3506987576465481, "learning_rate": 0.00018673469483529678, "loss": 2.8223509788513184, "step": 7081, "token_acc": 0.32267923932814147 }, { "epoch": 4.1512752858399296, "grad_norm": 0.24428524079193453, "learning_rate": 0.00018672987063811754, "loss": 2.7531051635742188, "step": 7082, "token_acc": 0.335091730446931 }, { "epoch": 4.151861624157139, "grad_norm": 0.32776096009618627, "learning_rate": 0.000186725045626228, "loss": 2.8050754070281982, "step": 7083, "token_acc": 0.3240860176140168 }, { "epoch": 4.152447962474348, "grad_norm": 0.3156337991430243, "learning_rate": 0.00018672021979967353, "loss": 2.7837018966674805, "step": 7084, "token_acc": 0.3277810283039567 }, { "epoch": 4.153034300791557, "grad_norm": 0.27443673184627987, "learning_rate": 0.00018671539315849947, "loss": 2.791717052459717, "step": 7085, "token_acc": 0.3283897708450138 }, { "epoch": 4.153620639108766, "grad_norm": 0.25510788259141437, "learning_rate": 0.00018671056570275114, "loss": 2.813810348510742, "step": 7086, "token_acc": 0.3245557943826832 }, { "epoch": 4.154206977425975, "grad_norm": 0.26251494317763374, "learning_rate": 0.00018670573743247387, "loss": 2.7571256160736084, "step": 7087, "token_acc": 0.3326289915694603 }, { "epoch": 4.154793315743184, "grad_norm": 0.2344264704345542, "learning_rate": 0.00018670090834771306, "loss": 2.7841124534606934, "step": 7088, "token_acc": 0.3269944643438619 }, { "epoch": 4.1553796540603924, "grad_norm": 0.2553680099230396, "learning_rate": 0.00018669607844851402, "loss": 2.748168468475342, "step": 7089, "token_acc": 0.33395510574528264 }, { "epoch": 4.155965992377602, "grad_norm": 0.23980217953221, "learning_rate": 0.00018669124773492218, "loss": 2.7773807048797607, "step": 7090, "token_acc": 0.32837252565183267 }, { "epoch": 4.156552330694811, "grad_norm": 0.27065010800507466, "learning_rate": 0.00018668641620698287, "loss": 2.76983642578125, "step": 7091, "token_acc": 0.33109400840962894 }, { "epoch": 4.15713866901202, "grad_norm": 0.2591167377906633, "learning_rate": 0.00018668158386474145, "loss": 2.7897448539733887, "step": 7092, "token_acc": 0.32789453142394626 }, { "epoch": 4.157725007329229, "grad_norm": 0.2548083500599125, "learning_rate": 0.0001866767507082434, "loss": 2.7587451934814453, "step": 7093, "token_acc": 0.3326336643003703 }, { "epoch": 4.158311345646438, "grad_norm": 0.26800751481464313, "learning_rate": 0.00018667191673753407, "loss": 2.8164479732513428, "step": 7094, "token_acc": 0.3234671255519549 }, { "epoch": 4.158897683963647, "grad_norm": 0.2874489839603957, "learning_rate": 0.00018666708195265889, "loss": 2.812061071395874, "step": 7095, "token_acc": 0.32480085875228043 }, { "epoch": 4.159484022280856, "grad_norm": 0.31640798387122043, "learning_rate": 0.00018666224635366323, "loss": 2.825373649597168, "step": 7096, "token_acc": 0.32279993296681314 }, { "epoch": 4.160070360598065, "grad_norm": 0.259717065218652, "learning_rate": 0.00018665740994059258, "loss": 2.7896132469177246, "step": 7097, "token_acc": 0.32720886105724245 }, { "epoch": 4.1606566989152745, "grad_norm": 0.27521394865798493, "learning_rate": 0.0001866525727134923, "loss": 2.77710223197937, "step": 7098, "token_acc": 0.33024111527149763 }, { "epoch": 4.161243037232484, "grad_norm": 0.26344504062103585, "learning_rate": 0.00018664773467240786, "loss": 2.795780658721924, "step": 7099, "token_acc": 0.32529523306000235 }, { "epoch": 4.161829375549692, "grad_norm": 0.2903025543494628, "learning_rate": 0.00018664289581738476, "loss": 2.7618629932403564, "step": 7100, "token_acc": 0.33281112484740244 }, { "epoch": 4.162415713866901, "grad_norm": 0.27150845033650894, "learning_rate": 0.00018663805614846837, "loss": 2.7867631912231445, "step": 7101, "token_acc": 0.327508163900762 }, { "epoch": 4.16300205218411, "grad_norm": 0.2830386813054436, "learning_rate": 0.00018663321566570417, "loss": 2.7832565307617188, "step": 7102, "token_acc": 0.32921536659854417 }, { "epoch": 4.163588390501319, "grad_norm": 0.27133229843551504, "learning_rate": 0.00018662837436913768, "loss": 2.800172805786133, "step": 7103, "token_acc": 0.32652028632262403 }, { "epoch": 4.164174728818528, "grad_norm": 0.2942079592602994, "learning_rate": 0.00018662353225881435, "loss": 2.800168514251709, "step": 7104, "token_acc": 0.32632758936706574 }, { "epoch": 4.164761067135737, "grad_norm": 0.29979963131332366, "learning_rate": 0.00018661868933477963, "loss": 2.8138575553894043, "step": 7105, "token_acc": 0.32249482707655924 }, { "epoch": 4.1653474054529465, "grad_norm": 0.33770100558570276, "learning_rate": 0.0001866138455970791, "loss": 2.7790517807006836, "step": 7106, "token_acc": 0.3301621644275525 }, { "epoch": 4.165933743770156, "grad_norm": 0.34295619293043955, "learning_rate": 0.00018660900104575814, "loss": 2.7915194034576416, "step": 7107, "token_acc": 0.3272568144007191 }, { "epoch": 4.166520082087365, "grad_norm": 0.2890581748443234, "learning_rate": 0.00018660415568086234, "loss": 2.779521942138672, "step": 7108, "token_acc": 0.33012738000844394 }, { "epoch": 4.167106420404574, "grad_norm": 0.2713896993391161, "learning_rate": 0.0001865993095024372, "loss": 2.782832145690918, "step": 7109, "token_acc": 0.32854373302368883 }, { "epoch": 4.167692758721783, "grad_norm": 0.25793955423588083, "learning_rate": 0.00018659446251052824, "loss": 2.7800774574279785, "step": 7110, "token_acc": 0.32776205535056324 }, { "epoch": 4.168279097038991, "grad_norm": 0.26444740157657376, "learning_rate": 0.00018658961470518102, "loss": 2.791886806488037, "step": 7111, "token_acc": 0.3275825984502213 }, { "epoch": 4.1688654353562, "grad_norm": 0.2607246005357545, "learning_rate": 0.00018658476608644103, "loss": 2.7907450199127197, "step": 7112, "token_acc": 0.32789147049594 }, { "epoch": 4.169451773673409, "grad_norm": 0.2673980628771356, "learning_rate": 0.00018657991665435385, "loss": 2.7650692462921143, "step": 7113, "token_acc": 0.33089376977046525 }, { "epoch": 4.1700381119906185, "grad_norm": 0.26173272002760334, "learning_rate": 0.00018657506640896502, "loss": 2.8083853721618652, "step": 7114, "token_acc": 0.3251694921621615 }, { "epoch": 4.170624450307828, "grad_norm": 0.2623387739758055, "learning_rate": 0.0001865702153503201, "loss": 2.7661795616149902, "step": 7115, "token_acc": 0.33181971427367357 }, { "epoch": 4.171210788625037, "grad_norm": 0.25236774338551904, "learning_rate": 0.00018656536347846464, "loss": 2.7927117347717285, "step": 7116, "token_acc": 0.3270040818101279 }, { "epoch": 4.171797126942246, "grad_norm": 0.2731925456740826, "learning_rate": 0.00018656051079344425, "loss": 2.803333044052124, "step": 7117, "token_acc": 0.3250152650929547 }, { "epoch": 4.172383465259455, "grad_norm": 0.40323974685428443, "learning_rate": 0.0001865556572953045, "loss": 2.806036949157715, "step": 7118, "token_acc": 0.3261204906435175 }, { "epoch": 4.172969803576664, "grad_norm": 0.4240528933155089, "learning_rate": 0.000186550802984091, "loss": 2.8030807971954346, "step": 7119, "token_acc": 0.3260309835994634 }, { "epoch": 4.173556141893873, "grad_norm": 0.26345208945850634, "learning_rate": 0.00018654594785984932, "loss": 2.8113174438476562, "step": 7120, "token_acc": 0.32495721338167743 }, { "epoch": 4.174142480211081, "grad_norm": 0.3025642129724335, "learning_rate": 0.0001865410919226251, "loss": 2.801701068878174, "step": 7121, "token_acc": 0.32611378359309856 }, { "epoch": 4.1747288185282905, "grad_norm": 0.27291186222328934, "learning_rate": 0.0001865362351724639, "loss": 2.759350299835205, "step": 7122, "token_acc": 0.33252460708152326 }, { "epoch": 4.1753151568455, "grad_norm": 0.32262170926153777, "learning_rate": 0.0001865313776094114, "loss": 2.805788040161133, "step": 7123, "token_acc": 0.32526669558808924 }, { "epoch": 4.175901495162709, "grad_norm": 0.27277281305017403, "learning_rate": 0.00018652651923351324, "loss": 2.7759857177734375, "step": 7124, "token_acc": 0.33016439653249025 }, { "epoch": 4.176487833479918, "grad_norm": 0.32194328171796716, "learning_rate": 0.00018652166004481499, "loss": 2.8123388290405273, "step": 7125, "token_acc": 0.32509168434648644 }, { "epoch": 4.177074171797127, "grad_norm": 0.31170966719355153, "learning_rate": 0.00018651680004336236, "loss": 2.7830138206481934, "step": 7126, "token_acc": 0.32659967105522025 }, { "epoch": 4.177660510114336, "grad_norm": 0.30129830320334333, "learning_rate": 0.00018651193922920097, "loss": 2.765509605407715, "step": 7127, "token_acc": 0.3314838423175852 }, { "epoch": 4.178246848431545, "grad_norm": 0.31861687277751904, "learning_rate": 0.00018650707760237646, "loss": 2.8066015243530273, "step": 7128, "token_acc": 0.3234356789983395 }, { "epoch": 4.178833186748754, "grad_norm": 0.29318449613580966, "learning_rate": 0.00018650221516293455, "loss": 2.73773455619812, "step": 7129, "token_acc": 0.3358902243756368 }, { "epoch": 4.179419525065963, "grad_norm": 0.3370932323580013, "learning_rate": 0.00018649735191092088, "loss": 2.776411533355713, "step": 7130, "token_acc": 0.3299775805246265 }, { "epoch": 4.1800058633831725, "grad_norm": 0.2710712040606177, "learning_rate": 0.00018649248784638115, "loss": 2.7904844284057617, "step": 7131, "token_acc": 0.32678880251889036 }, { "epoch": 4.180592201700381, "grad_norm": 0.3019398458232423, "learning_rate": 0.00018648762296936104, "loss": 2.808136463165283, "step": 7132, "token_acc": 0.3249455882278679 }, { "epoch": 4.18117854001759, "grad_norm": 0.26223806080520057, "learning_rate": 0.00018648275727990628, "loss": 2.79612135887146, "step": 7133, "token_acc": 0.32764721988314366 }, { "epoch": 4.181764878334799, "grad_norm": 0.27790552792001005, "learning_rate": 0.00018647789077806253, "loss": 2.7412309646606445, "step": 7134, "token_acc": 0.33470452285961444 }, { "epoch": 4.182351216652008, "grad_norm": 0.26892586305851907, "learning_rate": 0.00018647302346387555, "loss": 2.799913167953491, "step": 7135, "token_acc": 0.3247402396024174 }, { "epoch": 4.182937554969217, "grad_norm": 0.30003085111530087, "learning_rate": 0.000186468155337391, "loss": 2.7498412132263184, "step": 7136, "token_acc": 0.33326342281879195 }, { "epoch": 4.183523893286426, "grad_norm": 0.2754543435671535, "learning_rate": 0.00018646328639865468, "loss": 2.8077759742736816, "step": 7137, "token_acc": 0.32511101394845715 }, { "epoch": 4.184110231603635, "grad_norm": 0.3213513602377404, "learning_rate": 0.00018645841664771226, "loss": 2.797866106033325, "step": 7138, "token_acc": 0.3261113794474525 }, { "epoch": 4.1846965699208445, "grad_norm": 0.29160090776598685, "learning_rate": 0.00018645354608460958, "loss": 2.765488624572754, "step": 7139, "token_acc": 0.3322875692088821 }, { "epoch": 4.185282908238054, "grad_norm": 0.2781717504488947, "learning_rate": 0.00018644867470939228, "loss": 2.801461696624756, "step": 7140, "token_acc": 0.3251454172838077 }, { "epoch": 4.185869246555263, "grad_norm": 0.25709256601189306, "learning_rate": 0.00018644380252210617, "loss": 2.7947957515716553, "step": 7141, "token_acc": 0.3273180992696534 }, { "epoch": 4.186455584872472, "grad_norm": 0.3013285944673012, "learning_rate": 0.00018643892952279704, "loss": 2.7660555839538574, "step": 7142, "token_acc": 0.3319426296132438 }, { "epoch": 4.18704192318968, "grad_norm": 0.25667518933292005, "learning_rate": 0.00018643405571151065, "loss": 2.800262451171875, "step": 7143, "token_acc": 0.3261171906452439 }, { "epoch": 4.187628261506889, "grad_norm": 0.2966746727434648, "learning_rate": 0.00018642918108829277, "loss": 2.758972644805908, "step": 7144, "token_acc": 0.3331086964752205 }, { "epoch": 4.188214599824098, "grad_norm": 0.25814810188244625, "learning_rate": 0.0001864243056531892, "loss": 2.8075592517852783, "step": 7145, "token_acc": 0.32473101791510134 }, { "epoch": 4.188800938141307, "grad_norm": 0.26175778519986925, "learning_rate": 0.00018641942940624576, "loss": 2.7754759788513184, "step": 7146, "token_acc": 0.3302482488805841 }, { "epoch": 4.1893872764585165, "grad_norm": 0.2971018037879146, "learning_rate": 0.00018641455234750818, "loss": 2.837045192718506, "step": 7147, "token_acc": 0.3199036372128917 }, { "epoch": 4.189973614775726, "grad_norm": 0.26381660353703046, "learning_rate": 0.00018640967447702237, "loss": 2.773775577545166, "step": 7148, "token_acc": 0.3313390138115599 }, { "epoch": 4.190559953092935, "grad_norm": 0.2689216620119575, "learning_rate": 0.00018640479579483407, "loss": 2.7813847064971924, "step": 7149, "token_acc": 0.3274797618251154 }, { "epoch": 4.191146291410144, "grad_norm": 0.27646580120974623, "learning_rate": 0.00018639991630098917, "loss": 2.797849655151367, "step": 7150, "token_acc": 0.32580738170133955 }, { "epoch": 4.191732629727353, "grad_norm": 0.30033542856581474, "learning_rate": 0.0001863950359955335, "loss": 2.8148417472839355, "step": 7151, "token_acc": 0.32331430004336037 }, { "epoch": 4.192318968044562, "grad_norm": 0.25430068792565647, "learning_rate": 0.00018639015487851283, "loss": 2.7596330642700195, "step": 7152, "token_acc": 0.33350099038651 }, { "epoch": 4.192905306361771, "grad_norm": 0.3061969566003337, "learning_rate": 0.00018638527294997313, "loss": 2.8368749618530273, "step": 7153, "token_acc": 0.32023378895051274 }, { "epoch": 4.193491644678979, "grad_norm": 0.29360521065985223, "learning_rate": 0.00018638039020996017, "loss": 2.8334736824035645, "step": 7154, "token_acc": 0.3222612657836815 }, { "epoch": 4.1940779829961885, "grad_norm": 0.26469594283487363, "learning_rate": 0.00018637550665851982, "loss": 2.7973012924194336, "step": 7155, "token_acc": 0.32807673110388647 }, { "epoch": 4.194664321313398, "grad_norm": 0.3395100447109936, "learning_rate": 0.000186370622295698, "loss": 2.798107862472534, "step": 7156, "token_acc": 0.32652112847911813 }, { "epoch": 4.195250659630607, "grad_norm": 0.2584639905947706, "learning_rate": 0.00018636573712154059, "loss": 2.7745542526245117, "step": 7157, "token_acc": 0.32972449402041887 }, { "epoch": 4.195836997947816, "grad_norm": 0.3287655019874536, "learning_rate": 0.00018636085113609343, "loss": 2.75933837890625, "step": 7158, "token_acc": 0.33145545844348034 }, { "epoch": 4.196423336265025, "grad_norm": 0.37963036731744526, "learning_rate": 0.00018635596433940244, "loss": 2.8011245727539062, "step": 7159, "token_acc": 0.3259422999765324 }, { "epoch": 4.197009674582234, "grad_norm": 0.2791200974416275, "learning_rate": 0.00018635107673151352, "loss": 2.7943596839904785, "step": 7160, "token_acc": 0.32793067331933323 }, { "epoch": 4.197596012899443, "grad_norm": 0.3190420515211332, "learning_rate": 0.00018634618831247262, "loss": 2.7719130516052246, "step": 7161, "token_acc": 0.3298731796526338 }, { "epoch": 4.198182351216652, "grad_norm": 0.29710887785827383, "learning_rate": 0.00018634129908232566, "loss": 2.7999250888824463, "step": 7162, "token_acc": 0.32671175540466924 }, { "epoch": 4.198768689533861, "grad_norm": 0.34573787991532473, "learning_rate": 0.00018633640904111852, "loss": 2.755390167236328, "step": 7163, "token_acc": 0.3320836998336177 }, { "epoch": 4.19935502785107, "grad_norm": 0.28669619273580593, "learning_rate": 0.00018633151818889712, "loss": 2.805746078491211, "step": 7164, "token_acc": 0.3246945907466223 }, { "epoch": 4.199941366168279, "grad_norm": 0.3486843824690745, "learning_rate": 0.00018632662652570749, "loss": 2.7802281379699707, "step": 7165, "token_acc": 0.3293920071374216 }, { "epoch": 4.200527704485488, "grad_norm": 0.33456941539566115, "learning_rate": 0.0001863217340515955, "loss": 2.775449275970459, "step": 7166, "token_acc": 0.3302888286248387 }, { "epoch": 4.201114042802697, "grad_norm": 0.3177513995503537, "learning_rate": 0.00018631684076660717, "loss": 2.775282859802246, "step": 7167, "token_acc": 0.32994647999052196 }, { "epoch": 4.201700381119906, "grad_norm": 0.3074760862911338, "learning_rate": 0.00018631194667078838, "loss": 2.8411147594451904, "step": 7168, "token_acc": 0.32009838545658564 }, { "epoch": 4.202286719437115, "grad_norm": 0.28596671519668726, "learning_rate": 0.0001863070517641852, "loss": 2.749114513397217, "step": 7169, "token_acc": 0.3348188834945138 }, { "epoch": 4.202873057754324, "grad_norm": 0.34823567201266376, "learning_rate": 0.00018630215604684356, "loss": 2.768881320953369, "step": 7170, "token_acc": 0.33090829604484906 }, { "epoch": 4.203459396071533, "grad_norm": 0.24434087076948205, "learning_rate": 0.00018629725951880945, "loss": 2.7958014011383057, "step": 7171, "token_acc": 0.32703188216453133 }, { "epoch": 4.2040457343887425, "grad_norm": 0.29725275208560054, "learning_rate": 0.00018629236218012886, "loss": 2.7710518836975098, "step": 7172, "token_acc": 0.3319634872141823 }, { "epoch": 4.204632072705952, "grad_norm": 0.25219867571094534, "learning_rate": 0.00018628746403084786, "loss": 2.810288906097412, "step": 7173, "token_acc": 0.3252672054744146 }, { "epoch": 4.205218411023161, "grad_norm": 0.26667411249406076, "learning_rate": 0.00018628256507101235, "loss": 2.8047566413879395, "step": 7174, "token_acc": 0.32448908936733883 }, { "epoch": 4.205804749340369, "grad_norm": 0.26546597053969334, "learning_rate": 0.00018627766530066847, "loss": 2.778273105621338, "step": 7175, "token_acc": 0.3297123247701258 }, { "epoch": 4.206391087657578, "grad_norm": 0.24790662637772332, "learning_rate": 0.00018627276471986214, "loss": 2.7851388454437256, "step": 7176, "token_acc": 0.3281740470489675 }, { "epoch": 4.206977425974787, "grad_norm": 0.2624436848159625, "learning_rate": 0.00018626786332863942, "loss": 2.7707605361938477, "step": 7177, "token_acc": 0.3303622452013953 }, { "epoch": 4.207563764291996, "grad_norm": 0.24880874356177782, "learning_rate": 0.0001862629611270464, "loss": 2.7740938663482666, "step": 7178, "token_acc": 0.3298171950793904 }, { "epoch": 4.208150102609205, "grad_norm": 0.25626723427079173, "learning_rate": 0.0001862580581151291, "loss": 2.816868782043457, "step": 7179, "token_acc": 0.32295718813266483 }, { "epoch": 4.2087364409264145, "grad_norm": 0.23242575522505876, "learning_rate": 0.0001862531542929336, "loss": 2.7931060791015625, "step": 7180, "token_acc": 0.32802569041658763 }, { "epoch": 4.209322779243624, "grad_norm": 0.27803035382476776, "learning_rate": 0.00018624824966050594, "loss": 2.7752556800842285, "step": 7181, "token_acc": 0.32841583807689056 }, { "epoch": 4.209909117560833, "grad_norm": 0.26622353642183966, "learning_rate": 0.00018624334421789217, "loss": 2.7808239459991455, "step": 7182, "token_acc": 0.33086606999444407 }, { "epoch": 4.210495455878042, "grad_norm": 0.267544186442729, "learning_rate": 0.00018623843796513841, "loss": 2.799476146697998, "step": 7183, "token_acc": 0.3260908253828238 }, { "epoch": 4.211081794195251, "grad_norm": 0.32001193414538076, "learning_rate": 0.00018623353090229072, "loss": 2.7878870964050293, "step": 7184, "token_acc": 0.3279342931240509 }, { "epoch": 4.21166813251246, "grad_norm": 0.2954581995548513, "learning_rate": 0.0001862286230293952, "loss": 2.8171629905700684, "step": 7185, "token_acc": 0.3227697935719885 }, { "epoch": 4.212254470829668, "grad_norm": 0.3089045434576776, "learning_rate": 0.00018622371434649798, "loss": 2.8179779052734375, "step": 7186, "token_acc": 0.3237001465956799 }, { "epoch": 4.212840809146877, "grad_norm": 0.413154502755599, "learning_rate": 0.00018621880485364517, "loss": 2.820302963256836, "step": 7187, "token_acc": 0.3229706804302725 }, { "epoch": 4.2134271474640865, "grad_norm": 0.3036522662385077, "learning_rate": 0.00018621389455088285, "loss": 2.872323513031006, "step": 7188, "token_acc": 0.3141758138170849 }, { "epoch": 4.214013485781296, "grad_norm": 0.28155944066202293, "learning_rate": 0.00018620898343825717, "loss": 2.766209125518799, "step": 7189, "token_acc": 0.3312721272607853 }, { "epoch": 4.214599824098505, "grad_norm": 0.3016276611269739, "learning_rate": 0.00018620407151581425, "loss": 2.8263587951660156, "step": 7190, "token_acc": 0.3210977354143723 }, { "epoch": 4.215186162415714, "grad_norm": 0.2517209729682834, "learning_rate": 0.00018619915878360024, "loss": 2.751180648803711, "step": 7191, "token_acc": 0.3344086383992822 }, { "epoch": 4.215772500732923, "grad_norm": 0.30602552379123477, "learning_rate": 0.00018619424524166127, "loss": 2.7748265266418457, "step": 7192, "token_acc": 0.32968820513343516 }, { "epoch": 4.216358839050132, "grad_norm": 0.26846420456726744, "learning_rate": 0.00018618933089004354, "loss": 2.7991480827331543, "step": 7193, "token_acc": 0.32640017723599435 }, { "epoch": 4.216945177367341, "grad_norm": 0.27349423413890933, "learning_rate": 0.00018618441572879318, "loss": 2.759122371673584, "step": 7194, "token_acc": 0.33251756962971424 }, { "epoch": 4.21753151568455, "grad_norm": 0.29711759770444435, "learning_rate": 0.00018617949975795637, "loss": 2.8319473266601562, "step": 7195, "token_acc": 0.32171724992257666 }, { "epoch": 4.218117854001759, "grad_norm": 0.24924275123868372, "learning_rate": 0.0001861745829775793, "loss": 2.786811113357544, "step": 7196, "token_acc": 0.3278883653013114 }, { "epoch": 4.218704192318968, "grad_norm": 0.3314997228043431, "learning_rate": 0.00018616966538770812, "loss": 2.8435819149017334, "step": 7197, "token_acc": 0.3184776167927803 }, { "epoch": 4.219290530636177, "grad_norm": 0.24426536008233282, "learning_rate": 0.00018616474698838906, "loss": 2.7798056602478027, "step": 7198, "token_acc": 0.3280384656061454 }, { "epoch": 4.219876868953386, "grad_norm": 0.28261368987984764, "learning_rate": 0.00018615982777966833, "loss": 2.8509159088134766, "step": 7199, "token_acc": 0.32023487619956525 }, { "epoch": 4.220463207270595, "grad_norm": 0.259177407666592, "learning_rate": 0.0001861549077615921, "loss": 2.8020074367523193, "step": 7200, "token_acc": 0.3258015560764421 }, { "epoch": 4.221049545587804, "grad_norm": 0.2784499628625681, "learning_rate": 0.0001861499869342066, "loss": 2.786154270172119, "step": 7201, "token_acc": 0.3271836673446037 }, { "epoch": 4.221635883905013, "grad_norm": 0.2770141097004256, "learning_rate": 0.00018614506529755808, "loss": 2.827462911605835, "step": 7202, "token_acc": 0.32255528976526904 }, { "epoch": 4.222222222222222, "grad_norm": 0.2665143154133402, "learning_rate": 0.00018614014285169274, "loss": 2.839615821838379, "step": 7203, "token_acc": 0.3204658398191819 }, { "epoch": 4.222808560539431, "grad_norm": 0.3090169036702747, "learning_rate": 0.00018613521959665686, "loss": 2.7768468856811523, "step": 7204, "token_acc": 0.329127732103469 }, { "epoch": 4.2233948988566405, "grad_norm": 0.2612189611700686, "learning_rate": 0.00018613029553249662, "loss": 2.798933506011963, "step": 7205, "token_acc": 0.3266280567650722 }, { "epoch": 4.22398123717385, "grad_norm": 0.2633362058115629, "learning_rate": 0.00018612537065925832, "loss": 2.8057100772857666, "step": 7206, "token_acc": 0.3245074237816565 }, { "epoch": 4.224567575491059, "grad_norm": 0.2582251005796516, "learning_rate": 0.00018612044497698824, "loss": 2.784407138824463, "step": 7207, "token_acc": 0.3286531515272696 }, { "epoch": 4.225153913808267, "grad_norm": 0.2570518390077076, "learning_rate": 0.0001861155184857326, "loss": 2.789052963256836, "step": 7208, "token_acc": 0.32912614715520716 }, { "epoch": 4.225740252125476, "grad_norm": 0.24467996896239622, "learning_rate": 0.00018611059118553774, "loss": 2.810938596725464, "step": 7209, "token_acc": 0.32572292612829024 }, { "epoch": 4.226326590442685, "grad_norm": 0.29150848679983293, "learning_rate": 0.0001861056630764499, "loss": 2.8374547958374023, "step": 7210, "token_acc": 0.32059990991868975 }, { "epoch": 4.226912928759894, "grad_norm": 0.26915418106421807, "learning_rate": 0.0001861007341585154, "loss": 2.7820582389831543, "step": 7211, "token_acc": 0.32779640234278185 }, { "epoch": 4.227499267077103, "grad_norm": 0.2565536197103172, "learning_rate": 0.0001860958044317805, "loss": 2.828176498413086, "step": 7212, "token_acc": 0.32188629471147856 }, { "epoch": 4.2280856053943126, "grad_norm": 0.2410132143674873, "learning_rate": 0.00018609087389629154, "loss": 2.8507637977600098, "step": 7213, "token_acc": 0.3179332989661924 }, { "epoch": 4.228671943711522, "grad_norm": 0.2706676227223853, "learning_rate": 0.00018608594255209484, "loss": 2.782197952270508, "step": 7214, "token_acc": 0.32843443872935457 }, { "epoch": 4.229258282028731, "grad_norm": 0.30932351221382404, "learning_rate": 0.00018608101039923667, "loss": 2.826585531234741, "step": 7215, "token_acc": 0.3223195312480446 }, { "epoch": 4.22984462034594, "grad_norm": 0.35800619903714426, "learning_rate": 0.00018607607743776345, "loss": 2.7740697860717773, "step": 7216, "token_acc": 0.32999056668539767 }, { "epoch": 4.230430958663149, "grad_norm": 0.2669117455777673, "learning_rate": 0.00018607114366772144, "loss": 2.82993221282959, "step": 7217, "token_acc": 0.3216169075052518 }, { "epoch": 4.231017296980358, "grad_norm": 0.27008272214236756, "learning_rate": 0.00018606620908915704, "loss": 2.777475357055664, "step": 7218, "token_acc": 0.3301514410825541 }, { "epoch": 4.231603635297566, "grad_norm": 0.327636506943128, "learning_rate": 0.00018606127370211656, "loss": 2.7887344360351562, "step": 7219, "token_acc": 0.3277534730023452 }, { "epoch": 4.2321899736147754, "grad_norm": 0.25232826421323556, "learning_rate": 0.00018605633750664642, "loss": 2.766155242919922, "step": 7220, "token_acc": 0.3300219960100261 }, { "epoch": 4.232776311931985, "grad_norm": 0.29311034226063165, "learning_rate": 0.00018605140050279292, "loss": 2.773897647857666, "step": 7221, "token_acc": 0.33007722279781926 }, { "epoch": 4.233362650249194, "grad_norm": 0.2708945256233776, "learning_rate": 0.00018604646269060248, "loss": 2.8470466136932373, "step": 7222, "token_acc": 0.319114115354783 }, { "epoch": 4.233948988566403, "grad_norm": 0.2642143483572943, "learning_rate": 0.00018604152407012146, "loss": 2.7781643867492676, "step": 7223, "token_acc": 0.32962308847479854 }, { "epoch": 4.234535326883612, "grad_norm": 0.33031745855416683, "learning_rate": 0.0001860365846413963, "loss": 2.8202686309814453, "step": 7224, "token_acc": 0.3222385777740985 }, { "epoch": 4.235121665200821, "grad_norm": 0.2601389423295867, "learning_rate": 0.00018603164440447333, "loss": 2.8353850841522217, "step": 7225, "token_acc": 0.3215255722264812 }, { "epoch": 4.23570800351803, "grad_norm": 0.3191855904121981, "learning_rate": 0.000186026703359399, "loss": 2.8178486824035645, "step": 7226, "token_acc": 0.32175634001335296 }, { "epoch": 4.236294341835239, "grad_norm": 0.2818438012315105, "learning_rate": 0.00018602176150621968, "loss": 2.815585136413574, "step": 7227, "token_acc": 0.3240985656882833 }, { "epoch": 4.236880680152448, "grad_norm": 0.29816872044435727, "learning_rate": 0.00018601681884498184, "loss": 2.7867541313171387, "step": 7228, "token_acc": 0.3271028289857578 }, { "epoch": 4.237467018469657, "grad_norm": 0.333861646056676, "learning_rate": 0.00018601187537573192, "loss": 2.79691219329834, "step": 7229, "token_acc": 0.3261013303695242 }, { "epoch": 4.238053356786866, "grad_norm": 0.26166243408199225, "learning_rate": 0.00018600693109851633, "loss": 2.798318862915039, "step": 7230, "token_acc": 0.32501832442030004 }, { "epoch": 4.238639695104075, "grad_norm": 0.2868860559254532, "learning_rate": 0.00018600198601338152, "loss": 2.8321170806884766, "step": 7231, "token_acc": 0.3216889179026253 }, { "epoch": 4.239226033421284, "grad_norm": 0.28229931034310796, "learning_rate": 0.00018599704012037393, "loss": 2.828172206878662, "step": 7232, "token_acc": 0.3211865380197565 }, { "epoch": 4.239812371738493, "grad_norm": 0.2908369324851455, "learning_rate": 0.00018599209341954, "loss": 2.8463354110717773, "step": 7233, "token_acc": 0.31912504623014765 }, { "epoch": 4.240398710055702, "grad_norm": 0.26941372844602013, "learning_rate": 0.00018598714591092628, "loss": 2.7919979095458984, "step": 7234, "token_acc": 0.32792464441650354 }, { "epoch": 4.240985048372911, "grad_norm": 0.29940514025789516, "learning_rate": 0.0001859821975945792, "loss": 2.8361620903015137, "step": 7235, "token_acc": 0.3199322057459373 }, { "epoch": 4.24157138669012, "grad_norm": 0.2615773277327321, "learning_rate": 0.00018597724847054518, "loss": 2.7836971282958984, "step": 7236, "token_acc": 0.3276175636605546 }, { "epoch": 4.2421577250073295, "grad_norm": 0.33595114121691927, "learning_rate": 0.0001859722985388708, "loss": 2.817362070083618, "step": 7237, "token_acc": 0.32363001103346817 }, { "epoch": 4.242744063324539, "grad_norm": 0.27727074988570233, "learning_rate": 0.00018596734779960253, "loss": 2.8033151626586914, "step": 7238, "token_acc": 0.32579815387221067 }, { "epoch": 4.243330401641748, "grad_norm": 0.3036697913217882, "learning_rate": 0.00018596239625278687, "loss": 2.775413751602173, "step": 7239, "token_acc": 0.33128635202712853 }, { "epoch": 4.243916739958956, "grad_norm": 0.31217994949411854, "learning_rate": 0.00018595744389847033, "loss": 2.8296804428100586, "step": 7240, "token_acc": 0.3213631723087394 }, { "epoch": 4.244503078276165, "grad_norm": 0.29791173651230707, "learning_rate": 0.0001859524907366994, "loss": 2.777073383331299, "step": 7241, "token_acc": 0.3279179201176034 }, { "epoch": 4.245089416593374, "grad_norm": 0.29269367134351376, "learning_rate": 0.00018594753676752068, "loss": 2.8041634559631348, "step": 7242, "token_acc": 0.3250288615142457 }, { "epoch": 4.245675754910583, "grad_norm": 0.2683766588735151, "learning_rate": 0.00018594258199098067, "loss": 2.77238130569458, "step": 7243, "token_acc": 0.3301181165373822 }, { "epoch": 4.246262093227792, "grad_norm": 0.30190671688672743, "learning_rate": 0.00018593762640712588, "loss": 2.7948150634765625, "step": 7244, "token_acc": 0.32656787605332016 }, { "epoch": 4.2468484315450015, "grad_norm": 0.2611529307863619, "learning_rate": 0.00018593267001600292, "loss": 2.820159912109375, "step": 7245, "token_acc": 0.32378601958515046 }, { "epoch": 4.247434769862211, "grad_norm": 0.2729489508028632, "learning_rate": 0.00018592771281765832, "loss": 2.7711167335510254, "step": 7246, "token_acc": 0.33006193671851763 }, { "epoch": 4.24802110817942, "grad_norm": 0.2500548574256754, "learning_rate": 0.00018592275481213864, "loss": 2.7811269760131836, "step": 7247, "token_acc": 0.32828630240496104 }, { "epoch": 4.248607446496629, "grad_norm": 0.293296876868701, "learning_rate": 0.00018591779599949043, "loss": 2.8041152954101562, "step": 7248, "token_acc": 0.3249691702700843 }, { "epoch": 4.249193784813838, "grad_norm": 0.2591749448981494, "learning_rate": 0.00018591283637976036, "loss": 2.820497512817383, "step": 7249, "token_acc": 0.32207114879571763 }, { "epoch": 4.249780123131047, "grad_norm": 0.2573841579489027, "learning_rate": 0.00018590787595299492, "loss": 2.799330711364746, "step": 7250, "token_acc": 0.3264108412008062 }, { "epoch": 4.250366461448255, "grad_norm": 0.2934884045768928, "learning_rate": 0.00018590291471924078, "loss": 2.8133256435394287, "step": 7251, "token_acc": 0.3234112176359148 }, { "epoch": 4.250952799765464, "grad_norm": 0.2778698682354681, "learning_rate": 0.00018589795267854448, "loss": 2.808682441711426, "step": 7252, "token_acc": 0.32576857317978 }, { "epoch": 4.2515391380826735, "grad_norm": 0.2661434113633592, "learning_rate": 0.00018589298983095266, "loss": 2.769360065460205, "step": 7253, "token_acc": 0.330891473169317 }, { "epoch": 4.252125476399883, "grad_norm": 0.29665914738035704, "learning_rate": 0.000185888026176512, "loss": 2.795877695083618, "step": 7254, "token_acc": 0.3248432694349834 }, { "epoch": 4.252711814717092, "grad_norm": 0.2770992638822399, "learning_rate": 0.000185883061715269, "loss": 2.8054394721984863, "step": 7255, "token_acc": 0.32559387114931804 }, { "epoch": 4.253298153034301, "grad_norm": 0.24699655417277075, "learning_rate": 0.0001858780964472704, "loss": 2.833385944366455, "step": 7256, "token_acc": 0.32014055902240884 }, { "epoch": 4.25388449135151, "grad_norm": 0.26090048906427205, "learning_rate": 0.0001858731303725628, "loss": 2.7807912826538086, "step": 7257, "token_acc": 0.32841326471547794 }, { "epoch": 4.254470829668719, "grad_norm": 0.23758348226021472, "learning_rate": 0.00018586816349119286, "loss": 2.7943272590637207, "step": 7258, "token_acc": 0.3254815892445069 }, { "epoch": 4.255057167985928, "grad_norm": 0.24529113061316865, "learning_rate": 0.00018586319580320723, "loss": 2.762887954711914, "step": 7259, "token_acc": 0.3325062349839744 }, { "epoch": 4.255643506303137, "grad_norm": 0.25691253873043135, "learning_rate": 0.00018585822730865257, "loss": 2.8289601802825928, "step": 7260, "token_acc": 0.3222533395859149 }, { "epoch": 4.256229844620346, "grad_norm": 0.23877210021621345, "learning_rate": 0.00018585325800757557, "loss": 2.7962939739227295, "step": 7261, "token_acc": 0.32584877231849896 }, { "epoch": 4.256816182937555, "grad_norm": 0.25923808291081674, "learning_rate": 0.0001858482879000229, "loss": 2.794616937637329, "step": 7262, "token_acc": 0.3256992623577806 }, { "epoch": 4.257402521254764, "grad_norm": 0.2516689961425692, "learning_rate": 0.00018584331698604122, "loss": 2.761894702911377, "step": 7263, "token_acc": 0.33134821124790503 }, { "epoch": 4.257988859571973, "grad_norm": 0.25685691403779953, "learning_rate": 0.0001858383452656773, "loss": 2.8225340843200684, "step": 7264, "token_acc": 0.3221636380998098 }, { "epoch": 4.258575197889182, "grad_norm": 0.2614171815863196, "learning_rate": 0.00018583337273897775, "loss": 2.8659238815307617, "step": 7265, "token_acc": 0.31620199715887537 }, { "epoch": 4.259161536206391, "grad_norm": 0.28101959485441763, "learning_rate": 0.00018582839940598934, "loss": 2.8032522201538086, "step": 7266, "token_acc": 0.32640431307781387 }, { "epoch": 4.2597478745236, "grad_norm": 0.3260857270166502, "learning_rate": 0.00018582342526675876, "loss": 2.799434185028076, "step": 7267, "token_acc": 0.32405240094241883 }, { "epoch": 4.260334212840809, "grad_norm": 0.2669054420268166, "learning_rate": 0.0001858184503213328, "loss": 2.7674007415771484, "step": 7268, "token_acc": 0.331480740216153 }, { "epoch": 4.260920551158018, "grad_norm": 0.2665435458390013, "learning_rate": 0.00018581347456975812, "loss": 2.83184552192688, "step": 7269, "token_acc": 0.32219774482356295 }, { "epoch": 4.2615068894752275, "grad_norm": 0.31123896032413284, "learning_rate": 0.00018580849801208148, "loss": 2.767821788787842, "step": 7270, "token_acc": 0.3315526904787823 }, { "epoch": 4.262093227792437, "grad_norm": 0.3869402880382641, "learning_rate": 0.00018580352064834958, "loss": 2.814661741256714, "step": 7271, "token_acc": 0.32414627376844 }, { "epoch": 4.262679566109645, "grad_norm": 0.3118106787483905, "learning_rate": 0.00018579854247860927, "loss": 2.7867748737335205, "step": 7272, "token_acc": 0.32802088396230855 }, { "epoch": 4.263265904426854, "grad_norm": 0.28627442903720196, "learning_rate": 0.00018579356350290727, "loss": 2.8296127319335938, "step": 7273, "token_acc": 0.32161217612756515 }, { "epoch": 4.263852242744063, "grad_norm": 0.31924324436555945, "learning_rate": 0.00018578858372129034, "loss": 2.823333501815796, "step": 7274, "token_acc": 0.3227546079545739 }, { "epoch": 4.264438581061272, "grad_norm": 0.26054060664355555, "learning_rate": 0.00018578360313380526, "loss": 2.8126187324523926, "step": 7275, "token_acc": 0.32375245615742554 }, { "epoch": 4.265024919378481, "grad_norm": 0.307485784417134, "learning_rate": 0.00018577862174049882, "loss": 2.801429271697998, "step": 7276, "token_acc": 0.32372586439155815 }, { "epoch": 4.26561125769569, "grad_norm": 0.2671535147129377, "learning_rate": 0.00018577363954141784, "loss": 2.817614793777466, "step": 7277, "token_acc": 0.32333456405928584 }, { "epoch": 4.2661975960128995, "grad_norm": 0.29732471457292126, "learning_rate": 0.0001857686565366091, "loss": 2.8188486099243164, "step": 7278, "token_acc": 0.32449842180632565 }, { "epoch": 4.266783934330109, "grad_norm": 0.2661148400330736, "learning_rate": 0.0001857636727261194, "loss": 2.807769298553467, "step": 7279, "token_acc": 0.3259313607281571 }, { "epoch": 4.267370272647318, "grad_norm": 0.2825555667667071, "learning_rate": 0.00018575868810999553, "loss": 2.7600135803222656, "step": 7280, "token_acc": 0.33153001027141266 }, { "epoch": 4.267956610964527, "grad_norm": 0.28661788779829256, "learning_rate": 0.00018575370268828439, "loss": 2.8024230003356934, "step": 7281, "token_acc": 0.3269853735152646 }, { "epoch": 4.268542949281736, "grad_norm": 0.27240053835138306, "learning_rate": 0.0001857487164610327, "loss": 2.8084990978240967, "step": 7282, "token_acc": 0.32539578767893473 }, { "epoch": 4.269129287598945, "grad_norm": 0.2994600424082782, "learning_rate": 0.00018574372942828745, "loss": 2.8383166790008545, "step": 7283, "token_acc": 0.31961565124024666 }, { "epoch": 4.269715625916153, "grad_norm": 0.2741971250446781, "learning_rate": 0.00018573874159009537, "loss": 2.8139712810516357, "step": 7284, "token_acc": 0.3227656013517301 }, { "epoch": 4.270301964233362, "grad_norm": 0.27702364708963007, "learning_rate": 0.00018573375294650334, "loss": 2.7799324989318848, "step": 7285, "token_acc": 0.32980253004260707 }, { "epoch": 4.2708883025505715, "grad_norm": 0.2510879014431597, "learning_rate": 0.00018572876349755822, "loss": 2.778569221496582, "step": 7286, "token_acc": 0.33082968716377537 }, { "epoch": 4.271474640867781, "grad_norm": 0.291916278558268, "learning_rate": 0.00018572377324330692, "loss": 2.85839581489563, "step": 7287, "token_acc": 0.31741911270270695 }, { "epoch": 4.27206097918499, "grad_norm": 0.2683862718991664, "learning_rate": 0.00018571878218379628, "loss": 2.7897744178771973, "step": 7288, "token_acc": 0.32731188455824856 }, { "epoch": 4.272647317502199, "grad_norm": 0.2671692459182062, "learning_rate": 0.00018571379031907315, "loss": 2.786526918411255, "step": 7289, "token_acc": 0.3282105817429057 }, { "epoch": 4.273233655819408, "grad_norm": 0.2703502658305935, "learning_rate": 0.00018570879764918453, "loss": 2.7870378494262695, "step": 7290, "token_acc": 0.3289858008529233 }, { "epoch": 4.273819994136617, "grad_norm": 0.2728508612477497, "learning_rate": 0.00018570380417417718, "loss": 2.838200807571411, "step": 7291, "token_acc": 0.32023647281787015 }, { "epoch": 4.274406332453826, "grad_norm": 0.24749302082108923, "learning_rate": 0.0001856988098940981, "loss": 2.8244848251342773, "step": 7292, "token_acc": 0.3225096298023175 }, { "epoch": 4.274992670771035, "grad_norm": 0.2653348020573965, "learning_rate": 0.0001856938148089942, "loss": 2.789384365081787, "step": 7293, "token_acc": 0.32779352795443517 }, { "epoch": 4.2755790090882435, "grad_norm": 0.24862873926523077, "learning_rate": 0.00018568881891891238, "loss": 2.8305468559265137, "step": 7294, "token_acc": 0.3214331328963569 }, { "epoch": 4.276165347405453, "grad_norm": 0.2419331682356139, "learning_rate": 0.00018568382222389955, "loss": 2.819166660308838, "step": 7295, "token_acc": 0.3227081124414906 }, { "epoch": 4.276751685722662, "grad_norm": 0.24805843511143963, "learning_rate": 0.00018567882472400268, "loss": 2.806670904159546, "step": 7296, "token_acc": 0.325897966762131 }, { "epoch": 4.277338024039871, "grad_norm": 0.2547008504523437, "learning_rate": 0.00018567382641926868, "loss": 2.810605049133301, "step": 7297, "token_acc": 0.32482637478677606 }, { "epoch": 4.27792436235708, "grad_norm": 0.26407989280879574, "learning_rate": 0.00018566882730974458, "loss": 2.822115182876587, "step": 7298, "token_acc": 0.322613020437868 }, { "epoch": 4.278510700674289, "grad_norm": 0.2751150090347371, "learning_rate": 0.00018566382739547725, "loss": 2.8266501426696777, "step": 7299, "token_acc": 0.3211986427582 }, { "epoch": 4.279097038991498, "grad_norm": 0.2652782642016134, "learning_rate": 0.00018565882667651373, "loss": 2.8442234992980957, "step": 7300, "token_acc": 0.31990304652540813 }, { "epoch": 4.279683377308707, "grad_norm": 0.24070980062212655, "learning_rate": 0.00018565382515290093, "loss": 2.816871166229248, "step": 7301, "token_acc": 0.3222111779931683 }, { "epoch": 4.280269715625916, "grad_norm": 0.28090543592421724, "learning_rate": 0.0001856488228246859, "loss": 2.83585786819458, "step": 7302, "token_acc": 0.3214441835567457 }, { "epoch": 4.2808560539431255, "grad_norm": 0.2524501657756281, "learning_rate": 0.00018564381969191556, "loss": 2.823448657989502, "step": 7303, "token_acc": 0.3221172103571295 }, { "epoch": 4.281442392260335, "grad_norm": 0.2489536568136416, "learning_rate": 0.00018563881575463692, "loss": 2.7881476879119873, "step": 7304, "token_acc": 0.32695628115729664 }, { "epoch": 4.282028730577543, "grad_norm": 0.23986620733242398, "learning_rate": 0.00018563381101289705, "loss": 2.8279004096984863, "step": 7305, "token_acc": 0.32289972592239596 }, { "epoch": 4.282615068894752, "grad_norm": 0.2755080600773978, "learning_rate": 0.0001856288054667429, "loss": 2.8109140396118164, "step": 7306, "token_acc": 0.3242606053809373 }, { "epoch": 4.283201407211961, "grad_norm": 0.3120137641574296, "learning_rate": 0.0001856237991162215, "loss": 2.7446224689483643, "step": 7307, "token_acc": 0.33326992629565944 }, { "epoch": 4.28378774552917, "grad_norm": 0.3396846419683235, "learning_rate": 0.0001856187919613799, "loss": 2.7877445220947266, "step": 7308, "token_acc": 0.32822685874362345 }, { "epoch": 4.284374083846379, "grad_norm": 0.2963000468260124, "learning_rate": 0.0001856137840022651, "loss": 2.8122153282165527, "step": 7309, "token_acc": 0.32449347732274053 }, { "epoch": 4.284960422163588, "grad_norm": 0.2490675519726043, "learning_rate": 0.00018560877523892422, "loss": 2.823389768600464, "step": 7310, "token_acc": 0.3218357413926514 }, { "epoch": 4.2855467604807975, "grad_norm": 0.3128231230966118, "learning_rate": 0.00018560376567140421, "loss": 2.824934720993042, "step": 7311, "token_acc": 0.32309449386871847 }, { "epoch": 4.286133098798007, "grad_norm": 0.3339320736498481, "learning_rate": 0.0001855987552997522, "loss": 2.7988572120666504, "step": 7312, "token_acc": 0.3256756302477548 }, { "epoch": 4.286719437115216, "grad_norm": 0.25450277012409295, "learning_rate": 0.0001855937441240152, "loss": 2.8208513259887695, "step": 7313, "token_acc": 0.32296150437076393 }, { "epoch": 4.287305775432425, "grad_norm": 0.2991275782456912, "learning_rate": 0.00018558873214424032, "loss": 2.7601354122161865, "step": 7314, "token_acc": 0.3321531574757067 }, { "epoch": 4.287892113749633, "grad_norm": 0.36602093579034706, "learning_rate": 0.00018558371936047463, "loss": 2.794074773788452, "step": 7315, "token_acc": 0.329046180405697 }, { "epoch": 4.288478452066842, "grad_norm": 0.26590871957334616, "learning_rate": 0.00018557870577276523, "loss": 2.813007354736328, "step": 7316, "token_acc": 0.32435362611749113 }, { "epoch": 4.289064790384051, "grad_norm": 0.3401038590177105, "learning_rate": 0.0001855736913811592, "loss": 2.833259344100952, "step": 7317, "token_acc": 0.3220998791231413 }, { "epoch": 4.28965112870126, "grad_norm": 0.2672034063573335, "learning_rate": 0.00018556867618570367, "loss": 2.8263285160064697, "step": 7318, "token_acc": 0.32193144911027416 }, { "epoch": 4.2902374670184695, "grad_norm": 0.2851360671836541, "learning_rate": 0.00018556366018644574, "loss": 2.79093599319458, "step": 7319, "token_acc": 0.328227769629708 }, { "epoch": 4.290823805335679, "grad_norm": 0.2725928167778592, "learning_rate": 0.0001855586433834325, "loss": 2.8039135932922363, "step": 7320, "token_acc": 0.3249584160890039 }, { "epoch": 4.291410143652888, "grad_norm": 0.2757176245922568, "learning_rate": 0.0001855536257767111, "loss": 2.805896282196045, "step": 7321, "token_acc": 0.3260785716755733 }, { "epoch": 4.291996481970097, "grad_norm": 0.2767721173300752, "learning_rate": 0.00018554860736632865, "loss": 2.8147716522216797, "step": 7322, "token_acc": 0.3227741882458368 }, { "epoch": 4.292582820287306, "grad_norm": 0.2393160937746302, "learning_rate": 0.00018554358815233235, "loss": 2.8246254920959473, "step": 7323, "token_acc": 0.32186129563261073 }, { "epoch": 4.293169158604515, "grad_norm": 0.2555788052803914, "learning_rate": 0.0001855385681347693, "loss": 2.807732343673706, "step": 7324, "token_acc": 0.3241206852355838 }, { "epoch": 4.293755496921724, "grad_norm": 0.2471076321876937, "learning_rate": 0.00018553354731368665, "loss": 2.823533296585083, "step": 7325, "token_acc": 0.32330630743447575 }, { "epoch": 4.294341835238933, "grad_norm": 0.2761763041860472, "learning_rate": 0.00018552852568913162, "loss": 2.778254270553589, "step": 7326, "token_acc": 0.3290900482619586 }, { "epoch": 4.2949281735561415, "grad_norm": 0.23672794424754837, "learning_rate": 0.0001855235032611513, "loss": 2.802098274230957, "step": 7327, "token_acc": 0.32537915115655325 }, { "epoch": 4.295514511873351, "grad_norm": 0.27248740962549783, "learning_rate": 0.00018551848002979293, "loss": 2.783581018447876, "step": 7328, "token_acc": 0.32810237321536095 }, { "epoch": 4.29610085019056, "grad_norm": 0.2407725208862905, "learning_rate": 0.00018551345599510368, "loss": 2.8062942028045654, "step": 7329, "token_acc": 0.3237197628407883 }, { "epoch": 4.296687188507769, "grad_norm": 0.2637366434839126, "learning_rate": 0.00018550843115713072, "loss": 2.811466932296753, "step": 7330, "token_acc": 0.3243766642459453 }, { "epoch": 4.297273526824978, "grad_norm": 0.24973256606261507, "learning_rate": 0.0001855034055159213, "loss": 2.8131279945373535, "step": 7331, "token_acc": 0.32352479557190167 }, { "epoch": 4.297859865142187, "grad_norm": 0.28059564849282453, "learning_rate": 0.0001854983790715226, "loss": 2.8321104049682617, "step": 7332, "token_acc": 0.32301145330145026 }, { "epoch": 4.298446203459396, "grad_norm": 0.29650161031004485, "learning_rate": 0.00018549335182398182, "loss": 2.832314968109131, "step": 7333, "token_acc": 0.32127787667571456 }, { "epoch": 4.299032541776605, "grad_norm": 0.27229506990058105, "learning_rate": 0.00018548832377334622, "loss": 2.8226585388183594, "step": 7334, "token_acc": 0.32414593592460966 }, { "epoch": 4.299618880093814, "grad_norm": 0.26223986554659007, "learning_rate": 0.000185483294919663, "loss": 2.7886390686035156, "step": 7335, "token_acc": 0.32820848410193243 }, { "epoch": 4.3002052184110235, "grad_norm": 0.25132379075741357, "learning_rate": 0.00018547826526297946, "loss": 2.8314831256866455, "step": 7336, "token_acc": 0.32085945863281845 }, { "epoch": 4.300791556728232, "grad_norm": 0.24211622271724428, "learning_rate": 0.00018547323480334272, "loss": 2.808999538421631, "step": 7337, "token_acc": 0.3240410300245753 }, { "epoch": 4.301377895045441, "grad_norm": 0.2695695518499097, "learning_rate": 0.0001854682035408002, "loss": 2.78363299369812, "step": 7338, "token_acc": 0.3288140199066476 }, { "epoch": 4.30196423336265, "grad_norm": 0.288051327469357, "learning_rate": 0.00018546317147539903, "loss": 2.8311729431152344, "step": 7339, "token_acc": 0.32169637562903064 }, { "epoch": 4.302550571679859, "grad_norm": 0.27628220053742264, "learning_rate": 0.00018545813860718652, "loss": 2.8121166229248047, "step": 7340, "token_acc": 0.3246737337062339 }, { "epoch": 4.303136909997068, "grad_norm": 0.2704143247505957, "learning_rate": 0.00018545310493620998, "loss": 2.8320748805999756, "step": 7341, "token_acc": 0.3223838696559052 }, { "epoch": 4.303723248314277, "grad_norm": 0.3011045280377905, "learning_rate": 0.00018544807046251663, "loss": 2.7819314002990723, "step": 7342, "token_acc": 0.32712941586674343 }, { "epoch": 4.304309586631486, "grad_norm": 0.2991484339880676, "learning_rate": 0.00018544303518615386, "loss": 2.8504786491394043, "step": 7343, "token_acc": 0.31816578108959137 }, { "epoch": 4.3048959249486956, "grad_norm": 0.25622455690861545, "learning_rate": 0.00018543799910716887, "loss": 2.8148651123046875, "step": 7344, "token_acc": 0.32281214293478117 }, { "epoch": 4.305482263265905, "grad_norm": 0.29244727707376394, "learning_rate": 0.00018543296222560903, "loss": 2.822002410888672, "step": 7345, "token_acc": 0.32275306744387655 }, { "epoch": 4.306068601583114, "grad_norm": 0.24415635864595084, "learning_rate": 0.0001854279245415216, "loss": 2.7905263900756836, "step": 7346, "token_acc": 0.3285865667200852 }, { "epoch": 4.306654939900323, "grad_norm": 0.27899228508673524, "learning_rate": 0.00018542288605495396, "loss": 2.8284225463867188, "step": 7347, "token_acc": 0.32066951956214335 }, { "epoch": 4.307241278217531, "grad_norm": 0.2555604196620964, "learning_rate": 0.0001854178467659534, "loss": 2.7665023803710938, "step": 7348, "token_acc": 0.3302447858252647 }, { "epoch": 4.30782761653474, "grad_norm": 0.2632727410892724, "learning_rate": 0.0001854128066745673, "loss": 2.808840036392212, "step": 7349, "token_acc": 0.3248949502796346 }, { "epoch": 4.308413954851949, "grad_norm": 0.2844855579772716, "learning_rate": 0.00018540776578084293, "loss": 2.8241991996765137, "step": 7350, "token_acc": 0.32150459295979505 }, { "epoch": 4.3090002931691584, "grad_norm": 0.2717218439839742, "learning_rate": 0.00018540272408482773, "loss": 2.851439952850342, "step": 7351, "token_acc": 0.317790741144165 }, { "epoch": 4.309586631486368, "grad_norm": 0.2569263156480144, "learning_rate": 0.00018539768158656902, "loss": 2.8031463623046875, "step": 7352, "token_acc": 0.32549196387647894 }, { "epoch": 4.310172969803577, "grad_norm": 0.2913178961816923, "learning_rate": 0.00018539263828611414, "loss": 2.7898645401000977, "step": 7353, "token_acc": 0.3290096287189059 }, { "epoch": 4.310759308120786, "grad_norm": 0.25963484039673174, "learning_rate": 0.0001853875941835105, "loss": 2.787909984588623, "step": 7354, "token_acc": 0.32731684395798105 }, { "epoch": 4.311345646437995, "grad_norm": 0.2589206946766923, "learning_rate": 0.00018538254927880549, "loss": 2.8027899265289307, "step": 7355, "token_acc": 0.3247565641102924 }, { "epoch": 4.311931984755204, "grad_norm": 0.27180997580918637, "learning_rate": 0.00018537750357204647, "loss": 2.7803854942321777, "step": 7356, "token_acc": 0.3289767237148971 }, { "epoch": 4.312518323072413, "grad_norm": 0.2855486724334418, "learning_rate": 0.00018537245706328083, "loss": 2.786447048187256, "step": 7357, "token_acc": 0.32747780023024964 }, { "epoch": 4.313104661389621, "grad_norm": 0.2668825983689697, "learning_rate": 0.00018536740975255603, "loss": 2.8221518993377686, "step": 7358, "token_acc": 0.3229034836926329 }, { "epoch": 4.3136909997068305, "grad_norm": 0.2541301667346999, "learning_rate": 0.00018536236163991943, "loss": 2.838918685913086, "step": 7359, "token_acc": 0.3189109361583265 }, { "epoch": 4.31427733802404, "grad_norm": 0.24058252653276763, "learning_rate": 0.00018535731272541849, "loss": 2.820225715637207, "step": 7360, "token_acc": 0.3220257705637019 }, { "epoch": 4.314863676341249, "grad_norm": 0.2537094571498051, "learning_rate": 0.0001853522630091006, "loss": 2.837794542312622, "step": 7361, "token_acc": 0.32047502715825227 }, { "epoch": 4.315450014658458, "grad_norm": 0.2321610535595397, "learning_rate": 0.00018534721249101321, "loss": 2.796674966812134, "step": 7362, "token_acc": 0.32710979974319043 }, { "epoch": 4.316036352975667, "grad_norm": 0.2729601426540344, "learning_rate": 0.00018534216117120376, "loss": 2.8310489654541016, "step": 7363, "token_acc": 0.32185640290941153 }, { "epoch": 4.316622691292876, "grad_norm": 0.2928943374150621, "learning_rate": 0.00018533710904971974, "loss": 2.782020092010498, "step": 7364, "token_acc": 0.32851574919691706 }, { "epoch": 4.317209029610085, "grad_norm": 0.33186848199360436, "learning_rate": 0.00018533205612660854, "loss": 2.792886257171631, "step": 7365, "token_acc": 0.3267970771687235 }, { "epoch": 4.317795367927294, "grad_norm": 0.29396320143969734, "learning_rate": 0.00018532700240191766, "loss": 2.8050012588500977, "step": 7366, "token_acc": 0.32734517899137705 }, { "epoch": 4.318381706244503, "grad_norm": 0.242382467888204, "learning_rate": 0.00018532194787569458, "loss": 2.8372154235839844, "step": 7367, "token_acc": 0.32093334954809966 }, { "epoch": 4.3189680445617125, "grad_norm": 0.3018708021737018, "learning_rate": 0.00018531689254798679, "loss": 2.857952117919922, "step": 7368, "token_acc": 0.3182269720461962 }, { "epoch": 4.319554382878922, "grad_norm": 0.28574251410242824, "learning_rate": 0.00018531183641884175, "loss": 2.8318610191345215, "step": 7369, "token_acc": 0.3224281947024513 }, { "epoch": 4.32014072119613, "grad_norm": 0.24408509010222723, "learning_rate": 0.00018530677948830695, "loss": 2.8215036392211914, "step": 7370, "token_acc": 0.32163402299545685 }, { "epoch": 4.320727059513339, "grad_norm": 0.34518635217296834, "learning_rate": 0.00018530172175642992, "loss": 2.8243284225463867, "step": 7371, "token_acc": 0.32230077452224043 }, { "epoch": 4.321313397830548, "grad_norm": 0.3171144494115203, "learning_rate": 0.00018529666322325816, "loss": 2.8102946281433105, "step": 7372, "token_acc": 0.32534964241571607 }, { "epoch": 4.321899736147757, "grad_norm": 0.27451174702407827, "learning_rate": 0.0001852916038888392, "loss": 2.815958023071289, "step": 7373, "token_acc": 0.3258603070047708 }, { "epoch": 4.322486074464966, "grad_norm": 0.28523242562530465, "learning_rate": 0.00018528654375322054, "loss": 2.7794761657714844, "step": 7374, "token_acc": 0.32956259816483424 }, { "epoch": 4.323072412782175, "grad_norm": 0.2592134744682898, "learning_rate": 0.00018528148281644972, "loss": 2.801241874694824, "step": 7375, "token_acc": 0.32708814961487026 }, { "epoch": 4.3236587510993845, "grad_norm": 0.25679270863521686, "learning_rate": 0.0001852764210785743, "loss": 2.815281391143799, "step": 7376, "token_acc": 0.3238663564776118 }, { "epoch": 4.324245089416594, "grad_norm": 0.24667412760522206, "learning_rate": 0.0001852713585396418, "loss": 2.7800350189208984, "step": 7377, "token_acc": 0.3298651231305891 }, { "epoch": 4.324831427733803, "grad_norm": 0.2627684435486072, "learning_rate": 0.00018526629519969982, "loss": 2.783808946609497, "step": 7378, "token_acc": 0.32935931246150196 }, { "epoch": 4.325417766051012, "grad_norm": 0.2667008597177768, "learning_rate": 0.00018526123105879586, "loss": 2.8039867877960205, "step": 7379, "token_acc": 0.32489695716777567 }, { "epoch": 4.32600410436822, "grad_norm": 0.2590757655000024, "learning_rate": 0.00018525616611697756, "loss": 2.8031604290008545, "step": 7380, "token_acc": 0.32589459354336836 }, { "epoch": 4.326590442685429, "grad_norm": 0.26233243923293786, "learning_rate": 0.00018525110037429245, "loss": 2.827221632003784, "step": 7381, "token_acc": 0.3228146159296373 }, { "epoch": 4.327176781002638, "grad_norm": 0.26757584022367426, "learning_rate": 0.0001852460338307881, "loss": 2.7913246154785156, "step": 7382, "token_acc": 0.3276428237928304 }, { "epoch": 4.327763119319847, "grad_norm": 0.2717490114631852, "learning_rate": 0.00018524096648651214, "loss": 2.8125948905944824, "step": 7383, "token_acc": 0.32433331341220367 }, { "epoch": 4.3283494576370565, "grad_norm": 0.2616628626748493, "learning_rate": 0.0001852358983415122, "loss": 2.8318963050842285, "step": 7384, "token_acc": 0.3208337026578704 }, { "epoch": 4.328935795954266, "grad_norm": 0.2582633175302739, "learning_rate": 0.00018523082939583585, "loss": 2.7909657955169678, "step": 7385, "token_acc": 0.3286259779608608 }, { "epoch": 4.329522134271475, "grad_norm": 0.28136044849043373, "learning_rate": 0.0001852257596495307, "loss": 2.83647084236145, "step": 7386, "token_acc": 0.32002741569040577 }, { "epoch": 4.330108472588684, "grad_norm": 0.2650291408241554, "learning_rate": 0.00018522068910264435, "loss": 2.8075337409973145, "step": 7387, "token_acc": 0.3239179789704238 }, { "epoch": 4.330694810905893, "grad_norm": 0.25594984829641465, "learning_rate": 0.0001852156177552245, "loss": 2.763432502746582, "step": 7388, "token_acc": 0.3304623341650682 }, { "epoch": 4.331281149223102, "grad_norm": 0.25309265520875107, "learning_rate": 0.0001852105456073187, "loss": 2.833162546157837, "step": 7389, "token_acc": 0.3221918124239755 }, { "epoch": 4.331867487540311, "grad_norm": 0.28936436487018385, "learning_rate": 0.0001852054726589747, "loss": 2.852695941925049, "step": 7390, "token_acc": 0.3183785951263865 }, { "epoch": 4.33245382585752, "grad_norm": 0.3653333311441747, "learning_rate": 0.00018520039891024007, "loss": 2.846194267272949, "step": 7391, "token_acc": 0.3181990552136604 }, { "epoch": 4.3330401641747285, "grad_norm": 0.39009068238795525, "learning_rate": 0.00018519532436116253, "loss": 2.834728717803955, "step": 7392, "token_acc": 0.3199874391584236 }, { "epoch": 4.333626502491938, "grad_norm": 0.29169434278831785, "learning_rate": 0.00018519024901178968, "loss": 2.783836603164673, "step": 7393, "token_acc": 0.32890546496605333 }, { "epoch": 4.334212840809147, "grad_norm": 0.3328962412854287, "learning_rate": 0.00018518517286216928, "loss": 2.8078761100769043, "step": 7394, "token_acc": 0.32448227900267873 }, { "epoch": 4.334799179126356, "grad_norm": 0.3312694110445546, "learning_rate": 0.00018518009591234893, "loss": 2.8235490322113037, "step": 7395, "token_acc": 0.32306550191695566 }, { "epoch": 4.335385517443565, "grad_norm": 0.2649008689432459, "learning_rate": 0.00018517501816237638, "loss": 2.7996339797973633, "step": 7396, "token_acc": 0.3267675244649991 }, { "epoch": 4.335971855760774, "grad_norm": 0.33652924456117955, "learning_rate": 0.00018516993961229932, "loss": 2.8173112869262695, "step": 7397, "token_acc": 0.324001422306507 }, { "epoch": 4.336558194077983, "grad_norm": 0.2509395245866988, "learning_rate": 0.0001851648602621654, "loss": 2.867915391921997, "step": 7398, "token_acc": 0.31734970381884636 }, { "epoch": 4.337144532395192, "grad_norm": 0.31800161571113367, "learning_rate": 0.00018515978011202243, "loss": 2.8555874824523926, "step": 7399, "token_acc": 0.31784655305305715 }, { "epoch": 4.337730870712401, "grad_norm": 0.28991738048334936, "learning_rate": 0.00018515469916191807, "loss": 2.8549158573150635, "step": 7400, "token_acc": 0.3175896735892186 }, { "epoch": 4.3383172090296105, "grad_norm": 0.2549485136651108, "learning_rate": 0.00018514961741190005, "loss": 2.8160927295684814, "step": 7401, "token_acc": 0.3237614304726742 }, { "epoch": 4.338903547346819, "grad_norm": 0.3377181249613439, "learning_rate": 0.0001851445348620161, "loss": 2.8079957962036133, "step": 7402, "token_acc": 0.3247294763389721 }, { "epoch": 4.339489885664028, "grad_norm": 0.22616211111235662, "learning_rate": 0.000185139451512314, "loss": 2.8290390968322754, "step": 7403, "token_acc": 0.32114109136730706 }, { "epoch": 4.340076223981237, "grad_norm": 0.3518895634181134, "learning_rate": 0.00018513436736284147, "loss": 2.8103904724121094, "step": 7404, "token_acc": 0.32318866756115183 }, { "epoch": 4.340662562298446, "grad_norm": 0.24669901838192926, "learning_rate": 0.00018512928241364626, "loss": 2.7957215309143066, "step": 7405, "token_acc": 0.3255691394163288 }, { "epoch": 4.341248900615655, "grad_norm": 0.28820187430618854, "learning_rate": 0.0001851241966647762, "loss": 2.869537591934204, "step": 7406, "token_acc": 0.3178083371921474 }, { "epoch": 4.341835238932864, "grad_norm": 0.24891514803819764, "learning_rate": 0.00018511911011627897, "loss": 2.8168578147888184, "step": 7407, "token_acc": 0.32259477310082535 }, { "epoch": 4.342421577250073, "grad_norm": 0.2571528257986596, "learning_rate": 0.00018511402276820245, "loss": 2.7865803241729736, "step": 7408, "token_acc": 0.3291118935523456 }, { "epoch": 4.3430079155672825, "grad_norm": 0.25810184374558254, "learning_rate": 0.00018510893462059437, "loss": 2.827939987182617, "step": 7409, "token_acc": 0.32046704029890577 }, { "epoch": 4.343594253884492, "grad_norm": 0.2568506348988116, "learning_rate": 0.0001851038456735025, "loss": 2.822042942047119, "step": 7410, "token_acc": 0.3221971065742571 }, { "epoch": 4.344180592201701, "grad_norm": 0.29090885674597405, "learning_rate": 0.0001850987559269747, "loss": 2.7751927375793457, "step": 7411, "token_acc": 0.3301368927550849 }, { "epoch": 4.34476693051891, "grad_norm": 0.24339245370074164, "learning_rate": 0.00018509366538105873, "loss": 2.8235793113708496, "step": 7412, "token_acc": 0.3217509013590916 }, { "epoch": 4.345353268836118, "grad_norm": 0.31155069126324275, "learning_rate": 0.0001850885740358025, "loss": 2.815742015838623, "step": 7413, "token_acc": 0.32216966946441755 }, { "epoch": 4.345939607153327, "grad_norm": 0.2795590712630844, "learning_rate": 0.00018508348189125374, "loss": 2.818142890930176, "step": 7414, "token_acc": 0.322453706102401 }, { "epoch": 4.346525945470536, "grad_norm": 0.23436285630869977, "learning_rate": 0.00018507838894746032, "loss": 2.786525011062622, "step": 7415, "token_acc": 0.3279812394751842 }, { "epoch": 4.347112283787745, "grad_norm": 0.2849407475086039, "learning_rate": 0.0001850732952044701, "loss": 2.814694404602051, "step": 7416, "token_acc": 0.32312804674207535 }, { "epoch": 4.3476986221049545, "grad_norm": 0.24376330498475554, "learning_rate": 0.00018506820066233087, "loss": 2.7773947715759277, "step": 7417, "token_acc": 0.32969840478564305 }, { "epoch": 4.348284960422164, "grad_norm": 0.2764201381438405, "learning_rate": 0.00018506310532109054, "loss": 2.7853097915649414, "step": 7418, "token_acc": 0.3287138884556012 }, { "epoch": 4.348871298739373, "grad_norm": 0.2687160350868754, "learning_rate": 0.00018505800918079695, "loss": 2.812912940979004, "step": 7419, "token_acc": 0.32458040431917196 }, { "epoch": 4.349457637056582, "grad_norm": 0.2518304951226571, "learning_rate": 0.000185052912241498, "loss": 2.8121728897094727, "step": 7420, "token_acc": 0.32392841775142617 }, { "epoch": 4.350043975373791, "grad_norm": 0.30126866689190374, "learning_rate": 0.00018504781450324155, "loss": 2.8240442276000977, "step": 7421, "token_acc": 0.32356303222887617 }, { "epoch": 4.350630313691, "grad_norm": 0.25309129540613, "learning_rate": 0.0001850427159660755, "loss": 2.8050942420959473, "step": 7422, "token_acc": 0.32553090148110236 }, { "epoch": 4.351216652008208, "grad_norm": 0.2843011454637013, "learning_rate": 0.0001850376166300477, "loss": 2.805994987487793, "step": 7423, "token_acc": 0.32634665365843424 }, { "epoch": 4.351802990325417, "grad_norm": 0.24165485876858914, "learning_rate": 0.0001850325164952061, "loss": 2.84590482711792, "step": 7424, "token_acc": 0.31832573631207434 }, { "epoch": 4.3523893286426265, "grad_norm": 0.2914030097509735, "learning_rate": 0.00018502741556159858, "loss": 2.795292854309082, "step": 7425, "token_acc": 0.326193428234822 }, { "epoch": 4.352975666959836, "grad_norm": 0.24601242198914453, "learning_rate": 0.00018502231382927308, "loss": 2.8465681076049805, "step": 7426, "token_acc": 0.3204679409320463 }, { "epoch": 4.353562005277045, "grad_norm": 0.2792770905828105, "learning_rate": 0.00018501721129827748, "loss": 2.814530372619629, "step": 7427, "token_acc": 0.3249713402910222 }, { "epoch": 4.354148343594254, "grad_norm": 0.30287611203612164, "learning_rate": 0.0001850121079686598, "loss": 2.8049874305725098, "step": 7428, "token_acc": 0.3270176839980462 }, { "epoch": 4.354734681911463, "grad_norm": 0.24294831548838733, "learning_rate": 0.00018500700384046787, "loss": 2.8342690467834473, "step": 7429, "token_acc": 0.32040272588315216 }, { "epoch": 4.355321020228672, "grad_norm": 0.28869084411314827, "learning_rate": 0.00018500189891374968, "loss": 2.7839574813842773, "step": 7430, "token_acc": 0.3290280276155926 }, { "epoch": 4.355907358545881, "grad_norm": 0.26009503321293176, "learning_rate": 0.00018499679318855324, "loss": 2.8120007514953613, "step": 7431, "token_acc": 0.32519179583529045 }, { "epoch": 4.35649369686309, "grad_norm": 0.28434393827576804, "learning_rate": 0.00018499168666492643, "loss": 2.7974328994750977, "step": 7432, "token_acc": 0.32622335669359337 }, { "epoch": 4.357080035180299, "grad_norm": 0.28400574142718304, "learning_rate": 0.00018498657934291725, "loss": 2.8036394119262695, "step": 7433, "token_acc": 0.3253259812735579 }, { "epoch": 4.3576663734975085, "grad_norm": 0.24928521714247145, "learning_rate": 0.00018498147122257368, "loss": 2.775306224822998, "step": 7434, "token_acc": 0.3298007419489118 }, { "epoch": 4.358252711814717, "grad_norm": 0.2743040943339715, "learning_rate": 0.00018497636230394374, "loss": 2.824603796005249, "step": 7435, "token_acc": 0.3224029569175877 }, { "epoch": 4.358839050131926, "grad_norm": 0.24145861292711085, "learning_rate": 0.0001849712525870753, "loss": 2.842122793197632, "step": 7436, "token_acc": 0.3195661413718695 }, { "epoch": 4.359425388449135, "grad_norm": 0.2789911434682713, "learning_rate": 0.00018496614207201654, "loss": 2.8071703910827637, "step": 7437, "token_acc": 0.3244850783096543 }, { "epoch": 4.360011726766344, "grad_norm": 0.24424498794953525, "learning_rate": 0.0001849610307588153, "loss": 2.7769598960876465, "step": 7438, "token_acc": 0.32882933261893554 }, { "epoch": 4.360598065083553, "grad_norm": 0.2695181350328296, "learning_rate": 0.0001849559186475197, "loss": 2.861466884613037, "step": 7439, "token_acc": 0.3165495419257515 }, { "epoch": 4.361184403400762, "grad_norm": 0.24782700103932084, "learning_rate": 0.0001849508057381777, "loss": 2.7859151363372803, "step": 7440, "token_acc": 0.3271815286624204 }, { "epoch": 4.361770741717971, "grad_norm": 0.2659572036248464, "learning_rate": 0.00018494569203083734, "loss": 2.8244385719299316, "step": 7441, "token_acc": 0.32223399893235954 }, { "epoch": 4.3623570800351805, "grad_norm": 0.25477958971445763, "learning_rate": 0.0001849405775255467, "loss": 2.809879779815674, "step": 7442, "token_acc": 0.3260767079086042 }, { "epoch": 4.36294341835239, "grad_norm": 0.25182472903105746, "learning_rate": 0.00018493546222235377, "loss": 2.8057496547698975, "step": 7443, "token_acc": 0.3252531956478569 }, { "epoch": 4.363529756669599, "grad_norm": 0.25855156279286806, "learning_rate": 0.00018493034612130664, "loss": 2.859363079071045, "step": 7444, "token_acc": 0.3185364327271377 }, { "epoch": 4.364116094986807, "grad_norm": 0.24395031348275611, "learning_rate": 0.00018492522922245334, "loss": 2.8046674728393555, "step": 7445, "token_acc": 0.3258344240837696 }, { "epoch": 4.364702433304016, "grad_norm": 0.24759459699652248, "learning_rate": 0.00018492011152584196, "loss": 2.8137881755828857, "step": 7446, "token_acc": 0.32270133057026856 }, { "epoch": 4.365288771621225, "grad_norm": 0.23611623997166672, "learning_rate": 0.00018491499303152056, "loss": 2.813845634460449, "step": 7447, "token_acc": 0.32359776862022815 }, { "epoch": 4.365875109938434, "grad_norm": 0.3126461990164729, "learning_rate": 0.00018490987373953724, "loss": 2.8215651512145996, "step": 7448, "token_acc": 0.32253601494532297 }, { "epoch": 4.366461448255643, "grad_norm": 0.3186216936330115, "learning_rate": 0.00018490475364994007, "loss": 2.8012022972106934, "step": 7449, "token_acc": 0.3242361927144536 }, { "epoch": 4.3670477865728525, "grad_norm": 0.29457020427657277, "learning_rate": 0.00018489963276277713, "loss": 2.849219799041748, "step": 7450, "token_acc": 0.31825762803358043 }, { "epoch": 4.367634124890062, "grad_norm": 0.2501204606906152, "learning_rate": 0.00018489451107809655, "loss": 2.794825315475464, "step": 7451, "token_acc": 0.3262410617565401 }, { "epoch": 4.368220463207271, "grad_norm": 0.3406687686599865, "learning_rate": 0.00018488938859594645, "loss": 2.8325717449188232, "step": 7452, "token_acc": 0.32162174144351835 }, { "epoch": 4.36880680152448, "grad_norm": 0.27171352131296966, "learning_rate": 0.00018488426531637492, "loss": 2.804859161376953, "step": 7453, "token_acc": 0.3269292699210619 }, { "epoch": 4.369393139841689, "grad_norm": 0.28518367512576753, "learning_rate": 0.00018487914123943008, "loss": 2.740365982055664, "step": 7454, "token_acc": 0.3346309006129827 }, { "epoch": 4.369979478158898, "grad_norm": 0.29682105898632566, "learning_rate": 0.00018487401636516011, "loss": 2.821575164794922, "step": 7455, "token_acc": 0.3230612125363845 }, { "epoch": 4.370565816476106, "grad_norm": 0.2928318995174857, "learning_rate": 0.00018486889069361314, "loss": 2.8416078090667725, "step": 7456, "token_acc": 0.31991798322862175 }, { "epoch": 4.371152154793315, "grad_norm": 0.32878573778571785, "learning_rate": 0.00018486376422483728, "loss": 2.801227569580078, "step": 7457, "token_acc": 0.3238602219728676 }, { "epoch": 4.3717384931105245, "grad_norm": 0.26157052047857365, "learning_rate": 0.00018485863695888072, "loss": 2.8278298377990723, "step": 7458, "token_acc": 0.32205361306610775 }, { "epoch": 4.372324831427734, "grad_norm": 0.2961892536181837, "learning_rate": 0.00018485350889579162, "loss": 2.841789960861206, "step": 7459, "token_acc": 0.3195097709290404 }, { "epoch": 4.372911169744943, "grad_norm": 0.2443179757475132, "learning_rate": 0.00018484838003561812, "loss": 2.8435611724853516, "step": 7460, "token_acc": 0.3200068988405244 }, { "epoch": 4.373497508062152, "grad_norm": 0.30543071181233283, "learning_rate": 0.00018484325037840845, "loss": 2.8258790969848633, "step": 7461, "token_acc": 0.3233214646438883 }, { "epoch": 4.374083846379361, "grad_norm": 0.23444518822590776, "learning_rate": 0.00018483811992421077, "loss": 2.794872760772705, "step": 7462, "token_acc": 0.3262092148226592 }, { "epoch": 4.37467018469657, "grad_norm": 0.36072474751451244, "learning_rate": 0.00018483298867307327, "loss": 2.831958770751953, "step": 7463, "token_acc": 0.3214374162897071 }, { "epoch": 4.375256523013779, "grad_norm": 0.2805303492712956, "learning_rate": 0.00018482785662504413, "loss": 2.8262906074523926, "step": 7464, "token_acc": 0.32344541957330075 }, { "epoch": 4.375842861330988, "grad_norm": 0.2798396869313885, "learning_rate": 0.00018482272378017163, "loss": 2.860952377319336, "step": 7465, "token_acc": 0.3156788322708102 }, { "epoch": 4.3764291996481965, "grad_norm": 0.2589115684810431, "learning_rate": 0.00018481759013850392, "loss": 2.7820849418640137, "step": 7466, "token_acc": 0.32854825396246756 }, { "epoch": 4.377015537965406, "grad_norm": 0.2847340859494114, "learning_rate": 0.00018481245570008925, "loss": 2.839822292327881, "step": 7467, "token_acc": 0.3198406901019821 }, { "epoch": 4.377601876282615, "grad_norm": 0.2590622290856762, "learning_rate": 0.0001848073204649758, "loss": 2.7962377071380615, "step": 7468, "token_acc": 0.32662777969228274 }, { "epoch": 4.378188214599824, "grad_norm": 0.26939016228154095, "learning_rate": 0.00018480218443321192, "loss": 2.802985906600952, "step": 7469, "token_acc": 0.32542228837808074 }, { "epoch": 4.378774552917033, "grad_norm": 0.25696054590749384, "learning_rate": 0.00018479704760484574, "loss": 2.8410744667053223, "step": 7470, "token_acc": 0.3198787125351274 }, { "epoch": 4.379360891234242, "grad_norm": 0.28611742125613693, "learning_rate": 0.00018479190997992557, "loss": 2.8431754112243652, "step": 7471, "token_acc": 0.32072118756739215 }, { "epoch": 4.379947229551451, "grad_norm": 0.24632064668504558, "learning_rate": 0.0001847867715584997, "loss": 2.8539669513702393, "step": 7472, "token_acc": 0.31879176525901204 }, { "epoch": 4.38053356786866, "grad_norm": 0.270109463436155, "learning_rate": 0.00018478163234061633, "loss": 2.826313018798828, "step": 7473, "token_acc": 0.3217656450318911 }, { "epoch": 4.381119906185869, "grad_norm": 0.2294447079125213, "learning_rate": 0.00018477649232632377, "loss": 2.806281566619873, "step": 7474, "token_acc": 0.325930981200103 }, { "epoch": 4.3817062445030786, "grad_norm": 0.2721520430124895, "learning_rate": 0.00018477135151567033, "loss": 2.8207802772521973, "step": 7475, "token_acc": 0.3217781359967971 }, { "epoch": 4.382292582820288, "grad_norm": 0.24117047248464785, "learning_rate": 0.00018476620990870424, "loss": 2.812748908996582, "step": 7476, "token_acc": 0.32572373991191617 }, { "epoch": 4.382878921137497, "grad_norm": 0.2774552387321152, "learning_rate": 0.00018476106750547384, "loss": 2.846264123916626, "step": 7477, "token_acc": 0.31913908511257505 }, { "epoch": 4.383465259454705, "grad_norm": 0.2493480408202868, "learning_rate": 0.00018475592430602743, "loss": 2.8156681060791016, "step": 7478, "token_acc": 0.32345000321109757 }, { "epoch": 4.384051597771914, "grad_norm": 0.2356671289412216, "learning_rate": 0.00018475078031041333, "loss": 2.829155683517456, "step": 7479, "token_acc": 0.32324218485161593 }, { "epoch": 4.384637936089123, "grad_norm": 0.27505341445681825, "learning_rate": 0.00018474563551867986, "loss": 2.8232085704803467, "step": 7480, "token_acc": 0.3228202033405189 }, { "epoch": 4.385224274406332, "grad_norm": 0.24697200121043764, "learning_rate": 0.00018474048993087533, "loss": 2.812225580215454, "step": 7481, "token_acc": 0.3247289623128549 }, { "epoch": 4.3858106127235414, "grad_norm": 0.24605840924966021, "learning_rate": 0.00018473534354704807, "loss": 2.8080828189849854, "step": 7482, "token_acc": 0.3236353198371021 }, { "epoch": 4.386396951040751, "grad_norm": 0.2540521217304517, "learning_rate": 0.00018473019636724644, "loss": 2.8516621589660645, "step": 7483, "token_acc": 0.3180080738925843 }, { "epoch": 4.38698328935796, "grad_norm": 0.24968551555319693, "learning_rate": 0.0001847250483915188, "loss": 2.819540023803711, "step": 7484, "token_acc": 0.3239070946033658 }, { "epoch": 4.387569627675169, "grad_norm": 0.2577191060926478, "learning_rate": 0.00018471989961991352, "loss": 2.818208694458008, "step": 7485, "token_acc": 0.322424696114665 }, { "epoch": 4.388155965992378, "grad_norm": 0.24788598187225602, "learning_rate": 0.00018471475005247894, "loss": 2.822981357574463, "step": 7486, "token_acc": 0.32137172148826837 }, { "epoch": 4.388742304309587, "grad_norm": 0.23539170279259933, "learning_rate": 0.0001847095996892634, "loss": 2.7876975536346436, "step": 7487, "token_acc": 0.3276886966612163 }, { "epoch": 4.389328642626795, "grad_norm": 0.24338792878012794, "learning_rate": 0.00018470444853031535, "loss": 2.814864158630371, "step": 7488, "token_acc": 0.3226865579758472 }, { "epoch": 4.389914980944004, "grad_norm": 0.22651877835897277, "learning_rate": 0.00018469929657568312, "loss": 2.845851182937622, "step": 7489, "token_acc": 0.31883428374032036 }, { "epoch": 4.3905013192612135, "grad_norm": 0.2590390065359618, "learning_rate": 0.00018469414382541518, "loss": 2.8050179481506348, "step": 7490, "token_acc": 0.3262142053944146 }, { "epoch": 4.391087657578423, "grad_norm": 0.22230814204997276, "learning_rate": 0.00018468899027955984, "loss": 2.8113574981689453, "step": 7491, "token_acc": 0.3242332809787551 }, { "epoch": 4.391673995895632, "grad_norm": 0.2605781214041405, "learning_rate": 0.00018468383593816555, "loss": 2.823817729949951, "step": 7492, "token_acc": 0.32187318133026394 }, { "epoch": 4.392260334212841, "grad_norm": 0.2544185786793318, "learning_rate": 0.0001846786808012808, "loss": 2.8755605220794678, "step": 7493, "token_acc": 0.3159327000380823 }, { "epoch": 4.39284667253005, "grad_norm": 0.28288620277312204, "learning_rate": 0.0001846735248689539, "loss": 2.8294219970703125, "step": 7494, "token_acc": 0.32254745999128537 }, { "epoch": 4.393433010847259, "grad_norm": 0.258163487821477, "learning_rate": 0.00018466836814123335, "loss": 2.836784839630127, "step": 7495, "token_acc": 0.3206309693817391 }, { "epoch": 4.394019349164468, "grad_norm": 0.25159277235452043, "learning_rate": 0.00018466321061816755, "loss": 2.7986326217651367, "step": 7496, "token_acc": 0.32642096717135083 }, { "epoch": 4.394605687481677, "grad_norm": 0.2734817920851079, "learning_rate": 0.00018465805229980498, "loss": 2.877167224884033, "step": 7497, "token_acc": 0.3154312836221546 }, { "epoch": 4.395192025798886, "grad_norm": 0.35692770495698906, "learning_rate": 0.0001846528931861941, "loss": 2.8124876022338867, "step": 7498, "token_acc": 0.32456011989161315 }, { "epoch": 4.395778364116095, "grad_norm": 0.45778941110682464, "learning_rate": 0.00018464773327738336, "loss": 2.8079657554626465, "step": 7499, "token_acc": 0.3247841491163361 }, { "epoch": 4.396364702433304, "grad_norm": 0.3141493753615565, "learning_rate": 0.00018464257257342118, "loss": 2.8134732246398926, "step": 7500, "token_acc": 0.3246066780439981 }, { "epoch": 4.396951040750513, "grad_norm": 0.28180349341437266, "learning_rate": 0.00018463741107435614, "loss": 2.8101422786712646, "step": 7501, "token_acc": 0.32392081977354026 }, { "epoch": 4.397537379067722, "grad_norm": 0.3227499011081679, "learning_rate": 0.00018463224878023668, "loss": 2.824122905731201, "step": 7502, "token_acc": 0.3227849554834808 }, { "epoch": 4.398123717384931, "grad_norm": 0.2568344379646359, "learning_rate": 0.00018462708569111128, "loss": 2.850935459136963, "step": 7503, "token_acc": 0.3177146152055693 }, { "epoch": 4.39871005570214, "grad_norm": 0.32413419867836946, "learning_rate": 0.0001846219218070284, "loss": 2.834317684173584, "step": 7504, "token_acc": 0.32036655390079194 }, { "epoch": 4.399296394019349, "grad_norm": 0.23842504434135364, "learning_rate": 0.00018461675712803667, "loss": 2.854387044906616, "step": 7505, "token_acc": 0.31712410423026033 }, { "epoch": 4.399882732336558, "grad_norm": 0.28480909719674335, "learning_rate": 0.00018461159165418447, "loss": 2.7860805988311768, "step": 7506, "token_acc": 0.32932264989951315 }, { "epoch": 4.4004690706537675, "grad_norm": 0.2554176220314128, "learning_rate": 0.0001846064253855204, "loss": 2.8351945877075195, "step": 7507, "token_acc": 0.320398403225169 }, { "epoch": 4.401055408970977, "grad_norm": 0.25782240507866955, "learning_rate": 0.000184601258322093, "loss": 2.823171615600586, "step": 7508, "token_acc": 0.32194886457594296 }, { "epoch": 4.401641747288186, "grad_norm": 0.2608368634224289, "learning_rate": 0.0001845960904639507, "loss": 2.797649383544922, "step": 7509, "token_acc": 0.3266473962211777 }, { "epoch": 4.402228085605394, "grad_norm": 0.2952272562801867, "learning_rate": 0.00018459092181114222, "loss": 2.8523616790771484, "step": 7510, "token_acc": 0.31784478497281266 }, { "epoch": 4.402814423922603, "grad_norm": 0.2424137248369666, "learning_rate": 0.00018458575236371595, "loss": 2.8539533615112305, "step": 7511, "token_acc": 0.31906207900569294 }, { "epoch": 4.403400762239812, "grad_norm": 0.25429988737721926, "learning_rate": 0.00018458058212172056, "loss": 2.857726812362671, "step": 7512, "token_acc": 0.31731123880796475 }, { "epoch": 4.403987100557021, "grad_norm": 0.24527297593155334, "learning_rate": 0.00018457541108520456, "loss": 2.8450088500976562, "step": 7513, "token_acc": 0.3190523296590564 }, { "epoch": 4.40457343887423, "grad_norm": 0.25075864752553134, "learning_rate": 0.00018457023925421654, "loss": 2.8414525985717773, "step": 7514, "token_acc": 0.3192677027125418 }, { "epoch": 4.4051597771914395, "grad_norm": 0.2305568752809105, "learning_rate": 0.00018456506662880507, "loss": 2.79876446723938, "step": 7515, "token_acc": 0.32583267659380316 }, { "epoch": 4.405746115508649, "grad_norm": 0.2527565832974834, "learning_rate": 0.00018455989320901876, "loss": 2.76139497756958, "step": 7516, "token_acc": 0.33267204131099165 }, { "epoch": 4.406332453825858, "grad_norm": 0.23173111998779827, "learning_rate": 0.0001845547189949062, "loss": 2.8168601989746094, "step": 7517, "token_acc": 0.32405750923882526 }, { "epoch": 4.406918792143067, "grad_norm": 0.26359720632726735, "learning_rate": 0.000184549543986516, "loss": 2.836848497390747, "step": 7518, "token_acc": 0.3207381704909224 }, { "epoch": 4.407505130460276, "grad_norm": 0.24613130124419194, "learning_rate": 0.00018454436818389676, "loss": 2.829709529876709, "step": 7519, "token_acc": 0.3211919588273764 }, { "epoch": 4.408091468777485, "grad_norm": 0.259813917786715, "learning_rate": 0.0001845391915870971, "loss": 2.826183319091797, "step": 7520, "token_acc": 0.3205655251304783 }, { "epoch": 4.408677807094693, "grad_norm": 0.24254868245861122, "learning_rate": 0.00018453401419616565, "loss": 2.810854911804199, "step": 7521, "token_acc": 0.32620550372716767 }, { "epoch": 4.409264145411902, "grad_norm": 0.2513801498981925, "learning_rate": 0.00018452883601115106, "loss": 2.874814748764038, "step": 7522, "token_acc": 0.31515510924857015 }, { "epoch": 4.4098504837291115, "grad_norm": 0.25210140035107903, "learning_rate": 0.00018452365703210195, "loss": 2.8333945274353027, "step": 7523, "token_acc": 0.32058536383247055 }, { "epoch": 4.410436822046321, "grad_norm": 0.2526980781632405, "learning_rate": 0.00018451847725906698, "loss": 2.8420867919921875, "step": 7524, "token_acc": 0.3200683255620701 }, { "epoch": 4.41102316036353, "grad_norm": 0.26129697425638454, "learning_rate": 0.00018451329669209483, "loss": 2.8255414962768555, "step": 7525, "token_acc": 0.3221202341460315 }, { "epoch": 4.411609498680739, "grad_norm": 0.2608816802452931, "learning_rate": 0.00018450811533123412, "loss": 2.8358936309814453, "step": 7526, "token_acc": 0.32078285669431433 }, { "epoch": 4.412195836997948, "grad_norm": 0.27501187345812267, "learning_rate": 0.00018450293317653354, "loss": 2.823366165161133, "step": 7527, "token_acc": 0.32151920889122254 }, { "epoch": 4.412782175315157, "grad_norm": 0.2998283932480553, "learning_rate": 0.00018449775022804176, "loss": 2.7763662338256836, "step": 7528, "token_acc": 0.3320185584180403 }, { "epoch": 4.413368513632366, "grad_norm": 0.25235150930051264, "learning_rate": 0.0001844925664858075, "loss": 2.822849750518799, "step": 7529, "token_acc": 0.3229004208772192 }, { "epoch": 4.413954851949575, "grad_norm": 0.273642005806271, "learning_rate": 0.00018448738194987944, "loss": 2.8518691062927246, "step": 7530, "token_acc": 0.3196752728240618 }, { "epoch": 4.4145411902667835, "grad_norm": 0.3012883180946971, "learning_rate": 0.00018448219662030625, "loss": 2.824174642562866, "step": 7531, "token_acc": 0.3232736654775785 }, { "epoch": 4.415127528583993, "grad_norm": 0.27965091291508765, "learning_rate": 0.0001844770104971367, "loss": 2.803652763366699, "step": 7532, "token_acc": 0.3261475946141396 }, { "epoch": 4.415713866901202, "grad_norm": 0.237697541514221, "learning_rate": 0.00018447182358041943, "loss": 2.7950549125671387, "step": 7533, "token_acc": 0.32593421172430836 }, { "epoch": 4.416300205218411, "grad_norm": 0.2900524086476858, "learning_rate": 0.00018446663587020322, "loss": 2.835469961166382, "step": 7534, "token_acc": 0.32011954334233317 }, { "epoch": 4.41688654353562, "grad_norm": 0.2846375564624913, "learning_rate": 0.0001844614473665368, "loss": 2.808065176010132, "step": 7535, "token_acc": 0.32274238948857975 }, { "epoch": 4.417472881852829, "grad_norm": 0.2586241173064911, "learning_rate": 0.00018445625806946887, "loss": 2.8396291732788086, "step": 7536, "token_acc": 0.32136194395966394 }, { "epoch": 4.418059220170038, "grad_norm": 0.3049325154891526, "learning_rate": 0.00018445106797904823, "loss": 2.8562135696411133, "step": 7537, "token_acc": 0.31745504122573526 }, { "epoch": 4.418645558487247, "grad_norm": 0.35100214991079315, "learning_rate": 0.0001844458770953236, "loss": 2.843250274658203, "step": 7538, "token_acc": 0.3202434437362867 }, { "epoch": 4.419231896804456, "grad_norm": 0.27282960892861985, "learning_rate": 0.0001844406854183437, "loss": 2.8035359382629395, "step": 7539, "token_acc": 0.32677366645582967 }, { "epoch": 4.4198182351216655, "grad_norm": 0.2644995704039485, "learning_rate": 0.0001844354929481574, "loss": 2.8357295989990234, "step": 7540, "token_acc": 0.3197456769504127 }, { "epoch": 4.420404573438875, "grad_norm": 0.29052950665999944, "learning_rate": 0.0001844302996848134, "loss": 2.8587915897369385, "step": 7541, "token_acc": 0.31716682169887106 }, { "epoch": 4.420990911756084, "grad_norm": 0.24364408761410675, "learning_rate": 0.00018442510562836052, "loss": 2.8119564056396484, "step": 7542, "token_acc": 0.325652274762248 }, { "epoch": 4.421577250073292, "grad_norm": 0.2524167382271862, "learning_rate": 0.00018441991077884753, "loss": 2.8635408878326416, "step": 7543, "token_acc": 0.31652915766182316 }, { "epoch": 4.422163588390501, "grad_norm": 0.25292487422078413, "learning_rate": 0.00018441471513632322, "loss": 2.811081886291504, "step": 7544, "token_acc": 0.32360527482326956 }, { "epoch": 4.42274992670771, "grad_norm": 0.23421140131752005, "learning_rate": 0.00018440951870083642, "loss": 2.818826913833618, "step": 7545, "token_acc": 0.32250273864418005 }, { "epoch": 4.423336265024919, "grad_norm": 0.24485967734155728, "learning_rate": 0.00018440432147243596, "loss": 2.8264498710632324, "step": 7546, "token_acc": 0.32216061527454837 }, { "epoch": 4.423922603342128, "grad_norm": 0.24473415634227536, "learning_rate": 0.0001843991234511706, "loss": 2.8714261054992676, "step": 7547, "token_acc": 0.3162602423956264 }, { "epoch": 4.4245089416593375, "grad_norm": 0.24255771660243144, "learning_rate": 0.00018439392463708923, "loss": 2.793022632598877, "step": 7548, "token_acc": 0.32887164464389596 }, { "epoch": 4.425095279976547, "grad_norm": 0.256376106147076, "learning_rate": 0.00018438872503024066, "loss": 2.8813676834106445, "step": 7549, "token_acc": 0.3148656135198662 }, { "epoch": 4.425681618293756, "grad_norm": 0.24394831549153587, "learning_rate": 0.00018438352463067372, "loss": 2.8206028938293457, "step": 7550, "token_acc": 0.3236345916546808 }, { "epoch": 4.426267956610965, "grad_norm": 0.28263447895312116, "learning_rate": 0.00018437832343843726, "loss": 2.828641891479492, "step": 7551, "token_acc": 0.3197032168465391 }, { "epoch": 4.426854294928174, "grad_norm": 0.29927512465755, "learning_rate": 0.00018437312145358018, "loss": 2.8647751808166504, "step": 7552, "token_acc": 0.3148723176246112 }, { "epoch": 4.427440633245382, "grad_norm": 0.24584354832458374, "learning_rate": 0.00018436791867615132, "loss": 2.834036111831665, "step": 7553, "token_acc": 0.3193284545845889 }, { "epoch": 4.428026971562591, "grad_norm": 0.25984077818669643, "learning_rate": 0.00018436271510619952, "loss": 2.800201892852783, "step": 7554, "token_acc": 0.3248483932926171 }, { "epoch": 4.4286133098798, "grad_norm": 0.31749065808529503, "learning_rate": 0.00018435751074377375, "loss": 2.7959418296813965, "step": 7555, "token_acc": 0.3262571339408974 }, { "epoch": 4.4291996481970095, "grad_norm": 0.31148652057948895, "learning_rate": 0.00018435230558892278, "loss": 2.816371440887451, "step": 7556, "token_acc": 0.32256624304344217 }, { "epoch": 4.429785986514219, "grad_norm": 0.27053789502434866, "learning_rate": 0.0001843470996416956, "loss": 2.842940330505371, "step": 7557, "token_acc": 0.31853133218010377 }, { "epoch": 4.430372324831428, "grad_norm": 0.3012481775206665, "learning_rate": 0.00018434189290214106, "loss": 2.838082790374756, "step": 7558, "token_acc": 0.3200302128808309 }, { "epoch": 4.430958663148637, "grad_norm": 0.26973785392052835, "learning_rate": 0.0001843366853703081, "loss": 2.804936408996582, "step": 7559, "token_acc": 0.32386304810094324 }, { "epoch": 4.431545001465846, "grad_norm": 0.26781086810824756, "learning_rate": 0.0001843314770462456, "loss": 2.8340702056884766, "step": 7560, "token_acc": 0.3190427747904859 }, { "epoch": 4.432131339783055, "grad_norm": 0.3130702051821676, "learning_rate": 0.00018432626793000255, "loss": 2.84051513671875, "step": 7561, "token_acc": 0.3185650262246334 }, { "epoch": 4.432717678100264, "grad_norm": 0.2582564755805302, "learning_rate": 0.00018432105802162783, "loss": 2.8592031002044678, "step": 7562, "token_acc": 0.31721227749082914 }, { "epoch": 4.433304016417473, "grad_norm": 0.29265668627900765, "learning_rate": 0.00018431584732117037, "loss": 2.8405814170837402, "step": 7563, "token_acc": 0.3198559578162176 }, { "epoch": 4.4338903547346815, "grad_norm": 0.24068537032061196, "learning_rate": 0.00018431063582867915, "loss": 2.8366122245788574, "step": 7564, "token_acc": 0.3204174326524279 }, { "epoch": 4.434476693051891, "grad_norm": 0.30265062253321445, "learning_rate": 0.00018430542354420313, "loss": 2.8384690284729004, "step": 7565, "token_acc": 0.3201527841791029 }, { "epoch": 4.4350630313691, "grad_norm": 0.2882114115040358, "learning_rate": 0.00018430021046779126, "loss": 2.8192219734191895, "step": 7566, "token_acc": 0.3220837041309244 }, { "epoch": 4.435649369686309, "grad_norm": 0.2422760938544285, "learning_rate": 0.0001842949965994925, "loss": 2.8562135696411133, "step": 7567, "token_acc": 0.31603166974969726 }, { "epoch": 4.436235708003518, "grad_norm": 0.24261972065093299, "learning_rate": 0.00018428978193935585, "loss": 2.791912078857422, "step": 7568, "token_acc": 0.32633406271277177 }, { "epoch": 4.436822046320727, "grad_norm": 0.26858292217634006, "learning_rate": 0.00018428456648743026, "loss": 2.8356292247772217, "step": 7569, "token_acc": 0.3218397972047879 }, { "epoch": 4.437408384637936, "grad_norm": 0.26882968517475314, "learning_rate": 0.00018427935024376474, "loss": 2.8390235900878906, "step": 7570, "token_acc": 0.31889707394937417 }, { "epoch": 4.437994722955145, "grad_norm": 0.23885338800863146, "learning_rate": 0.0001842741332084083, "loss": 2.8280577659606934, "step": 7571, "token_acc": 0.32107144639484764 }, { "epoch": 4.438581061272354, "grad_norm": 0.2593815909380509, "learning_rate": 0.00018426891538140999, "loss": 2.8573594093322754, "step": 7572, "token_acc": 0.31738128816270983 }, { "epoch": 4.4391673995895635, "grad_norm": 0.234711161106046, "learning_rate": 0.00018426369676281871, "loss": 2.793976306915283, "step": 7573, "token_acc": 0.32644046144019206 }, { "epoch": 4.439753737906772, "grad_norm": 0.2576300328520498, "learning_rate": 0.00018425847735268356, "loss": 2.8758177757263184, "step": 7574, "token_acc": 0.314799037143869 }, { "epoch": 4.440340076223981, "grad_norm": 0.23774499876129487, "learning_rate": 0.00018425325715105357, "loss": 2.810102939605713, "step": 7575, "token_acc": 0.3242615734539399 }, { "epoch": 4.44092641454119, "grad_norm": 0.2408904460414873, "learning_rate": 0.00018424803615797774, "loss": 2.8378005027770996, "step": 7576, "token_acc": 0.31937275904566953 }, { "epoch": 4.441512752858399, "grad_norm": 0.25831445762506355, "learning_rate": 0.00018424281437350514, "loss": 2.8527908325195312, "step": 7577, "token_acc": 0.31651328026854775 }, { "epoch": 4.442099091175608, "grad_norm": 0.25854493025000974, "learning_rate": 0.00018423759179768485, "loss": 2.842883825302124, "step": 7578, "token_acc": 0.3202257238726486 }, { "epoch": 4.442685429492817, "grad_norm": 0.2607906446626564, "learning_rate": 0.00018423236843056586, "loss": 2.837601900100708, "step": 7579, "token_acc": 0.320505957040621 }, { "epoch": 4.443271767810026, "grad_norm": 0.2523269174841092, "learning_rate": 0.00018422714427219727, "loss": 2.8212642669677734, "step": 7580, "token_acc": 0.321689218759091 }, { "epoch": 4.4438581061272355, "grad_norm": 0.23376552188734268, "learning_rate": 0.00018422191932262818, "loss": 2.8117740154266357, "step": 7581, "token_acc": 0.3251956550058621 }, { "epoch": 4.444444444444445, "grad_norm": 0.28522257517835853, "learning_rate": 0.00018421669358190763, "loss": 2.8422253131866455, "step": 7582, "token_acc": 0.3203974454397352 }, { "epoch": 4.445030782761654, "grad_norm": 0.2901075874254731, "learning_rate": 0.00018421146705008474, "loss": 2.833270788192749, "step": 7583, "token_acc": 0.32162403498845604 }, { "epoch": 4.445617121078863, "grad_norm": 0.2701571805264613, "learning_rate": 0.0001842062397272086, "loss": 2.821427822113037, "step": 7584, "token_acc": 0.32338791844794806 }, { "epoch": 4.446203459396072, "grad_norm": 0.2663567426397365, "learning_rate": 0.0001842010116133283, "loss": 2.8433446884155273, "step": 7585, "token_acc": 0.3204010438858117 }, { "epoch": 4.44678979771328, "grad_norm": 0.2685978192882868, "learning_rate": 0.00018419578270849294, "loss": 2.869859218597412, "step": 7586, "token_acc": 0.31602902497714935 }, { "epoch": 4.447376136030489, "grad_norm": 0.25702851703654667, "learning_rate": 0.00018419055301275168, "loss": 2.8544278144836426, "step": 7587, "token_acc": 0.3187604842242475 }, { "epoch": 4.447962474347698, "grad_norm": 0.24812503461958954, "learning_rate": 0.0001841853225261536, "loss": 2.8512682914733887, "step": 7588, "token_acc": 0.31857121003518435 }, { "epoch": 4.4485488126649075, "grad_norm": 0.2730652004481719, "learning_rate": 0.00018418009124874789, "loss": 2.8665080070495605, "step": 7589, "token_acc": 0.3148870455121688 }, { "epoch": 4.449135150982117, "grad_norm": 0.24112196148090861, "learning_rate": 0.00018417485918058364, "loss": 2.8224263191223145, "step": 7590, "token_acc": 0.3240234079103125 }, { "epoch": 4.449721489299326, "grad_norm": 0.25514301679841356, "learning_rate": 0.00018416962632171, "loss": 2.8792271614074707, "step": 7591, "token_acc": 0.31615314235641295 }, { "epoch": 4.450307827616535, "grad_norm": 0.249957691130563, "learning_rate": 0.00018416439267217617, "loss": 2.8368616104125977, "step": 7592, "token_acc": 0.32170399748626227 }, { "epoch": 4.450894165933744, "grad_norm": 0.24715512652708144, "learning_rate": 0.00018415915823203127, "loss": 2.829893112182617, "step": 7593, "token_acc": 0.32298647821948134 }, { "epoch": 4.451480504250953, "grad_norm": 0.24901244890256663, "learning_rate": 0.00018415392300132446, "loss": 2.8170974254608154, "step": 7594, "token_acc": 0.3252518729361072 }, { "epoch": 4.452066842568162, "grad_norm": 0.28268378554774404, "learning_rate": 0.00018414868698010496, "loss": 2.819002151489258, "step": 7595, "token_acc": 0.3228680407498418 }, { "epoch": 4.45265318088537, "grad_norm": 0.2456659097790723, "learning_rate": 0.00018414345016842196, "loss": 2.805337429046631, "step": 7596, "token_acc": 0.3270001268579403 }, { "epoch": 4.4532395192025795, "grad_norm": 0.25690610632220606, "learning_rate": 0.0001841382125663246, "loss": 2.821662425994873, "step": 7597, "token_acc": 0.32342470961378433 }, { "epoch": 4.453825857519789, "grad_norm": 0.29265604747385204, "learning_rate": 0.00018413297417386212, "loss": 2.8232250213623047, "step": 7598, "token_acc": 0.3229396851724518 }, { "epoch": 4.454412195836998, "grad_norm": 0.3453231541848402, "learning_rate": 0.00018412773499108372, "loss": 2.8285274505615234, "step": 7599, "token_acc": 0.3225101318331771 }, { "epoch": 4.454998534154207, "grad_norm": 0.2839377598121682, "learning_rate": 0.00018412249501803863, "loss": 2.845349073410034, "step": 7600, "token_acc": 0.3197847008736623 }, { "epoch": 4.455584872471416, "grad_norm": 0.2724549842334263, "learning_rate": 0.00018411725425477603, "loss": 2.8273396492004395, "step": 7601, "token_acc": 0.3214776865395716 }, { "epoch": 4.456171210788625, "grad_norm": 0.37296077424605156, "learning_rate": 0.00018411201270134519, "loss": 2.820012092590332, "step": 7602, "token_acc": 0.3242395146625757 }, { "epoch": 4.456757549105834, "grad_norm": 0.24039967189706796, "learning_rate": 0.00018410677035779534, "loss": 2.8533453941345215, "step": 7603, "token_acc": 0.31911072865836376 }, { "epoch": 4.457343887423043, "grad_norm": 0.30928955260272556, "learning_rate": 0.00018410152722417569, "loss": 2.789132833480835, "step": 7604, "token_acc": 0.3275837783155981 }, { "epoch": 4.457930225740252, "grad_norm": 0.27759620470150814, "learning_rate": 0.00018409628330053553, "loss": 2.8005530834198, "step": 7605, "token_acc": 0.3262782444959444 }, { "epoch": 4.4585165640574616, "grad_norm": 0.29330952398537463, "learning_rate": 0.00018409103858692413, "loss": 2.8571019172668457, "step": 7606, "token_acc": 0.3177122280789063 }, { "epoch": 4.45910290237467, "grad_norm": 0.2790118431679959, "learning_rate": 0.00018408579308339072, "loss": 2.820038318634033, "step": 7607, "token_acc": 0.32336490729625883 }, { "epoch": 4.459689240691879, "grad_norm": 0.2958092304985593, "learning_rate": 0.00018408054678998461, "loss": 2.868168830871582, "step": 7608, "token_acc": 0.315758896151053 }, { "epoch": 4.460275579009088, "grad_norm": 0.28362988914876425, "learning_rate": 0.00018407529970675503, "loss": 2.820218086242676, "step": 7609, "token_acc": 0.32304653551394485 }, { "epoch": 4.460861917326297, "grad_norm": 0.2924205538995259, "learning_rate": 0.0001840700518337513, "loss": 2.848245620727539, "step": 7610, "token_acc": 0.31917014907381364 }, { "epoch": 4.461448255643506, "grad_norm": 0.2685462146681838, "learning_rate": 0.00018406480317102278, "loss": 2.8176627159118652, "step": 7611, "token_acc": 0.323820666914854 }, { "epoch": 4.462034593960715, "grad_norm": 0.2725157207960877, "learning_rate": 0.00018405955371861865, "loss": 2.815708637237549, "step": 7612, "token_acc": 0.32371825659640946 }, { "epoch": 4.4626209322779244, "grad_norm": 0.2472827958635882, "learning_rate": 0.0001840543034765883, "loss": 2.827860116958618, "step": 7613, "token_acc": 0.32094730928397913 }, { "epoch": 4.463207270595134, "grad_norm": 0.27251339159499216, "learning_rate": 0.00018404905244498104, "loss": 2.7749600410461426, "step": 7614, "token_acc": 0.32916604551459666 }, { "epoch": 4.463793608912343, "grad_norm": 0.23876865879343587, "learning_rate": 0.0001840438006238462, "loss": 2.8428664207458496, "step": 7615, "token_acc": 0.32059586329586565 }, { "epoch": 4.464379947229552, "grad_norm": 0.318390904270576, "learning_rate": 0.00018403854801323307, "loss": 2.832314968109131, "step": 7616, "token_acc": 0.3212141358319656 }, { "epoch": 4.464966285546761, "grad_norm": 0.2634444733489264, "learning_rate": 0.00018403329461319105, "loss": 2.839538097381592, "step": 7617, "token_acc": 0.3198911390378612 }, { "epoch": 4.465552623863969, "grad_norm": 0.2934033645289138, "learning_rate": 0.00018402804042376946, "loss": 2.8125712871551514, "step": 7618, "token_acc": 0.3247531326482555 }, { "epoch": 4.466138962181178, "grad_norm": 0.2773547420284079, "learning_rate": 0.00018402278544501764, "loss": 2.8212928771972656, "step": 7619, "token_acc": 0.3231636522792019 }, { "epoch": 4.466725300498387, "grad_norm": 0.283115780973382, "learning_rate": 0.000184017529676985, "loss": 2.8487982749938965, "step": 7620, "token_acc": 0.31756186227341715 }, { "epoch": 4.4673116388155965, "grad_norm": 0.2525810592529912, "learning_rate": 0.0001840122731197209, "loss": 2.850487232208252, "step": 7621, "token_acc": 0.31851261427098937 }, { "epoch": 4.467897977132806, "grad_norm": 0.26651622606811115, "learning_rate": 0.00018400701577327467, "loss": 2.819931745529175, "step": 7622, "token_acc": 0.32462687575131355 }, { "epoch": 4.468484315450015, "grad_norm": 0.2513106816248567, "learning_rate": 0.00018400175763769573, "loss": 2.8364052772521973, "step": 7623, "token_acc": 0.31906281701729683 }, { "epoch": 4.469070653767224, "grad_norm": 0.3403004040400084, "learning_rate": 0.00018399649871303348, "loss": 2.807452440261841, "step": 7624, "token_acc": 0.3250064336347827 }, { "epoch": 4.469656992084433, "grad_norm": 0.28545288123916884, "learning_rate": 0.0001839912389993373, "loss": 2.80859375, "step": 7625, "token_acc": 0.3249975894446341 }, { "epoch": 4.470243330401642, "grad_norm": 0.28630422773219927, "learning_rate": 0.00018398597849665662, "loss": 2.865186929702759, "step": 7626, "token_acc": 0.31636102458408477 }, { "epoch": 4.470829668718851, "grad_norm": 0.2540941942448384, "learning_rate": 0.00018398071720504084, "loss": 2.817577362060547, "step": 7627, "token_acc": 0.3226896408574859 }, { "epoch": 4.47141600703606, "grad_norm": 0.29702887857342286, "learning_rate": 0.0001839754551245394, "loss": 2.8362388610839844, "step": 7628, "token_acc": 0.32158888945310515 }, { "epoch": 4.4720023453532685, "grad_norm": 0.25611099666619275, "learning_rate": 0.00018397019225520168, "loss": 2.8643293380737305, "step": 7629, "token_acc": 0.3151626564543723 }, { "epoch": 4.472588683670478, "grad_norm": 0.29086159210294393, "learning_rate": 0.00018396492859707722, "loss": 2.837221622467041, "step": 7630, "token_acc": 0.318861793227144 }, { "epoch": 4.473175021987687, "grad_norm": 0.2802747278749033, "learning_rate": 0.00018395966415021535, "loss": 2.8511292934417725, "step": 7631, "token_acc": 0.3168966633934354 }, { "epoch": 4.473761360304896, "grad_norm": 0.26831293554504737, "learning_rate": 0.00018395439891466558, "loss": 2.7913360595703125, "step": 7632, "token_acc": 0.32810912369968637 }, { "epoch": 4.474347698622105, "grad_norm": 0.31200261526343953, "learning_rate": 0.00018394913289047736, "loss": 2.8551840782165527, "step": 7633, "token_acc": 0.3164997506648936 }, { "epoch": 4.474934036939314, "grad_norm": 0.24670151769544466, "learning_rate": 0.00018394386607770017, "loss": 2.8199048042297363, "step": 7634, "token_acc": 0.3236031165443487 }, { "epoch": 4.475520375256523, "grad_norm": 0.2919775453483244, "learning_rate": 0.00018393859847638347, "loss": 2.8268532752990723, "step": 7635, "token_acc": 0.3230495517193157 }, { "epoch": 4.476106713573732, "grad_norm": 0.24387830948670028, "learning_rate": 0.00018393333008657673, "loss": 2.801215648651123, "step": 7636, "token_acc": 0.3256830738956026 }, { "epoch": 4.476693051890941, "grad_norm": 0.273144353056082, "learning_rate": 0.0001839280609083295, "loss": 2.812460422515869, "step": 7637, "token_acc": 0.32563834491714105 }, { "epoch": 4.4772793902081505, "grad_norm": 0.24173550853568615, "learning_rate": 0.00018392279094169118, "loss": 2.7972640991210938, "step": 7638, "token_acc": 0.3267057925873278 }, { "epoch": 4.477865728525359, "grad_norm": 0.27795020684064486, "learning_rate": 0.00018391752018671133, "loss": 2.8707380294799805, "step": 7639, "token_acc": 0.31554272966144786 }, { "epoch": 4.478452066842568, "grad_norm": 0.2603277479645157, "learning_rate": 0.0001839122486434395, "loss": 2.87839937210083, "step": 7640, "token_acc": 0.31514530680623803 }, { "epoch": 4.479038405159777, "grad_norm": 0.29210511045907017, "learning_rate": 0.00018390697631192511, "loss": 2.851154088973999, "step": 7641, "token_acc": 0.31763267635425035 }, { "epoch": 4.479624743476986, "grad_norm": 0.27213655322917163, "learning_rate": 0.0001839017031922178, "loss": 2.827998638153076, "step": 7642, "token_acc": 0.3203301497589253 }, { "epoch": 4.480211081794195, "grad_norm": 0.3049019968192061, "learning_rate": 0.00018389642928436702, "loss": 2.872258186340332, "step": 7643, "token_acc": 0.3156141225828676 }, { "epoch": 4.480797420111404, "grad_norm": 0.3101507805328225, "learning_rate": 0.00018389115458842238, "loss": 2.8185393810272217, "step": 7644, "token_acc": 0.3225825597362621 }, { "epoch": 4.481383758428613, "grad_norm": 0.2959430211857069, "learning_rate": 0.00018388587910443332, "loss": 2.8609461784362793, "step": 7645, "token_acc": 0.3172168108220871 }, { "epoch": 4.4819700967458225, "grad_norm": 0.268563893286785, "learning_rate": 0.0001838806028324495, "loss": 2.8676962852478027, "step": 7646, "token_acc": 0.3152370034904104 }, { "epoch": 4.482556435063032, "grad_norm": 0.34094091087210315, "learning_rate": 0.00018387532577252043, "loss": 2.8793482780456543, "step": 7647, "token_acc": 0.3144535516914936 }, { "epoch": 4.483142773380241, "grad_norm": 0.28140159307069845, "learning_rate": 0.00018387004792469572, "loss": 2.79286527633667, "step": 7648, "token_acc": 0.3260709474629547 }, { "epoch": 4.48372911169745, "grad_norm": 0.3250694021714327, "learning_rate": 0.00018386476928902492, "loss": 2.8382630348205566, "step": 7649, "token_acc": 0.3209516376456657 }, { "epoch": 4.484315450014659, "grad_norm": 0.27516948356377313, "learning_rate": 0.00018385948986555763, "loss": 2.820890426635742, "step": 7650, "token_acc": 0.3231284495801548 }, { "epoch": 4.484901788331867, "grad_norm": 0.31502497244087235, "learning_rate": 0.00018385420965434342, "loss": 2.835212230682373, "step": 7651, "token_acc": 0.3200458731747557 }, { "epoch": 4.485488126649076, "grad_norm": 0.24235914572527933, "learning_rate": 0.0001838489286554319, "loss": 2.842158079147339, "step": 7652, "token_acc": 0.3187563894096249 }, { "epoch": 4.486074464966285, "grad_norm": 0.2942956750675823, "learning_rate": 0.00018384364686887267, "loss": 2.856235980987549, "step": 7653, "token_acc": 0.31860349335189225 }, { "epoch": 4.4866608032834945, "grad_norm": 0.2289267153608233, "learning_rate": 0.0001838383642947154, "loss": 2.812288761138916, "step": 7654, "token_acc": 0.3251164079091425 }, { "epoch": 4.487247141600704, "grad_norm": 0.29317433239100943, "learning_rate": 0.00018383308093300964, "loss": 2.8690412044525146, "step": 7655, "token_acc": 0.3162895083292732 }, { "epoch": 4.487833479917913, "grad_norm": 0.2465923727972142, "learning_rate": 0.00018382779678380507, "loss": 2.8019704818725586, "step": 7656, "token_acc": 0.32623603228688036 }, { "epoch": 4.488419818235122, "grad_norm": 0.24356240021414, "learning_rate": 0.00018382251184715132, "loss": 2.811600923538208, "step": 7657, "token_acc": 0.3241353404450265 }, { "epoch": 4.489006156552331, "grad_norm": 0.24048811537882683, "learning_rate": 0.00018381722612309797, "loss": 2.8528389930725098, "step": 7658, "token_acc": 0.317128401275659 }, { "epoch": 4.48959249486954, "grad_norm": 0.24933375790915877, "learning_rate": 0.00018381193961169477, "loss": 2.8360793590545654, "step": 7659, "token_acc": 0.31974614499722104 }, { "epoch": 4.490178833186749, "grad_norm": 0.2555165924536825, "learning_rate": 0.0001838066523129913, "loss": 2.8652899265289307, "step": 7660, "token_acc": 0.3151818532054686 }, { "epoch": 4.490765171503957, "grad_norm": 0.252482794351262, "learning_rate": 0.00018380136422703732, "loss": 2.8721370697021484, "step": 7661, "token_acc": 0.3140247078185871 }, { "epoch": 4.4913515098211665, "grad_norm": 0.25200924397046026, "learning_rate": 0.0001837960753538824, "loss": 2.864933967590332, "step": 7662, "token_acc": 0.31629532778689806 }, { "epoch": 4.491937848138376, "grad_norm": 0.2658190953437351, "learning_rate": 0.00018379078569357628, "loss": 2.840512275695801, "step": 7663, "token_acc": 0.32234886157544557 }, { "epoch": 4.492524186455585, "grad_norm": 0.24935345999547168, "learning_rate": 0.00018378549524616865, "loss": 2.838761329650879, "step": 7664, "token_acc": 0.3208653955749468 }, { "epoch": 4.493110524772794, "grad_norm": 0.275530838764388, "learning_rate": 0.0001837802040117092, "loss": 2.793308734893799, "step": 7665, "token_acc": 0.32805589684810604 }, { "epoch": 4.493696863090003, "grad_norm": 0.25050665467110955, "learning_rate": 0.00018377491199024758, "loss": 2.8488712310791016, "step": 7666, "token_acc": 0.31815756111656335 }, { "epoch": 4.494283201407212, "grad_norm": 0.26769348010086874, "learning_rate": 0.0001837696191818336, "loss": 2.830003261566162, "step": 7667, "token_acc": 0.32257463616590887 }, { "epoch": 4.494869539724421, "grad_norm": 0.2926102681536656, "learning_rate": 0.00018376432558651692, "loss": 2.804551124572754, "step": 7668, "token_acc": 0.325306432935038 }, { "epoch": 4.49545587804163, "grad_norm": 0.25951568773353506, "learning_rate": 0.00018375903120434727, "loss": 2.7872252464294434, "step": 7669, "token_acc": 0.3274770784190246 }, { "epoch": 4.496042216358839, "grad_norm": 0.3362641735425992, "learning_rate": 0.00018375373603537435, "loss": 2.848722457885742, "step": 7670, "token_acc": 0.3180244797977276 }, { "epoch": 4.4966285546760485, "grad_norm": 0.2321194769328074, "learning_rate": 0.000183748440079648, "loss": 2.8219823837280273, "step": 7671, "token_acc": 0.3237619453218662 }, { "epoch": 4.497214892993257, "grad_norm": 0.33269834293338074, "learning_rate": 0.00018374314333721787, "loss": 2.801387310028076, "step": 7672, "token_acc": 0.3267991074195748 }, { "epoch": 4.497801231310466, "grad_norm": 0.26864861427734366, "learning_rate": 0.00018373784580813377, "loss": 2.8773651123046875, "step": 7673, "token_acc": 0.31324278218280566 }, { "epoch": 4.498387569627675, "grad_norm": 0.30152021613191005, "learning_rate": 0.00018373254749244543, "loss": 2.8401050567626953, "step": 7674, "token_acc": 0.3213943361985876 }, { "epoch": 4.498973907944884, "grad_norm": 0.31484038328458597, "learning_rate": 0.00018372724839020265, "loss": 2.840540885925293, "step": 7675, "token_acc": 0.3195842058891965 }, { "epoch": 4.499560246262093, "grad_norm": 0.2788085496247335, "learning_rate": 0.00018372194850145522, "loss": 2.8317971229553223, "step": 7676, "token_acc": 0.31956652147831305 }, { "epoch": 4.500146584579302, "grad_norm": 0.331859588550261, "learning_rate": 0.00018371664782625287, "loss": 2.7961292266845703, "step": 7677, "token_acc": 0.3262073908221557 }, { "epoch": 4.500732922896511, "grad_norm": 0.24874643438082666, "learning_rate": 0.0001837113463646454, "loss": 2.851634979248047, "step": 7678, "token_acc": 0.3199990560422474 }, { "epoch": 4.5013192612137205, "grad_norm": 0.27834895067658855, "learning_rate": 0.0001837060441166827, "loss": 2.8316891193389893, "step": 7679, "token_acc": 0.32185122790863147 }, { "epoch": 4.50190559953093, "grad_norm": 0.2689830235687572, "learning_rate": 0.00018370074108241445, "loss": 2.8524489402770996, "step": 7680, "token_acc": 0.3187062707684563 }, { "epoch": 4.502491937848139, "grad_norm": 0.27836591029607943, "learning_rate": 0.00018369543726189056, "loss": 2.8248372077941895, "step": 7681, "token_acc": 0.32205663189269745 }, { "epoch": 4.503078276165347, "grad_norm": 0.25176688596720914, "learning_rate": 0.0001836901326551608, "loss": 2.853407382965088, "step": 7682, "token_acc": 0.3180991711685238 }, { "epoch": 4.503664614482556, "grad_norm": 0.2734199076634955, "learning_rate": 0.00018368482726227505, "loss": 2.7773144245147705, "step": 7683, "token_acc": 0.3311217482899772 }, { "epoch": 4.504250952799765, "grad_norm": 0.25878350547295664, "learning_rate": 0.0001836795210832831, "loss": 2.787787675857544, "step": 7684, "token_acc": 0.32750612455073624 }, { "epoch": 4.504837291116974, "grad_norm": 0.26514168643647407, "learning_rate": 0.00018367421411823477, "loss": 2.8163645267486572, "step": 7685, "token_acc": 0.3240872691864917 }, { "epoch": 4.505423629434183, "grad_norm": 0.25164542393815403, "learning_rate": 0.00018366890636717996, "loss": 2.8686861991882324, "step": 7686, "token_acc": 0.3161483062149907 }, { "epoch": 4.5060099677513925, "grad_norm": 0.2526830508818113, "learning_rate": 0.00018366359783016857, "loss": 2.803330898284912, "step": 7687, "token_acc": 0.3258927285911367 }, { "epoch": 4.506596306068602, "grad_norm": 0.2351431618171929, "learning_rate": 0.00018365828850725038, "loss": 2.8135061264038086, "step": 7688, "token_acc": 0.32241193457807116 }, { "epoch": 4.507182644385811, "grad_norm": 0.24847770104026204, "learning_rate": 0.0001836529783984753, "loss": 2.8131935596466064, "step": 7689, "token_acc": 0.3239288675942136 }, { "epoch": 4.50776898270302, "grad_norm": 0.2425791739890838, "learning_rate": 0.00018364766750389322, "loss": 2.8346095085144043, "step": 7690, "token_acc": 0.32081788605583716 }, { "epoch": 4.508355321020229, "grad_norm": 0.2381367978570313, "learning_rate": 0.00018364235582355403, "loss": 2.7978782653808594, "step": 7691, "token_acc": 0.32510165705009725 }, { "epoch": 4.508941659337438, "grad_norm": 0.26345114827975474, "learning_rate": 0.0001836370433575076, "loss": 2.827767848968506, "step": 7692, "token_acc": 0.3228906957574771 }, { "epoch": 4.509527997654647, "grad_norm": 0.25325000226447536, "learning_rate": 0.00018363173010580385, "loss": 2.8251793384552, "step": 7693, "token_acc": 0.32250608655500984 }, { "epoch": 4.510114335971855, "grad_norm": 0.2363759963825418, "learning_rate": 0.00018362641606849272, "loss": 2.8705995082855225, "step": 7694, "token_acc": 0.3160190345715146 }, { "epoch": 4.5107006742890645, "grad_norm": 0.2510667041304358, "learning_rate": 0.00018362110124562405, "loss": 2.860729217529297, "step": 7695, "token_acc": 0.31565470167141313 }, { "epoch": 4.511287012606274, "grad_norm": 0.2469260663570348, "learning_rate": 0.00018361578563724784, "loss": 2.821065902709961, "step": 7696, "token_acc": 0.32349133977936745 }, { "epoch": 4.511873350923483, "grad_norm": 0.26322320319626746, "learning_rate": 0.000183610469243414, "loss": 2.7909674644470215, "step": 7697, "token_acc": 0.3268820714236826 }, { "epoch": 4.512459689240692, "grad_norm": 0.25041222761402426, "learning_rate": 0.00018360515206417247, "loss": 2.845888614654541, "step": 7698, "token_acc": 0.3183089665704809 }, { "epoch": 4.513046027557901, "grad_norm": 0.2189679603488117, "learning_rate": 0.00018359983409957318, "loss": 2.8282458782196045, "step": 7699, "token_acc": 0.32251330786407456 }, { "epoch": 4.51363236587511, "grad_norm": 0.2427598487040979, "learning_rate": 0.00018359451534966613, "loss": 2.8354454040527344, "step": 7700, "token_acc": 0.3202570102519683 }, { "epoch": 4.514218704192319, "grad_norm": 0.22610551529621634, "learning_rate": 0.00018358919581450123, "loss": 2.87906551361084, "step": 7701, "token_acc": 0.31367836167821395 }, { "epoch": 4.514805042509528, "grad_norm": 0.24671542054712703, "learning_rate": 0.00018358387549412844, "loss": 2.8574416637420654, "step": 7702, "token_acc": 0.3174234810415715 }, { "epoch": 4.515391380826737, "grad_norm": 0.2739724118830067, "learning_rate": 0.00018357855438859782, "loss": 2.8229784965515137, "step": 7703, "token_acc": 0.32224383175075316 }, { "epoch": 4.515977719143946, "grad_norm": 0.3096248697251086, "learning_rate": 0.00018357323249795933, "loss": 2.8115198612213135, "step": 7704, "token_acc": 0.32467439302396023 }, { "epoch": 4.516564057461155, "grad_norm": 0.3448268732013189, "learning_rate": 0.0001835679098222629, "loss": 2.8472132682800293, "step": 7705, "token_acc": 0.3194717052843045 }, { "epoch": 4.517150395778364, "grad_norm": 0.2784068835044052, "learning_rate": 0.00018356258636155855, "loss": 2.848871946334839, "step": 7706, "token_acc": 0.3195979794932086 }, { "epoch": 4.517736734095573, "grad_norm": 0.25972195249527863, "learning_rate": 0.0001835572621158963, "loss": 2.8457953929901123, "step": 7707, "token_acc": 0.3176148566353388 }, { "epoch": 4.518323072412782, "grad_norm": 0.36055509196714314, "learning_rate": 0.0001835519370853262, "loss": 2.824819564819336, "step": 7708, "token_acc": 0.3227502756495434 }, { "epoch": 4.518909410729991, "grad_norm": 0.25794553624486094, "learning_rate": 0.00018354661126989823, "loss": 2.8323235511779785, "step": 7709, "token_acc": 0.321746131320494 }, { "epoch": 4.5194957490472, "grad_norm": 0.31642442782176083, "learning_rate": 0.00018354128466966242, "loss": 2.84260892868042, "step": 7710, "token_acc": 0.31852300148992313 }, { "epoch": 4.520082087364409, "grad_norm": 0.2785551984835124, "learning_rate": 0.00018353595728466885, "loss": 2.8041605949401855, "step": 7711, "token_acc": 0.32649987978183564 }, { "epoch": 4.5206684256816185, "grad_norm": 0.3010482688901615, "learning_rate": 0.00018353062911496745, "loss": 2.843400478363037, "step": 7712, "token_acc": 0.3191584350381648 }, { "epoch": 4.521254763998828, "grad_norm": 0.3251697655403242, "learning_rate": 0.0001835253001606084, "loss": 2.8313210010528564, "step": 7713, "token_acc": 0.3208970412394739 }, { "epoch": 4.521841102316037, "grad_norm": 0.2538737971415092, "learning_rate": 0.00018351997042164171, "loss": 2.812047243118286, "step": 7714, "token_acc": 0.3255329524132594 }, { "epoch": 4.522427440633246, "grad_norm": 0.3606575559231632, "learning_rate": 0.00018351463989811742, "loss": 2.8564791679382324, "step": 7715, "token_acc": 0.3180157916006093 }, { "epoch": 4.523013778950454, "grad_norm": 0.24491645535846915, "learning_rate": 0.00018350930859008563, "loss": 2.848299026489258, "step": 7716, "token_acc": 0.3195786447335857 }, { "epoch": 4.523600117267663, "grad_norm": 0.3157460233640792, "learning_rate": 0.00018350397649759644, "loss": 2.8247528076171875, "step": 7717, "token_acc": 0.32257156738072545 }, { "epoch": 4.524186455584872, "grad_norm": 0.2294509113681746, "learning_rate": 0.0001834986436206999, "loss": 2.8300352096557617, "step": 7718, "token_acc": 0.3203935667950408 }, { "epoch": 4.524772793902081, "grad_norm": 0.2941809156530237, "learning_rate": 0.00018349330995944612, "loss": 2.823375940322876, "step": 7719, "token_acc": 0.3231691992797274 }, { "epoch": 4.5253591322192905, "grad_norm": 0.2599852444692932, "learning_rate": 0.0001834879755138852, "loss": 2.845916271209717, "step": 7720, "token_acc": 0.31844565877352765 }, { "epoch": 4.5259454705365, "grad_norm": 0.2623526449500155, "learning_rate": 0.00018348264028406725, "loss": 2.8408684730529785, "step": 7721, "token_acc": 0.32048369463776233 }, { "epoch": 4.526531808853709, "grad_norm": 0.2655947954033707, "learning_rate": 0.00018347730427004238, "loss": 2.847322702407837, "step": 7722, "token_acc": 0.3197789151540735 }, { "epoch": 4.527118147170918, "grad_norm": 0.2560615980751466, "learning_rate": 0.00018347196747186075, "loss": 2.8806638717651367, "step": 7723, "token_acc": 0.3148151623948583 }, { "epoch": 4.527704485488127, "grad_norm": 0.2760801221416481, "learning_rate": 0.0001834666298895724, "loss": 2.8188042640686035, "step": 7724, "token_acc": 0.32337401880445094 }, { "epoch": 4.528290823805335, "grad_norm": 0.2568655371583902, "learning_rate": 0.00018346129152322762, "loss": 2.818319320678711, "step": 7725, "token_acc": 0.3233568433953386 }, { "epoch": 4.528877162122544, "grad_norm": 0.25719504129035675, "learning_rate": 0.00018345595237287643, "loss": 2.8262627124786377, "step": 7726, "token_acc": 0.3199121214513116 }, { "epoch": 4.529463500439753, "grad_norm": 0.2508524629357222, "learning_rate": 0.00018345061243856903, "loss": 2.8120267391204834, "step": 7727, "token_acc": 0.32446483497136464 }, { "epoch": 4.5300498387569625, "grad_norm": 0.23786750855180663, "learning_rate": 0.0001834452717203556, "loss": 2.8563313484191895, "step": 7728, "token_acc": 0.318266097169212 }, { "epoch": 4.530636177074172, "grad_norm": 0.24631307969280639, "learning_rate": 0.00018343993021828622, "loss": 2.8671798706054688, "step": 7729, "token_acc": 0.31686509529494944 }, { "epoch": 4.531222515391381, "grad_norm": 0.23813098711120922, "learning_rate": 0.0001834345879324112, "loss": 2.817723274230957, "step": 7730, "token_acc": 0.32439479057027276 }, { "epoch": 4.53180885370859, "grad_norm": 0.268699102148224, "learning_rate": 0.00018342924486278061, "loss": 2.805649757385254, "step": 7731, "token_acc": 0.32546697038724376 }, { "epoch": 4.532395192025799, "grad_norm": 0.27066613649717747, "learning_rate": 0.00018342390100944473, "loss": 2.8722825050354004, "step": 7732, "token_acc": 0.3155248404873159 }, { "epoch": 4.532981530343008, "grad_norm": 0.2705891384284354, "learning_rate": 0.0001834185563724537, "loss": 2.828000068664551, "step": 7733, "token_acc": 0.322695175248412 }, { "epoch": 4.533567868660217, "grad_norm": 0.24087655041270514, "learning_rate": 0.00018341321095185773, "loss": 2.844444990158081, "step": 7734, "token_acc": 0.31993119086715144 }, { "epoch": 4.534154206977426, "grad_norm": 0.2503007659872924, "learning_rate": 0.00018340786474770705, "loss": 2.8256092071533203, "step": 7735, "token_acc": 0.32170008482762863 }, { "epoch": 4.534740545294635, "grad_norm": 0.24199362316045547, "learning_rate": 0.00018340251776005186, "loss": 2.8503260612487793, "step": 7736, "token_acc": 0.31915609225834923 }, { "epoch": 4.535326883611844, "grad_norm": 0.2398653466851469, "learning_rate": 0.00018339716998894243, "loss": 2.8577136993408203, "step": 7737, "token_acc": 0.3166500891096957 }, { "epoch": 4.535913221929053, "grad_norm": 0.24424923574770097, "learning_rate": 0.00018339182143442895, "loss": 2.8343915939331055, "step": 7738, "token_acc": 0.31977845030604696 }, { "epoch": 4.536499560246262, "grad_norm": 0.2428426928409791, "learning_rate": 0.00018338647209656167, "loss": 2.8432366847991943, "step": 7739, "token_acc": 0.32017942494199164 }, { "epoch": 4.537085898563471, "grad_norm": 0.2575030018794465, "learning_rate": 0.00018338112197539085, "loss": 2.796196937561035, "step": 7740, "token_acc": 0.3271116900032563 }, { "epoch": 4.53767223688068, "grad_norm": 0.2245588683961331, "learning_rate": 0.00018337577107096676, "loss": 2.8128504753112793, "step": 7741, "token_acc": 0.3253845651179507 }, { "epoch": 4.538258575197889, "grad_norm": 0.26632691472626097, "learning_rate": 0.00018337041938333966, "loss": 2.868323802947998, "step": 7742, "token_acc": 0.3162025963168528 }, { "epoch": 4.538844913515098, "grad_norm": 0.22042362667050988, "learning_rate": 0.0001833650669125598, "loss": 2.804248094558716, "step": 7743, "token_acc": 0.3243084074722617 }, { "epoch": 4.5394312518323074, "grad_norm": 0.27116714508763695, "learning_rate": 0.00018335971365867745, "loss": 2.870344638824463, "step": 7744, "token_acc": 0.3169775457448426 }, { "epoch": 4.540017590149517, "grad_norm": 0.2905417931458559, "learning_rate": 0.00018335435962174297, "loss": 2.859323024749756, "step": 7745, "token_acc": 0.3160662545370864 }, { "epoch": 4.540603928466726, "grad_norm": 0.32842308906063283, "learning_rate": 0.00018334900480180654, "loss": 2.852921485900879, "step": 7746, "token_acc": 0.31870050343756345 }, { "epoch": 4.541190266783934, "grad_norm": 0.3295010187610696, "learning_rate": 0.00018334364919891856, "loss": 2.863541841506958, "step": 7747, "token_acc": 0.316482854959829 }, { "epoch": 4.541776605101143, "grad_norm": 0.2990242865085673, "learning_rate": 0.00018333829281312933, "loss": 2.8801183700561523, "step": 7748, "token_acc": 0.314817130523191 }, { "epoch": 4.542362943418352, "grad_norm": 0.26277815322585135, "learning_rate": 0.0001833329356444891, "loss": 2.8537063598632812, "step": 7749, "token_acc": 0.31776040719869114 }, { "epoch": 4.542949281735561, "grad_norm": 0.2995046065951158, "learning_rate": 0.00018332757769304824, "loss": 2.8264999389648438, "step": 7750, "token_acc": 0.32131325236826636 }, { "epoch": 4.54353562005277, "grad_norm": 0.26877552571802543, "learning_rate": 0.00018332221895885707, "loss": 2.826925277709961, "step": 7751, "token_acc": 0.3233190381571242 }, { "epoch": 4.5441219583699795, "grad_norm": 0.23985361545519024, "learning_rate": 0.00018331685944196594, "loss": 2.84481143951416, "step": 7752, "token_acc": 0.3194187333169015 }, { "epoch": 4.544708296687189, "grad_norm": 0.26971939580553167, "learning_rate": 0.0001833114991424252, "loss": 2.833148956298828, "step": 7753, "token_acc": 0.3218081483366362 }, { "epoch": 4.545294635004398, "grad_norm": 0.24868414843816886, "learning_rate": 0.00018330613806028515, "loss": 2.8257768154144287, "step": 7754, "token_acc": 0.3216031205976738 }, { "epoch": 4.545880973321607, "grad_norm": 0.25962824104643906, "learning_rate": 0.00018330077619559622, "loss": 2.8251991271972656, "step": 7755, "token_acc": 0.3235306494968464 }, { "epoch": 4.546467311638816, "grad_norm": 0.27524757104128766, "learning_rate": 0.00018329541354840875, "loss": 2.828958034515381, "step": 7756, "token_acc": 0.32073924626884975 }, { "epoch": 4.547053649956025, "grad_norm": 0.24281601500884478, "learning_rate": 0.0001832900501187731, "loss": 2.8128223419189453, "step": 7757, "token_acc": 0.3229294589441851 }, { "epoch": 4.547639988273234, "grad_norm": 0.2454552967395895, "learning_rate": 0.00018328468590673964, "loss": 2.8386659622192383, "step": 7758, "token_acc": 0.32068397976032526 }, { "epoch": 4.548226326590442, "grad_norm": 0.2465237837874172, "learning_rate": 0.0001832793209123588, "loss": 2.809868812561035, "step": 7759, "token_acc": 0.3246127410102848 }, { "epoch": 4.5488126649076515, "grad_norm": 0.2657239968045349, "learning_rate": 0.000183273955135681, "loss": 2.806796073913574, "step": 7760, "token_acc": 0.3254946519450248 }, { "epoch": 4.549399003224861, "grad_norm": 0.24072194250558124, "learning_rate": 0.00018326858857675655, "loss": 2.8695144653320312, "step": 7761, "token_acc": 0.3147618632131327 }, { "epoch": 4.54998534154207, "grad_norm": 0.26340862936920906, "learning_rate": 0.00018326322123563595, "loss": 2.834646463394165, "step": 7762, "token_acc": 0.3201235014441701 }, { "epoch": 4.550571679859279, "grad_norm": 0.2657876876813026, "learning_rate": 0.00018325785311236955, "loss": 2.8300623893737793, "step": 7763, "token_acc": 0.3214318322189103 }, { "epoch": 4.551158018176488, "grad_norm": 0.2673887957125959, "learning_rate": 0.00018325248420700784, "loss": 2.8325138092041016, "step": 7764, "token_acc": 0.3220799824789601 }, { "epoch": 4.551744356493697, "grad_norm": 0.29585785907176415, "learning_rate": 0.00018324711451960123, "loss": 2.839362144470215, "step": 7765, "token_acc": 0.3210291555893481 }, { "epoch": 4.552330694810906, "grad_norm": 0.26568993539180763, "learning_rate": 0.00018324174405020017, "loss": 2.861663341522217, "step": 7766, "token_acc": 0.31811166543109887 }, { "epoch": 4.552917033128115, "grad_norm": 0.2411867977446255, "learning_rate": 0.00018323637279885505, "loss": 2.826723098754883, "step": 7767, "token_acc": 0.32242093109889325 }, { "epoch": 4.5535033714453235, "grad_norm": 0.25972251553506026, "learning_rate": 0.0001832310007656164, "loss": 2.822232246398926, "step": 7768, "token_acc": 0.32115589465343625 }, { "epoch": 4.554089709762533, "grad_norm": 0.29872736305051434, "learning_rate": 0.0001832256279505346, "loss": 2.8394222259521484, "step": 7769, "token_acc": 0.32147943801015216 }, { "epoch": 4.554676048079742, "grad_norm": 0.3222791630195521, "learning_rate": 0.00018322025435366026, "loss": 2.888413906097412, "step": 7770, "token_acc": 0.31269483690344424 }, { "epoch": 4.555262386396951, "grad_norm": 0.23816429935576924, "learning_rate": 0.00018321487997504372, "loss": 2.8502755165100098, "step": 7771, "token_acc": 0.3173263370978052 }, { "epoch": 4.55584872471416, "grad_norm": 0.2830132020626502, "learning_rate": 0.00018320950481473552, "loss": 2.855494499206543, "step": 7772, "token_acc": 0.3171771984594812 }, { "epoch": 4.556435063031369, "grad_norm": 0.3152414331914778, "learning_rate": 0.00018320412887278616, "loss": 2.852656841278076, "step": 7773, "token_acc": 0.31800564708868256 }, { "epoch": 4.557021401348578, "grad_norm": 0.28174293302466674, "learning_rate": 0.0001831987521492461, "loss": 2.819453001022339, "step": 7774, "token_acc": 0.3228402010780189 }, { "epoch": 4.557607739665787, "grad_norm": 0.23975331590030624, "learning_rate": 0.0001831933746441659, "loss": 2.7996208667755127, "step": 7775, "token_acc": 0.32663832589104913 }, { "epoch": 4.558194077982996, "grad_norm": 0.27299421877467245, "learning_rate": 0.00018318799635759603, "loss": 2.8425116539001465, "step": 7776, "token_acc": 0.3198681153750968 }, { "epoch": 4.5587804163002055, "grad_norm": 0.23405750301381278, "learning_rate": 0.00018318261728958706, "loss": 2.8368468284606934, "step": 7777, "token_acc": 0.3206944868782857 }, { "epoch": 4.559366754617415, "grad_norm": 0.23190484163000724, "learning_rate": 0.0001831772374401895, "loss": 2.8276829719543457, "step": 7778, "token_acc": 0.322621805138251 }, { "epoch": 4.559953092934624, "grad_norm": 0.2435189152224899, "learning_rate": 0.00018317185680945383, "loss": 2.8515148162841797, "step": 7779, "token_acc": 0.31801157734681357 }, { "epoch": 4.560539431251832, "grad_norm": 0.2218372995137078, "learning_rate": 0.00018316647539743066, "loss": 2.8129982948303223, "step": 7780, "token_acc": 0.32315392008066507 }, { "epoch": 4.561125769569041, "grad_norm": 0.24771568155657886, "learning_rate": 0.00018316109320417053, "loss": 2.863996744155884, "step": 7781, "token_acc": 0.31552516411378556 }, { "epoch": 4.56171210788625, "grad_norm": 0.22352530859501157, "learning_rate": 0.00018315571022972397, "loss": 2.8565673828125, "step": 7782, "token_acc": 0.3180355092399173 }, { "epoch": 4.562298446203459, "grad_norm": 0.265209352841456, "learning_rate": 0.00018315032647414162, "loss": 2.8475418090820312, "step": 7783, "token_acc": 0.31968437810740213 }, { "epoch": 4.562884784520668, "grad_norm": 0.2858027314490506, "learning_rate": 0.00018314494193747395, "loss": 2.8420217037200928, "step": 7784, "token_acc": 0.32071090335114133 }, { "epoch": 4.5634711228378775, "grad_norm": 0.22678790269619628, "learning_rate": 0.0001831395566197716, "loss": 2.8355610370635986, "step": 7785, "token_acc": 0.32029979588030233 }, { "epoch": 4.564057461155087, "grad_norm": 0.28791520626009, "learning_rate": 0.00018313417052108513, "loss": 2.849102020263672, "step": 7786, "token_acc": 0.31815035555129245 }, { "epoch": 4.564643799472296, "grad_norm": 0.274288373159216, "learning_rate": 0.0001831287836414652, "loss": 2.8657054901123047, "step": 7787, "token_acc": 0.3156505886974125 }, { "epoch": 4.565230137789505, "grad_norm": 0.23586249416258506, "learning_rate": 0.0001831233959809623, "loss": 2.8282546997070312, "step": 7788, "token_acc": 0.322474327967383 }, { "epoch": 4.565816476106714, "grad_norm": 0.32314642666883303, "learning_rate": 0.00018311800753962717, "loss": 2.8721020221710205, "step": 7789, "token_acc": 0.31546210369235006 }, { "epoch": 4.566402814423922, "grad_norm": 0.29037838166563834, "learning_rate": 0.00018311261831751032, "loss": 2.862663745880127, "step": 7790, "token_acc": 0.31788014633894984 }, { "epoch": 4.566989152741131, "grad_norm": 0.23265434731409984, "learning_rate": 0.00018310722831466243, "loss": 2.8437297344207764, "step": 7791, "token_acc": 0.31765811417413337 }, { "epoch": 4.56757549105834, "grad_norm": 0.2800196567931235, "learning_rate": 0.00018310183753113415, "loss": 2.8966193199157715, "step": 7792, "token_acc": 0.3117929985702088 }, { "epoch": 4.5681618293755495, "grad_norm": 0.23139904469358016, "learning_rate": 0.00018309644596697605, "loss": 2.8361258506774902, "step": 7793, "token_acc": 0.3207524775977687 }, { "epoch": 4.568748167692759, "grad_norm": 0.22594515254513886, "learning_rate": 0.0001830910536222388, "loss": 2.8370652198791504, "step": 7794, "token_acc": 0.31921325786846555 }, { "epoch": 4.569334506009968, "grad_norm": 0.24472308179898072, "learning_rate": 0.0001830856604969731, "loss": 2.8430404663085938, "step": 7795, "token_acc": 0.320558266045107 }, { "epoch": 4.569920844327177, "grad_norm": 0.27807011160822387, "learning_rate": 0.00018308026659122958, "loss": 2.8427541255950928, "step": 7796, "token_acc": 0.31951301449937514 }, { "epoch": 4.570507182644386, "grad_norm": 0.256299708169299, "learning_rate": 0.00018307487190505887, "loss": 2.8402578830718994, "step": 7797, "token_acc": 0.3198915327774388 }, { "epoch": 4.571093520961595, "grad_norm": 0.23425294686614173, "learning_rate": 0.00018306947643851172, "loss": 2.8055636882781982, "step": 7798, "token_acc": 0.32476934771742755 }, { "epoch": 4.571679859278804, "grad_norm": 0.27587340913572717, "learning_rate": 0.00018306408019163876, "loss": 2.829970598220825, "step": 7799, "token_acc": 0.3211069555588057 }, { "epoch": 4.572266197596013, "grad_norm": 0.25970224807584597, "learning_rate": 0.0001830586831644907, "loss": 2.854948043823242, "step": 7800, "token_acc": 0.3184034189286032 }, { "epoch": 4.572852535913222, "grad_norm": 0.23958864770343005, "learning_rate": 0.00018305328535711822, "loss": 2.846958637237549, "step": 7801, "token_acc": 0.3191086677937993 }, { "epoch": 4.573438874230431, "grad_norm": 0.2857315569416896, "learning_rate": 0.00018304788676957206, "loss": 2.862722396850586, "step": 7802, "token_acc": 0.3171784826656557 }, { "epoch": 4.57402521254764, "grad_norm": 0.23342086728346284, "learning_rate": 0.0001830424874019029, "loss": 2.8656482696533203, "step": 7803, "token_acc": 0.3162522688064393 }, { "epoch": 4.574611550864849, "grad_norm": 0.25177894095160985, "learning_rate": 0.00018303708725416149, "loss": 2.8239309787750244, "step": 7804, "token_acc": 0.32340771561155673 }, { "epoch": 4.575197889182058, "grad_norm": 0.27419890713268885, "learning_rate": 0.00018303168632639852, "loss": 2.8536462783813477, "step": 7805, "token_acc": 0.3174355331058873 }, { "epoch": 4.575784227499267, "grad_norm": 0.2721272329225644, "learning_rate": 0.00018302628461866477, "loss": 2.856517791748047, "step": 7806, "token_acc": 0.31823291010986926 }, { "epoch": 4.576370565816476, "grad_norm": 0.27017719850495087, "learning_rate": 0.00018302088213101092, "loss": 2.874941349029541, "step": 7807, "token_acc": 0.3150082049068856 }, { "epoch": 4.576956904133685, "grad_norm": 0.27348075464918403, "learning_rate": 0.00018301547886348778, "loss": 2.8055880069732666, "step": 7808, "token_acc": 0.32659511928361473 }, { "epoch": 4.577543242450894, "grad_norm": 0.3572565493349303, "learning_rate": 0.00018301007481614606, "loss": 2.8623769283294678, "step": 7809, "token_acc": 0.31739804473692534 }, { "epoch": 4.5781295807681035, "grad_norm": 0.30548058456911925, "learning_rate": 0.00018300466998903657, "loss": 2.8241465091705322, "step": 7810, "token_acc": 0.3217803614547543 }, { "epoch": 4.578715919085313, "grad_norm": 0.24111871402030108, "learning_rate": 0.00018299926438221004, "loss": 2.8353607654571533, "step": 7811, "token_acc": 0.32163103144036054 }, { "epoch": 4.579302257402521, "grad_norm": 0.37468889057899146, "learning_rate": 0.00018299385799571728, "loss": 2.821384906768799, "step": 7812, "token_acc": 0.3228421136732069 }, { "epoch": 4.57988859571973, "grad_norm": 0.27531522437819533, "learning_rate": 0.00018298845082960905, "loss": 2.8252944946289062, "step": 7813, "token_acc": 0.32101906020003773 }, { "epoch": 4.580474934036939, "grad_norm": 0.2463421751008095, "learning_rate": 0.00018298304288393615, "loss": 2.812098503112793, "step": 7814, "token_acc": 0.3242411284603719 }, { "epoch": 4.581061272354148, "grad_norm": 0.24738873897900018, "learning_rate": 0.00018297763415874938, "loss": 2.8685102462768555, "step": 7815, "token_acc": 0.3167015599706038 }, { "epoch": 4.581647610671357, "grad_norm": 0.2533642443646533, "learning_rate": 0.00018297222465409955, "loss": 2.8492283821105957, "step": 7816, "token_acc": 0.31875176892457174 }, { "epoch": 4.582233948988566, "grad_norm": 0.2630910394796865, "learning_rate": 0.00018296681437003745, "loss": 2.8641209602355957, "step": 7817, "token_acc": 0.31842938665605924 }, { "epoch": 4.5828202873057755, "grad_norm": 0.23626625670532087, "learning_rate": 0.000182961403306614, "loss": 2.8185791969299316, "step": 7818, "token_acc": 0.32204823704518354 }, { "epoch": 4.583406625622985, "grad_norm": 0.2577162684483696, "learning_rate": 0.0001829559914638799, "loss": 2.8485255241394043, "step": 7819, "token_acc": 0.31982671350371233 }, { "epoch": 4.583992963940194, "grad_norm": 0.24053110902988406, "learning_rate": 0.00018295057884188607, "loss": 2.8399159908294678, "step": 7820, "token_acc": 0.319246256094878 }, { "epoch": 4.584579302257403, "grad_norm": 0.2513793498290437, "learning_rate": 0.00018294516544068332, "loss": 2.83280086517334, "step": 7821, "token_acc": 0.32151699474263357 }, { "epoch": 4.585165640574612, "grad_norm": 0.23046525781071497, "learning_rate": 0.0001829397512603225, "loss": 2.8607687950134277, "step": 7822, "token_acc": 0.3169031854334047 }, { "epoch": 4.585751978891821, "grad_norm": 0.24966284611350234, "learning_rate": 0.0001829343363008545, "loss": 2.8428847789764404, "step": 7823, "token_acc": 0.32100159051071375 }, { "epoch": 4.586338317209029, "grad_norm": 0.24049942168834643, "learning_rate": 0.00018292892056233015, "loss": 2.8300681114196777, "step": 7824, "token_acc": 0.3207356734327689 }, { "epoch": 4.586924655526238, "grad_norm": 0.22481430650226586, "learning_rate": 0.00018292350404480035, "loss": 2.870157241821289, "step": 7825, "token_acc": 0.3145985618753535 }, { "epoch": 4.5875109938434475, "grad_norm": 0.23572608428657887, "learning_rate": 0.00018291808674831595, "loss": 2.8344593048095703, "step": 7826, "token_acc": 0.3206641519030262 }, { "epoch": 4.588097332160657, "grad_norm": 0.2481824877489451, "learning_rate": 0.0001829126686729279, "loss": 2.811589241027832, "step": 7827, "token_acc": 0.32411180998898026 }, { "epoch": 4.588683670477866, "grad_norm": 0.2450593350411226, "learning_rate": 0.000182907249818687, "loss": 2.85050106048584, "step": 7828, "token_acc": 0.3180259753501829 }, { "epoch": 4.589270008795075, "grad_norm": 0.24852448877468736, "learning_rate": 0.00018290183018564426, "loss": 2.8246614933013916, "step": 7829, "token_acc": 0.32319598721562603 }, { "epoch": 4.589856347112284, "grad_norm": 0.27086950756073297, "learning_rate": 0.0001828964097738505, "loss": 2.8780689239501953, "step": 7830, "token_acc": 0.31384846838143543 }, { "epoch": 4.590442685429493, "grad_norm": 0.24772652969747724, "learning_rate": 0.0001828909885833567, "loss": 2.8088693618774414, "step": 7831, "token_acc": 0.3244980862235561 }, { "epoch": 4.591029023746702, "grad_norm": 0.2796965972877741, "learning_rate": 0.00018288556661421375, "loss": 2.862424612045288, "step": 7832, "token_acc": 0.3149672024203946 }, { "epoch": 4.59161536206391, "grad_norm": 0.3445269304582732, "learning_rate": 0.0001828801438664726, "loss": 2.7970192432403564, "step": 7833, "token_acc": 0.32732064860009946 }, { "epoch": 4.5922017003811195, "grad_norm": 0.27887179214695224, "learning_rate": 0.00018287472034018415, "loss": 2.8342642784118652, "step": 7834, "token_acc": 0.32108564289538766 }, { "epoch": 4.592788038698329, "grad_norm": 0.2585063846205059, "learning_rate": 0.0001828692960353994, "loss": 2.8776583671569824, "step": 7835, "token_acc": 0.3147913803386666 }, { "epoch": 4.593374377015538, "grad_norm": 0.2831366553211178, "learning_rate": 0.00018286387095216929, "loss": 2.847933292388916, "step": 7836, "token_acc": 0.3188762085993271 }, { "epoch": 4.593960715332747, "grad_norm": 0.29325624175012105, "learning_rate": 0.00018285844509054473, "loss": 2.8458805084228516, "step": 7837, "token_acc": 0.319692704970949 }, { "epoch": 4.594547053649956, "grad_norm": 0.22571006618178352, "learning_rate": 0.00018285301845057675, "loss": 2.8271756172180176, "step": 7838, "token_acc": 0.32144297845553 }, { "epoch": 4.595133391967165, "grad_norm": 0.31784175217154903, "learning_rate": 0.00018284759103231633, "loss": 2.863452911376953, "step": 7839, "token_acc": 0.3160327320356369 }, { "epoch": 4.595719730284374, "grad_norm": 0.3037661600289063, "learning_rate": 0.00018284216283581442, "loss": 2.8247365951538086, "step": 7840, "token_acc": 0.32238351441571966 }, { "epoch": 4.596306068601583, "grad_norm": 0.2586601393515336, "learning_rate": 0.000182836733861122, "loss": 2.8592166900634766, "step": 7841, "token_acc": 0.3171403675778043 }, { "epoch": 4.596892406918792, "grad_norm": 0.29988819639009845, "learning_rate": 0.00018283130410829012, "loss": 2.8516881465911865, "step": 7842, "token_acc": 0.31908946692518825 }, { "epoch": 4.5974787452360015, "grad_norm": 0.2693751474874164, "learning_rate": 0.00018282587357736974, "loss": 2.8492956161499023, "step": 7843, "token_acc": 0.31783809356662673 }, { "epoch": 4.598065083553211, "grad_norm": 0.27202559174314184, "learning_rate": 0.0001828204422684119, "loss": 2.8375842571258545, "step": 7844, "token_acc": 0.3208790445028977 }, { "epoch": 4.598651421870419, "grad_norm": 0.2899618506522721, "learning_rate": 0.0001828150101814676, "loss": 2.8572168350219727, "step": 7845, "token_acc": 0.31727218325615 }, { "epoch": 4.599237760187628, "grad_norm": 0.24226520770534732, "learning_rate": 0.00018280957731658788, "loss": 2.850071907043457, "step": 7846, "token_acc": 0.317172711221743 }, { "epoch": 4.599824098504837, "grad_norm": 0.23559293701167766, "learning_rate": 0.00018280414367382374, "loss": 2.8359532356262207, "step": 7847, "token_acc": 0.31939518533630457 }, { "epoch": 4.600410436822046, "grad_norm": 0.24035691124530492, "learning_rate": 0.00018279870925322632, "loss": 2.893117904663086, "step": 7848, "token_acc": 0.31267529638132274 }, { "epoch": 4.600996775139255, "grad_norm": 0.2554469630008216, "learning_rate": 0.00018279327405484652, "loss": 2.811690330505371, "step": 7849, "token_acc": 0.324535231264522 }, { "epoch": 4.601583113456464, "grad_norm": 0.2396565782233264, "learning_rate": 0.00018278783807873552, "loss": 2.848278045654297, "step": 7850, "token_acc": 0.3203838251180919 }, { "epoch": 4.6021694517736735, "grad_norm": 0.25213005700708674, "learning_rate": 0.00018278240132494432, "loss": 2.8447041511535645, "step": 7851, "token_acc": 0.32032794122931757 }, { "epoch": 4.602755790090883, "grad_norm": 0.2947351926593888, "learning_rate": 0.000182776963793524, "loss": 2.866109848022461, "step": 7852, "token_acc": 0.31711083668276285 }, { "epoch": 4.603342128408092, "grad_norm": 0.28526032288419606, "learning_rate": 0.0001827715254845257, "loss": 2.8366386890411377, "step": 7853, "token_acc": 0.32065958237809034 }, { "epoch": 4.603928466725301, "grad_norm": 0.23936794135796188, "learning_rate": 0.00018276608639800039, "loss": 2.8710098266601562, "step": 7854, "token_acc": 0.31488979653772714 }, { "epoch": 4.604514805042509, "grad_norm": 0.2805361461361784, "learning_rate": 0.00018276064653399926, "loss": 2.8772459030151367, "step": 7855, "token_acc": 0.3143370061531657 }, { "epoch": 4.605101143359718, "grad_norm": 0.24174380212392402, "learning_rate": 0.00018275520589257336, "loss": 2.815774440765381, "step": 7856, "token_acc": 0.32284009340136643 }, { "epoch": 4.605687481676927, "grad_norm": 0.2833659999438425, "learning_rate": 0.00018274976447377384, "loss": 2.8373234272003174, "step": 7857, "token_acc": 0.32079275323695255 }, { "epoch": 4.606273819994136, "grad_norm": 0.27678413035499294, "learning_rate": 0.0001827443222776518, "loss": 2.8721837997436523, "step": 7858, "token_acc": 0.3146242712021359 }, { "epoch": 4.6068601583113455, "grad_norm": 0.2572475958811099, "learning_rate": 0.00018273887930425828, "loss": 2.8448867797851562, "step": 7859, "token_acc": 0.3174822167673039 }, { "epoch": 4.607446496628555, "grad_norm": 0.2870587087704763, "learning_rate": 0.00018273343555364456, "loss": 2.832009792327881, "step": 7860, "token_acc": 0.32387689709390727 }, { "epoch": 4.608032834945764, "grad_norm": 0.29934819039576477, "learning_rate": 0.00018272799102586165, "loss": 2.870086669921875, "step": 7861, "token_acc": 0.314192259224115 }, { "epoch": 4.608619173262973, "grad_norm": 0.26667296532610885, "learning_rate": 0.00018272254572096076, "loss": 2.8548755645751953, "step": 7862, "token_acc": 0.31737830458986327 }, { "epoch": 4.609205511580182, "grad_norm": 0.29723707616637146, "learning_rate": 0.00018271709963899304, "loss": 2.8359103202819824, "step": 7863, "token_acc": 0.3206878099817106 }, { "epoch": 4.609791849897391, "grad_norm": 0.2648519552224178, "learning_rate": 0.00018271165278000958, "loss": 2.8475544452667236, "step": 7864, "token_acc": 0.3190191384580127 }, { "epoch": 4.6103781882146, "grad_norm": 0.2656417297804327, "learning_rate": 0.00018270620514406166, "loss": 2.835358142852783, "step": 7865, "token_acc": 0.32036495936593334 }, { "epoch": 4.610964526531809, "grad_norm": 0.2559937316247502, "learning_rate": 0.00018270075673120035, "loss": 2.827864646911621, "step": 7866, "token_acc": 0.3214763251615773 }, { "epoch": 4.6115508648490176, "grad_norm": 0.27065655416297224, "learning_rate": 0.00018269530754147688, "loss": 2.8784027099609375, "step": 7867, "token_acc": 0.3158678361761053 }, { "epoch": 4.612137203166227, "grad_norm": 0.2734542264336458, "learning_rate": 0.0001826898575749424, "loss": 2.8656935691833496, "step": 7868, "token_acc": 0.31775990265596316 }, { "epoch": 4.612723541483436, "grad_norm": 0.29642634184843547, "learning_rate": 0.0001826844068316482, "loss": 2.90169620513916, "step": 7869, "token_acc": 0.31115029944285966 }, { "epoch": 4.613309879800645, "grad_norm": 0.24135977214080212, "learning_rate": 0.00018267895531164538, "loss": 2.8739449977874756, "step": 7870, "token_acc": 0.31650011243064596 }, { "epoch": 4.613896218117854, "grad_norm": 0.2681046065163028, "learning_rate": 0.0001826735030149852, "loss": 2.823215961456299, "step": 7871, "token_acc": 0.3232908578933598 }, { "epoch": 4.614482556435063, "grad_norm": 0.27730839712865574, "learning_rate": 0.00018266804994171882, "loss": 2.873626947402954, "step": 7872, "token_acc": 0.3131181404665526 }, { "epoch": 4.615068894752272, "grad_norm": 0.2833721416001539, "learning_rate": 0.00018266259609189754, "loss": 2.837857961654663, "step": 7873, "token_acc": 0.32025166947943334 }, { "epoch": 4.615655233069481, "grad_norm": 0.2444579688224471, "learning_rate": 0.00018265714146557257, "loss": 2.822655200958252, "step": 7874, "token_acc": 0.3246272132342961 }, { "epoch": 4.6162415713866904, "grad_norm": 0.26811158077423547, "learning_rate": 0.00018265168606279515, "loss": 2.8396034240722656, "step": 7875, "token_acc": 0.3194193122813502 }, { "epoch": 4.616827909703899, "grad_norm": 0.26225489752115166, "learning_rate": 0.00018264622988361647, "loss": 2.8712821006774902, "step": 7876, "token_acc": 0.31535483305678313 }, { "epoch": 4.617414248021108, "grad_norm": 0.25238118945757915, "learning_rate": 0.00018264077292808785, "loss": 2.782496452331543, "step": 7877, "token_acc": 0.3298738094008054 }, { "epoch": 4.618000586338317, "grad_norm": 0.24067269461754287, "learning_rate": 0.0001826353151962605, "loss": 2.842684745788574, "step": 7878, "token_acc": 0.3180024376926282 }, { "epoch": 4.618586924655526, "grad_norm": 0.25935330099613063, "learning_rate": 0.00018262985668818574, "loss": 2.813405752182007, "step": 7879, "token_acc": 0.3238514854036227 }, { "epoch": 4.619173262972735, "grad_norm": 0.23512847483886792, "learning_rate": 0.00018262439740391483, "loss": 2.8301210403442383, "step": 7880, "token_acc": 0.32151287372940424 }, { "epoch": 4.619759601289944, "grad_norm": 0.2379078941600059, "learning_rate": 0.00018261893734349905, "loss": 2.883781909942627, "step": 7881, "token_acc": 0.31364861028533475 }, { "epoch": 4.620345939607153, "grad_norm": 0.2312883308309407, "learning_rate": 0.00018261347650698966, "loss": 2.8516929149627686, "step": 7882, "token_acc": 0.3181142273426691 }, { "epoch": 4.6209322779243625, "grad_norm": 0.2499195545615533, "learning_rate": 0.000182608014894438, "loss": 2.8743200302124023, "step": 7883, "token_acc": 0.31432021464577464 }, { "epoch": 4.621518616241572, "grad_norm": 0.24105962534228284, "learning_rate": 0.00018260255250589533, "loss": 2.8688597679138184, "step": 7884, "token_acc": 0.3155860806807666 }, { "epoch": 4.622104954558781, "grad_norm": 0.26792981412727995, "learning_rate": 0.000182597089341413, "loss": 2.8817801475524902, "step": 7885, "token_acc": 0.31389339074203976 }, { "epoch": 4.62269129287599, "grad_norm": 0.29762353346825826, "learning_rate": 0.00018259162540104233, "loss": 2.8442492485046387, "step": 7886, "token_acc": 0.319871581387294 }, { "epoch": 4.623277631193199, "grad_norm": 0.32931802844472696, "learning_rate": 0.00018258616068483465, "loss": 2.8766069412231445, "step": 7887, "token_acc": 0.3144373313167399 }, { "epoch": 4.623863969510407, "grad_norm": 0.34291367122111555, "learning_rate": 0.00018258069519284123, "loss": 2.854952335357666, "step": 7888, "token_acc": 0.31881827893545295 }, { "epoch": 4.624450307827616, "grad_norm": 0.2918959051083099, "learning_rate": 0.00018257522892511346, "loss": 2.863406181335449, "step": 7889, "token_acc": 0.316234646268233 }, { "epoch": 4.625036646144825, "grad_norm": 0.26709298393070696, "learning_rate": 0.00018256976188170274, "loss": 2.8505213260650635, "step": 7890, "token_acc": 0.319156483522216 }, { "epoch": 4.6256229844620345, "grad_norm": 0.33236602466018683, "learning_rate": 0.0001825642940626603, "loss": 2.855293035507202, "step": 7891, "token_acc": 0.316943463782598 }, { "epoch": 4.626209322779244, "grad_norm": 0.27008123478044915, "learning_rate": 0.00018255882546803763, "loss": 2.812650680541992, "step": 7892, "token_acc": 0.32468300050895044 }, { "epoch": 4.626795661096453, "grad_norm": 0.2575234294273729, "learning_rate": 0.00018255335609788605, "loss": 2.818479061126709, "step": 7893, "token_acc": 0.323048706876538 }, { "epoch": 4.627381999413662, "grad_norm": 0.2473397339241833, "learning_rate": 0.0001825478859522569, "loss": 2.837080240249634, "step": 7894, "token_acc": 0.32048089154856396 }, { "epoch": 4.627968337730871, "grad_norm": 0.24936538176168208, "learning_rate": 0.00018254241503120157, "loss": 2.863457202911377, "step": 7895, "token_acc": 0.31589722775939666 }, { "epoch": 4.62855467604808, "grad_norm": 0.2491430871006636, "learning_rate": 0.00018253694333477153, "loss": 2.825207233428955, "step": 7896, "token_acc": 0.3221353063354448 }, { "epoch": 4.629141014365289, "grad_norm": 0.2658614065131225, "learning_rate": 0.0001825314708630181, "loss": 2.8497893810272217, "step": 7897, "token_acc": 0.3195753584047652 }, { "epoch": 4.629727352682497, "grad_norm": 0.24281292267718096, "learning_rate": 0.00018252599761599272, "loss": 2.8542709350585938, "step": 7898, "token_acc": 0.31626812068046234 }, { "epoch": 4.6303136909997065, "grad_norm": 0.2993708275189512, "learning_rate": 0.00018252052359374682, "loss": 2.8671603202819824, "step": 7899, "token_acc": 0.31535155774416573 }, { "epoch": 4.630900029316916, "grad_norm": 0.2554954225216052, "learning_rate": 0.00018251504879633176, "loss": 2.8506011962890625, "step": 7900, "token_acc": 0.3195963343752355 }, { "epoch": 4.631486367634125, "grad_norm": 0.2431903895642876, "learning_rate": 0.00018250957322379902, "loss": 2.8726491928100586, "step": 7901, "token_acc": 0.31569443627044463 }, { "epoch": 4.632072705951334, "grad_norm": 0.2820239402368769, "learning_rate": 0.00018250409687620004, "loss": 2.832711935043335, "step": 7902, "token_acc": 0.3218508196894364 }, { "epoch": 4.632659044268543, "grad_norm": 0.22484150997921395, "learning_rate": 0.00018249861975358625, "loss": 2.8249940872192383, "step": 7903, "token_acc": 0.32391055990253925 }, { "epoch": 4.633245382585752, "grad_norm": 0.2612888906469905, "learning_rate": 0.00018249314185600905, "loss": 2.8473429679870605, "step": 7904, "token_acc": 0.31836237656405375 }, { "epoch": 4.633831720902961, "grad_norm": 0.2417603188943553, "learning_rate": 0.00018248766318351998, "loss": 2.84458065032959, "step": 7905, "token_acc": 0.3188160774758404 }, { "epoch": 4.63441805922017, "grad_norm": 0.25230317882723874, "learning_rate": 0.00018248218373617046, "loss": 2.8415584564208984, "step": 7906, "token_acc": 0.3186439195550731 }, { "epoch": 4.635004397537379, "grad_norm": 0.24961419769172538, "learning_rate": 0.00018247670351401199, "loss": 2.835369348526001, "step": 7907, "token_acc": 0.32092409207343287 }, { "epoch": 4.6355907358545885, "grad_norm": 0.2365763623754291, "learning_rate": 0.000182471222517096, "loss": 2.781708240509033, "step": 7908, "token_acc": 0.32672511514384045 }, { "epoch": 4.636177074171798, "grad_norm": 0.25215445113088497, "learning_rate": 0.000182465740745474, "loss": 2.875487804412842, "step": 7909, "token_acc": 0.31556962297870506 }, { "epoch": 4.636763412489006, "grad_norm": 0.25069168278640086, "learning_rate": 0.0001824602581991975, "loss": 2.835012197494507, "step": 7910, "token_acc": 0.3215695379614581 }, { "epoch": 4.637349750806215, "grad_norm": 0.2693362697575483, "learning_rate": 0.000182454774878318, "loss": 2.8289010524749756, "step": 7911, "token_acc": 0.32309097475569604 }, { "epoch": 4.637936089123424, "grad_norm": 0.30904125073954924, "learning_rate": 0.000182449290782887, "loss": 2.811495065689087, "step": 7912, "token_acc": 0.324672267690302 }, { "epoch": 4.638522427440633, "grad_norm": 0.2766838901905144, "learning_rate": 0.00018244380591295601, "loss": 2.875392436981201, "step": 7913, "token_acc": 0.31481676103958023 }, { "epoch": 4.639108765757842, "grad_norm": 0.24824851438576034, "learning_rate": 0.00018243832026857654, "loss": 2.8296966552734375, "step": 7914, "token_acc": 0.32114433974454804 }, { "epoch": 4.639695104075051, "grad_norm": 0.262304411358143, "learning_rate": 0.00018243283384980017, "loss": 2.8442044258117676, "step": 7915, "token_acc": 0.3191658406482797 }, { "epoch": 4.6402814423922605, "grad_norm": 0.2363574121149564, "learning_rate": 0.00018242734665667839, "loss": 2.829318046569824, "step": 7916, "token_acc": 0.3225189605160676 }, { "epoch": 4.64086778070947, "grad_norm": 0.2613041675942796, "learning_rate": 0.00018242185868926276, "loss": 2.852980136871338, "step": 7917, "token_acc": 0.31839223213277634 }, { "epoch": 4.641454119026679, "grad_norm": 0.2523636579909328, "learning_rate": 0.00018241636994760483, "loss": 2.842521905899048, "step": 7918, "token_acc": 0.31982866508527474 }, { "epoch": 4.642040457343887, "grad_norm": 0.2607440257073755, "learning_rate": 0.00018241088043175616, "loss": 2.8563544750213623, "step": 7919, "token_acc": 0.31525375843964915 }, { "epoch": 4.642626795661096, "grad_norm": 0.2633314326850421, "learning_rate": 0.00018240539014176832, "loss": 2.8983917236328125, "step": 7920, "token_acc": 0.31031506038932516 }, { "epoch": 4.643213133978305, "grad_norm": 0.254956005658883, "learning_rate": 0.00018239989907769288, "loss": 2.848633289337158, "step": 7921, "token_acc": 0.31896490174975606 }, { "epoch": 4.643799472295514, "grad_norm": 0.26149952836772633, "learning_rate": 0.00018239440723958144, "loss": 2.869771718978882, "step": 7922, "token_acc": 0.31713107616657277 }, { "epoch": 4.644385810612723, "grad_norm": 0.23566169086452762, "learning_rate": 0.00018238891462748555, "loss": 2.8389174938201904, "step": 7923, "token_acc": 0.3185520788101055 }, { "epoch": 4.6449721489299325, "grad_norm": 0.24112410038996554, "learning_rate": 0.00018238342124145686, "loss": 2.8437905311584473, "step": 7924, "token_acc": 0.31976466473338155 }, { "epoch": 4.645558487247142, "grad_norm": 0.265799625799581, "learning_rate": 0.0001823779270815469, "loss": 2.88850736618042, "step": 7925, "token_acc": 0.312613298207202 }, { "epoch": 4.646144825564351, "grad_norm": 0.28357451765928265, "learning_rate": 0.00018237243214780735, "loss": 2.871522903442383, "step": 7926, "token_acc": 0.3164327401257655 }, { "epoch": 4.64673116388156, "grad_norm": 0.307720034347513, "learning_rate": 0.00018236693644028978, "loss": 2.8813347816467285, "step": 7927, "token_acc": 0.31313363453553056 }, { "epoch": 4.647317502198769, "grad_norm": 0.29712265757239714, "learning_rate": 0.00018236143995904584, "loss": 2.8462624549865723, "step": 7928, "token_acc": 0.3186442569075344 }, { "epoch": 4.647903840515978, "grad_norm": 0.2364171324817206, "learning_rate": 0.00018235594270412717, "loss": 2.8362178802490234, "step": 7929, "token_acc": 0.3209897319186674 }, { "epoch": 4.648490178833187, "grad_norm": 0.2493959058610231, "learning_rate": 0.00018235044467558535, "loss": 2.9147772789001465, "step": 7930, "token_acc": 0.3094253885958503 }, { "epoch": 4.649076517150396, "grad_norm": 0.2496013590745673, "learning_rate": 0.0001823449458734721, "loss": 2.905533790588379, "step": 7931, "token_acc": 0.31080001462072276 }, { "epoch": 4.6496628554676045, "grad_norm": 0.24588646192106783, "learning_rate": 0.00018233944629783908, "loss": 2.8537509441375732, "step": 7932, "token_acc": 0.3181794966223754 }, { "epoch": 4.650249193784814, "grad_norm": 0.24161723195144777, "learning_rate": 0.00018233394594873787, "loss": 2.844881534576416, "step": 7933, "token_acc": 0.31873395585262454 }, { "epoch": 4.650835532102023, "grad_norm": 0.250931657193241, "learning_rate": 0.00018232844482622018, "loss": 2.827566623687744, "step": 7934, "token_acc": 0.320910831689462 }, { "epoch": 4.651421870419232, "grad_norm": 0.23534196420030198, "learning_rate": 0.0001823229429303377, "loss": 2.8187108039855957, "step": 7935, "token_acc": 0.32303635387061164 }, { "epoch": 4.652008208736441, "grad_norm": 0.2385197962506422, "learning_rate": 0.00018231744026114211, "loss": 2.8507349491119385, "step": 7936, "token_acc": 0.3177959881964093 }, { "epoch": 4.65259454705365, "grad_norm": 0.3206774731811037, "learning_rate": 0.0001823119368186851, "loss": 2.866292953491211, "step": 7937, "token_acc": 0.31595817751659927 }, { "epoch": 4.653180885370859, "grad_norm": 0.36090073723247323, "learning_rate": 0.00018230643260301838, "loss": 2.8455991744995117, "step": 7938, "token_acc": 0.3206590519090519 }, { "epoch": 4.653767223688068, "grad_norm": 0.2658573355069035, "learning_rate": 0.0001823009276141936, "loss": 2.837843418121338, "step": 7939, "token_acc": 0.3206343609510606 }, { "epoch": 4.654353562005277, "grad_norm": 0.31477198523763134, "learning_rate": 0.0001822954218522625, "loss": 2.876812219619751, "step": 7940, "token_acc": 0.3146120220013915 }, { "epoch": 4.654939900322486, "grad_norm": 0.3228202901294854, "learning_rate": 0.0001822899153172768, "loss": 2.8511581420898438, "step": 7941, "token_acc": 0.31764272991968945 }, { "epoch": 4.655526238639695, "grad_norm": 0.2749991488934795, "learning_rate": 0.00018228440800928825, "loss": 2.876026153564453, "step": 7942, "token_acc": 0.31263049446785657 }, { "epoch": 4.656112576956904, "grad_norm": 0.4193431531418702, "learning_rate": 0.0001822788999283486, "loss": 2.8509178161621094, "step": 7943, "token_acc": 0.3180476616551776 }, { "epoch": 4.656698915274113, "grad_norm": 0.29646913743894887, "learning_rate": 0.00018227339107450952, "loss": 2.8604750633239746, "step": 7944, "token_acc": 0.3187382594003505 }, { "epoch": 4.657285253591322, "grad_norm": 0.34258409133184176, "learning_rate": 0.00018226788144782278, "loss": 2.8860936164855957, "step": 7945, "token_acc": 0.31379240080577975 }, { "epoch": 4.657871591908531, "grad_norm": 0.2778106906108733, "learning_rate": 0.00018226237104834018, "loss": 2.845273494720459, "step": 7946, "token_acc": 0.3196495404776459 }, { "epoch": 4.65845793022574, "grad_norm": 0.32539060250475726, "learning_rate": 0.00018225685987611345, "loss": 2.8268966674804688, "step": 7947, "token_acc": 0.32381397037844006 }, { "epoch": 4.659044268542949, "grad_norm": 0.26938212222598285, "learning_rate": 0.00018225134793119438, "loss": 2.860983371734619, "step": 7948, "token_acc": 0.315939866669127 }, { "epoch": 4.6596306068601585, "grad_norm": 0.2893828278782226, "learning_rate": 0.0001822458352136347, "loss": 2.861258029937744, "step": 7949, "token_acc": 0.31519825569416227 }, { "epoch": 4.660216945177368, "grad_norm": 0.25317295995541844, "learning_rate": 0.00018224032172348625, "loss": 2.852689743041992, "step": 7950, "token_acc": 0.31891356805822874 }, { "epoch": 4.660803283494577, "grad_norm": 0.3261909196885654, "learning_rate": 0.00018223480746080078, "loss": 2.905864953994751, "step": 7951, "token_acc": 0.3091468965886574 }, { "epoch": 4.661389621811786, "grad_norm": 0.25245319834731045, "learning_rate": 0.0001822292924256301, "loss": 2.8631138801574707, "step": 7952, "token_acc": 0.31756120465556725 }, { "epoch": 4.661975960128994, "grad_norm": 0.2844190801329634, "learning_rate": 0.00018222377661802607, "loss": 2.8484835624694824, "step": 7953, "token_acc": 0.31778582323278926 }, { "epoch": 4.662562298446203, "grad_norm": 0.23058552984987232, "learning_rate": 0.00018221826003804039, "loss": 2.840463638305664, "step": 7954, "token_acc": 0.3180210987734192 }, { "epoch": 4.663148636763412, "grad_norm": 0.2751777190666098, "learning_rate": 0.00018221274268572497, "loss": 2.8497517108917236, "step": 7955, "token_acc": 0.31913728840875194 }, { "epoch": 4.663734975080621, "grad_norm": 0.25836269470298967, "learning_rate": 0.00018220722456113164, "loss": 2.8627238273620605, "step": 7956, "token_acc": 0.3169352205981883 }, { "epoch": 4.6643213133978305, "grad_norm": 0.25395891382911834, "learning_rate": 0.0001822017056643122, "loss": 2.8270535469055176, "step": 7957, "token_acc": 0.32116043956043955 }, { "epoch": 4.66490765171504, "grad_norm": 0.2726792777379215, "learning_rate": 0.0001821961859953185, "loss": 2.887840747833252, "step": 7958, "token_acc": 0.3124549606764582 }, { "epoch": 4.665493990032249, "grad_norm": 0.24130748121970802, "learning_rate": 0.00018219066555420237, "loss": 2.8166680335998535, "step": 7959, "token_acc": 0.32191510284579533 }, { "epoch": 4.666080328349458, "grad_norm": 0.27491132128726337, "learning_rate": 0.00018218514434101572, "loss": 2.8271172046661377, "step": 7960, "token_acc": 0.32222906387942746 }, { "epoch": 4.666666666666667, "grad_norm": 0.23130173787802005, "learning_rate": 0.0001821796223558104, "loss": 2.83827543258667, "step": 7961, "token_acc": 0.3205263938276968 }, { "epoch": 4.667253004983876, "grad_norm": 0.2608491043570111, "learning_rate": 0.00018217409959863824, "loss": 2.858046531677246, "step": 7962, "token_acc": 0.3169588723385103 }, { "epoch": 4.667839343301084, "grad_norm": 0.2362315929110999, "learning_rate": 0.00018216857606955113, "loss": 2.861868381500244, "step": 7963, "token_acc": 0.3168875723547992 }, { "epoch": 4.668425681618293, "grad_norm": 0.25824729934195145, "learning_rate": 0.000182163051768601, "loss": 2.842451572418213, "step": 7964, "token_acc": 0.319540054386884 }, { "epoch": 4.6690120199355025, "grad_norm": 0.22844543115350627, "learning_rate": 0.0001821575266958397, "loss": 2.842209577560425, "step": 7965, "token_acc": 0.32015692532748186 }, { "epoch": 4.669598358252712, "grad_norm": 0.22485787448105587, "learning_rate": 0.00018215200085131916, "loss": 2.877556800842285, "step": 7966, "token_acc": 0.3157010345379869 }, { "epoch": 4.670184696569921, "grad_norm": 0.23112491015744832, "learning_rate": 0.00018214647423509125, "loss": 2.8361425399780273, "step": 7967, "token_acc": 0.32119608040147857 }, { "epoch": 4.67077103488713, "grad_norm": 0.2923105287568493, "learning_rate": 0.00018214094684720794, "loss": 2.8411386013031006, "step": 7968, "token_acc": 0.3192867630054695 }, { "epoch": 4.671357373204339, "grad_norm": 0.23865625290820194, "learning_rate": 0.0001821354186877211, "loss": 2.817558765411377, "step": 7969, "token_acc": 0.3234574077424986 }, { "epoch": 4.671943711521548, "grad_norm": 0.26803140063182135, "learning_rate": 0.00018212988975668267, "loss": 2.806683301925659, "step": 7970, "token_acc": 0.3263003728263803 }, { "epoch": 4.672530049838757, "grad_norm": 0.258209703356879, "learning_rate": 0.00018212436005414463, "loss": 2.9041097164154053, "step": 7971, "token_acc": 0.3106649020892631 }, { "epoch": 4.673116388155966, "grad_norm": 0.28489373065528234, "learning_rate": 0.00018211882958015885, "loss": 2.8429460525512695, "step": 7972, "token_acc": 0.32182492674106866 }, { "epoch": 4.673702726473175, "grad_norm": 0.2792121475768161, "learning_rate": 0.00018211329833477734, "loss": 2.835817337036133, "step": 7973, "token_acc": 0.3199662123829095 }, { "epoch": 4.6742890647903845, "grad_norm": 0.25021435887536836, "learning_rate": 0.00018210776631805207, "loss": 2.8920738697052, "step": 7974, "token_acc": 0.31345546704501764 }, { "epoch": 4.674875403107593, "grad_norm": 0.27677434025971537, "learning_rate": 0.00018210223353003495, "loss": 2.8787167072296143, "step": 7975, "token_acc": 0.31589636285309863 }, { "epoch": 4.675461741424802, "grad_norm": 0.25272235555150385, "learning_rate": 0.00018209669997077795, "loss": 2.8661491870880127, "step": 7976, "token_acc": 0.31556929508912784 }, { "epoch": 4.676048079742011, "grad_norm": 0.2972781577093249, "learning_rate": 0.00018209116564033316, "loss": 2.835538387298584, "step": 7977, "token_acc": 0.3201033386327504 }, { "epoch": 4.67663441805922, "grad_norm": 0.2580242814336882, "learning_rate": 0.00018208563053875244, "loss": 2.847827911376953, "step": 7978, "token_acc": 0.32073199211749237 }, { "epoch": 4.677220756376429, "grad_norm": 0.2798228671940621, "learning_rate": 0.00018208009466608779, "loss": 2.9027786254882812, "step": 7979, "token_acc": 0.31051386341281867 }, { "epoch": 4.677807094693638, "grad_norm": 0.2733044828517619, "learning_rate": 0.0001820745580223913, "loss": 2.829592704772949, "step": 7980, "token_acc": 0.32280676458436736 }, { "epoch": 4.678393433010847, "grad_norm": 0.2651282841484067, "learning_rate": 0.00018206902060771495, "loss": 2.794020175933838, "step": 7981, "token_acc": 0.32606704622751154 }, { "epoch": 4.6789797713280565, "grad_norm": 0.25673741549563334, "learning_rate": 0.00018206348242211072, "loss": 2.8610496520996094, "step": 7982, "token_acc": 0.31692036519808187 }, { "epoch": 4.679566109645266, "grad_norm": 0.24341138039443314, "learning_rate": 0.00018205794346563066, "loss": 2.860891819000244, "step": 7983, "token_acc": 0.3173751504290842 }, { "epoch": 4.680152447962474, "grad_norm": 0.2458464416624701, "learning_rate": 0.0001820524037383268, "loss": 2.9053516387939453, "step": 7984, "token_acc": 0.3105147441211453 }, { "epoch": 4.680738786279683, "grad_norm": 0.2643695435385149, "learning_rate": 0.00018204686324025117, "loss": 2.8444507122039795, "step": 7985, "token_acc": 0.3188622120596206 }, { "epoch": 4.681325124596892, "grad_norm": 0.25700610613011127, "learning_rate": 0.0001820413219714558, "loss": 2.849257230758667, "step": 7986, "token_acc": 0.3185922312443828 }, { "epoch": 4.681911462914101, "grad_norm": 0.25570420504328395, "learning_rate": 0.00018203577993199278, "loss": 2.853827714920044, "step": 7987, "token_acc": 0.3177199841113875 }, { "epoch": 4.68249780123131, "grad_norm": 0.23093366489804387, "learning_rate": 0.00018203023712191416, "loss": 2.8710155487060547, "step": 7988, "token_acc": 0.3174503935716628 }, { "epoch": 4.683084139548519, "grad_norm": 0.259817989196454, "learning_rate": 0.000182024693541272, "loss": 2.8631575107574463, "step": 7989, "token_acc": 0.31644829199812824 }, { "epoch": 4.6836704778657285, "grad_norm": 0.22821289909175868, "learning_rate": 0.00018201914919011838, "loss": 2.8164682388305664, "step": 7990, "token_acc": 0.32182777879097846 }, { "epoch": 4.684256816182938, "grad_norm": 0.2545399035501483, "learning_rate": 0.00018201360406850533, "loss": 2.8166565895080566, "step": 7991, "token_acc": 0.32352871985885123 }, { "epoch": 4.684843154500147, "grad_norm": 0.22092961822369694, "learning_rate": 0.00018200805817648503, "loss": 2.805501699447632, "step": 7992, "token_acc": 0.32548701943312763 }, { "epoch": 4.685429492817356, "grad_norm": 0.24656958950551727, "learning_rate": 0.0001820025115141095, "loss": 2.8383491039276123, "step": 7993, "token_acc": 0.32129780199661845 }, { "epoch": 4.686015831134565, "grad_norm": 0.2263372661714545, "learning_rate": 0.0001819969640814309, "loss": 2.8347816467285156, "step": 7994, "token_acc": 0.32211190483683166 }, { "epoch": 4.686602169451774, "grad_norm": 0.24553631997631356, "learning_rate": 0.00018199141587850131, "loss": 2.874990940093994, "step": 7995, "token_acc": 0.3141520486607071 }, { "epoch": 4.687188507768982, "grad_norm": 0.2725009905957728, "learning_rate": 0.00018198586690537286, "loss": 2.856537342071533, "step": 7996, "token_acc": 0.3173330045946369 }, { "epoch": 4.687774846086191, "grad_norm": 0.2433171123430189, "learning_rate": 0.00018198031716209765, "loss": 2.849447250366211, "step": 7997, "token_acc": 0.3178343966168496 }, { "epoch": 4.6883611844034006, "grad_norm": 0.2370078966811938, "learning_rate": 0.00018197476664872782, "loss": 2.829416513442993, "step": 7998, "token_acc": 0.32200723773491796 }, { "epoch": 4.68894752272061, "grad_norm": 0.24459060585952624, "learning_rate": 0.00018196921536531554, "loss": 2.8380603790283203, "step": 7999, "token_acc": 0.3205729924282408 }, { "epoch": 4.689533861037819, "grad_norm": 0.2662025490636676, "learning_rate": 0.00018196366331191293, "loss": 2.876652717590332, "step": 8000, "token_acc": 0.31395115423218467 }, { "epoch": 4.690120199355028, "grad_norm": 0.27376208069123026, "learning_rate": 0.00018195811048857214, "loss": 2.9305503368377686, "step": 8001, "token_acc": 0.3075252778919278 }, { "epoch": 4.690706537672237, "grad_norm": 0.3289502120308831, "learning_rate": 0.00018195255689534536, "loss": 2.892932891845703, "step": 8002, "token_acc": 0.31132625383098833 }, { "epoch": 4.691292875989446, "grad_norm": 0.2990535550780179, "learning_rate": 0.00018194700253228475, "loss": 2.832838296890259, "step": 8003, "token_acc": 0.3212853103612293 }, { "epoch": 4.691879214306655, "grad_norm": 0.24510422661029935, "learning_rate": 0.00018194144739944244, "loss": 2.8240671157836914, "step": 8004, "token_acc": 0.32344140714154934 }, { "epoch": 4.692465552623864, "grad_norm": 0.28514575646441226, "learning_rate": 0.0001819358914968707, "loss": 2.8391356468200684, "step": 8005, "token_acc": 0.3203686249869096 }, { "epoch": 4.693051890941073, "grad_norm": 0.2800849462489954, "learning_rate": 0.0001819303348246216, "loss": 2.886533737182617, "step": 8006, "token_acc": 0.31420718360525646 }, { "epoch": 4.693638229258282, "grad_norm": 0.23530728231588485, "learning_rate": 0.00018192477738274745, "loss": 2.8834023475646973, "step": 8007, "token_acc": 0.31434186768645705 }, { "epoch": 4.694224567575491, "grad_norm": 0.2515115127843232, "learning_rate": 0.00018191921917130042, "loss": 2.850801467895508, "step": 8008, "token_acc": 0.3189156297972045 }, { "epoch": 4.6948109058927, "grad_norm": 0.23822270754618435, "learning_rate": 0.0001819136601903327, "loss": 2.8425331115722656, "step": 8009, "token_acc": 0.3206677865895743 }, { "epoch": 4.695397244209909, "grad_norm": 0.23888649121738686, "learning_rate": 0.00018190810043989652, "loss": 2.838322401046753, "step": 8010, "token_acc": 0.31950578352453546 }, { "epoch": 4.695983582527118, "grad_norm": 0.2742156585715057, "learning_rate": 0.00018190253992004412, "loss": 2.9043946266174316, "step": 8011, "token_acc": 0.31131339615360404 }, { "epoch": 4.696569920844327, "grad_norm": 0.24498139730584972, "learning_rate": 0.0001818969786308277, "loss": 2.9003829956054688, "step": 8012, "token_acc": 0.3109862688319833 }, { "epoch": 4.697156259161536, "grad_norm": 0.25583271990760836, "learning_rate": 0.00018189141657229952, "loss": 2.851210117340088, "step": 8013, "token_acc": 0.3185222489088608 }, { "epoch": 4.6977425974787455, "grad_norm": 0.26129160995062894, "learning_rate": 0.0001818858537445119, "loss": 2.88232159614563, "step": 8014, "token_acc": 0.31325655373696865 }, { "epoch": 4.698328935795955, "grad_norm": 0.2332040665447364, "learning_rate": 0.00018188029014751695, "loss": 2.8614609241485596, "step": 8015, "token_acc": 0.31674782259808404 }, { "epoch": 4.698915274113164, "grad_norm": 0.27007296395041647, "learning_rate": 0.00018187472578136703, "loss": 2.8396332263946533, "step": 8016, "token_acc": 0.32091937501519774 }, { "epoch": 4.699501612430373, "grad_norm": 0.25439380224047087, "learning_rate": 0.0001818691606461144, "loss": 2.8662452697753906, "step": 8017, "token_acc": 0.3161944471261099 }, { "epoch": 4.700087950747581, "grad_norm": 0.2464550817689056, "learning_rate": 0.00018186359474181132, "loss": 2.8305578231811523, "step": 8018, "token_acc": 0.3242475540907815 }, { "epoch": 4.70067428906479, "grad_norm": 0.2804523568929671, "learning_rate": 0.0001818580280685101, "loss": 2.808176040649414, "step": 8019, "token_acc": 0.3246752573041169 }, { "epoch": 4.701260627381999, "grad_norm": 0.24355875836021335, "learning_rate": 0.00018185246062626297, "loss": 2.8570289611816406, "step": 8020, "token_acc": 0.3192484513551142 }, { "epoch": 4.701846965699208, "grad_norm": 0.2591666605364451, "learning_rate": 0.0001818468924151223, "loss": 2.836561679840088, "step": 8021, "token_acc": 0.3195663998756702 }, { "epoch": 4.7024333040164175, "grad_norm": 0.3159520745470281, "learning_rate": 0.00018184132343514035, "loss": 2.87027645111084, "step": 8022, "token_acc": 0.31565142208501745 }, { "epoch": 4.703019642333627, "grad_norm": 0.22764517951130156, "learning_rate": 0.00018183575368636948, "loss": 2.8519668579101562, "step": 8023, "token_acc": 0.31714101958110236 }, { "epoch": 4.703605980650836, "grad_norm": 0.24133055707444134, "learning_rate": 0.00018183018316886193, "loss": 2.8013575077056885, "step": 8024, "token_acc": 0.3245651557846445 }, { "epoch": 4.704192318968045, "grad_norm": 0.2291506622631952, "learning_rate": 0.0001818246118826701, "loss": 2.8687868118286133, "step": 8025, "token_acc": 0.31608920655073053 }, { "epoch": 4.704778657285254, "grad_norm": 0.26539143969326395, "learning_rate": 0.00018181903982784632, "loss": 2.9064242839813232, "step": 8026, "token_acc": 0.31050965140902426 }, { "epoch": 4.705364995602462, "grad_norm": 0.28817198202862465, "learning_rate": 0.0001818134670044429, "loss": 2.854684829711914, "step": 8027, "token_acc": 0.31704691922346095 }, { "epoch": 4.705951333919671, "grad_norm": 0.24112896565155048, "learning_rate": 0.00018180789341251216, "loss": 2.8974506855010986, "step": 8028, "token_acc": 0.3102875919715961 }, { "epoch": 4.70653767223688, "grad_norm": 0.2642072136625414, "learning_rate": 0.00018180231905210657, "loss": 2.8863887786865234, "step": 8029, "token_acc": 0.31324339967982645 }, { "epoch": 4.7071240105540895, "grad_norm": 0.2404251366049508, "learning_rate": 0.00018179674392327839, "loss": 2.833858013153076, "step": 8030, "token_acc": 0.32072781459971 }, { "epoch": 4.707710348871299, "grad_norm": 0.276559978329488, "learning_rate": 0.00018179116802608002, "loss": 2.8274240493774414, "step": 8031, "token_acc": 0.3212447566503922 }, { "epoch": 4.708296687188508, "grad_norm": 0.23826030630660644, "learning_rate": 0.00018178559136056382, "loss": 2.833381414413452, "step": 8032, "token_acc": 0.3213932870890681 }, { "epoch": 4.708883025505717, "grad_norm": 0.26007341496577335, "learning_rate": 0.00018178001392678224, "loss": 2.8373050689697266, "step": 8033, "token_acc": 0.3197908683821939 }, { "epoch": 4.709469363822926, "grad_norm": 0.2529567529068354, "learning_rate": 0.0001817744357247876, "loss": 2.861542224884033, "step": 8034, "token_acc": 0.3165112263434989 }, { "epoch": 4.710055702140135, "grad_norm": 0.2431402630977329, "learning_rate": 0.00018176885675463237, "loss": 2.8428993225097656, "step": 8035, "token_acc": 0.32066344909024924 }, { "epoch": 4.710642040457344, "grad_norm": 0.27955260704489493, "learning_rate": 0.00018176327701636887, "loss": 2.838435173034668, "step": 8036, "token_acc": 0.32021783035199075 }, { "epoch": 4.711228378774553, "grad_norm": 0.2689898052707983, "learning_rate": 0.00018175769651004956, "loss": 2.928544044494629, "step": 8037, "token_acc": 0.3064314717224155 }, { "epoch": 4.711814717091762, "grad_norm": 0.23226460164563856, "learning_rate": 0.0001817521152357269, "loss": 2.873896837234497, "step": 8038, "token_acc": 0.31294827071692444 }, { "epoch": 4.7124010554089715, "grad_norm": 0.3125019805654407, "learning_rate": 0.00018174653319345322, "loss": 2.856449842453003, "step": 8039, "token_acc": 0.31791541685280916 }, { "epoch": 4.71298739372618, "grad_norm": 0.293705375638339, "learning_rate": 0.00018174095038328108, "loss": 2.878654718399048, "step": 8040, "token_acc": 0.3133308121885264 }, { "epoch": 4.713573732043389, "grad_norm": 0.2833306270756169, "learning_rate": 0.00018173536680526282, "loss": 2.928429126739502, "step": 8041, "token_acc": 0.3069624181485274 }, { "epoch": 4.714160070360598, "grad_norm": 0.3334948847758536, "learning_rate": 0.00018172978245945096, "loss": 2.797441005706787, "step": 8042, "token_acc": 0.32532701908418943 }, { "epoch": 4.714746408677807, "grad_norm": 0.2609469122965276, "learning_rate": 0.0001817241973458979, "loss": 2.8259265422821045, "step": 8043, "token_acc": 0.3216305792150534 }, { "epoch": 4.715332746995016, "grad_norm": 0.310934316469313, "learning_rate": 0.00018171861146465613, "loss": 2.833998203277588, "step": 8044, "token_acc": 0.32099941402957677 }, { "epoch": 4.715919085312225, "grad_norm": 0.296903504195043, "learning_rate": 0.0001817130248157781, "loss": 2.854832172393799, "step": 8045, "token_acc": 0.3185565057029898 }, { "epoch": 4.716505423629434, "grad_norm": 0.25370060665138006, "learning_rate": 0.00018170743739931634, "loss": 2.9049510955810547, "step": 8046, "token_acc": 0.3119570974237474 }, { "epoch": 4.7170917619466435, "grad_norm": 0.29004846654180894, "learning_rate": 0.00018170184921532335, "loss": 2.836297035217285, "step": 8047, "token_acc": 0.3192653619321492 }, { "epoch": 4.717678100263853, "grad_norm": 0.259727850682258, "learning_rate": 0.0001816962602638515, "loss": 2.886216640472412, "step": 8048, "token_acc": 0.31320203047809336 }, { "epoch": 4.718264438581061, "grad_norm": 0.28288949521918405, "learning_rate": 0.00018169067054495344, "loss": 2.846942901611328, "step": 8049, "token_acc": 0.3183509006147651 }, { "epoch": 4.71885077689827, "grad_norm": 0.23848079580669918, "learning_rate": 0.00018168508005868156, "loss": 2.868199348449707, "step": 8050, "token_acc": 0.31611411542562073 }, { "epoch": 4.719437115215479, "grad_norm": 0.28026393841099584, "learning_rate": 0.00018167948880508844, "loss": 2.8703441619873047, "step": 8051, "token_acc": 0.3159572697580561 }, { "epoch": 4.720023453532688, "grad_norm": 0.23050309804981484, "learning_rate": 0.00018167389678422658, "loss": 2.850451707839966, "step": 8052, "token_acc": 0.3186301081493566 }, { "epoch": 4.720609791849897, "grad_norm": 0.2830963211551632, "learning_rate": 0.00018166830399614855, "loss": 2.8724722862243652, "step": 8053, "token_acc": 0.31553533814138335 }, { "epoch": 4.721196130167106, "grad_norm": 0.22212479486255632, "learning_rate": 0.00018166271044090684, "loss": 2.855909824371338, "step": 8054, "token_acc": 0.3167231549354983 }, { "epoch": 4.7217824684843155, "grad_norm": 0.266277037788316, "learning_rate": 0.00018165711611855398, "loss": 2.8273510932922363, "step": 8055, "token_acc": 0.3225707329756305 }, { "epoch": 4.722368806801525, "grad_norm": 0.23221877059005042, "learning_rate": 0.0001816515210291426, "loss": 2.836549758911133, "step": 8056, "token_acc": 0.321211021609377 }, { "epoch": 4.722955145118734, "grad_norm": 0.28472583793168915, "learning_rate": 0.00018164592517272516, "loss": 2.8237316608428955, "step": 8057, "token_acc": 0.32335621103368783 }, { "epoch": 4.723541483435943, "grad_norm": 0.24309086585736395, "learning_rate": 0.00018164032854935428, "loss": 2.8503198623657227, "step": 8058, "token_acc": 0.3178483733525079 }, { "epoch": 4.724127821753152, "grad_norm": 0.2999907377752205, "learning_rate": 0.00018163473115908254, "loss": 2.8550238609313965, "step": 8059, "token_acc": 0.315762456680691 }, { "epoch": 4.724714160070361, "grad_norm": 0.2760338034534839, "learning_rate": 0.0001816291330019625, "loss": 2.8779211044311523, "step": 8060, "token_acc": 0.3149451594530054 }, { "epoch": 4.725300498387569, "grad_norm": 0.24798308936885677, "learning_rate": 0.00018162353407804674, "loss": 2.8319313526153564, "step": 8061, "token_acc": 0.32106564349063427 }, { "epoch": 4.725886836704778, "grad_norm": 0.2833270982505081, "learning_rate": 0.00018161793438738788, "loss": 2.8686540126800537, "step": 8062, "token_acc": 0.3167144369561288 }, { "epoch": 4.7264731750219875, "grad_norm": 0.23611917812134892, "learning_rate": 0.00018161233393003848, "loss": 2.8891663551330566, "step": 8063, "token_acc": 0.3137665913602004 }, { "epoch": 4.727059513339197, "grad_norm": 0.27098040325117845, "learning_rate": 0.00018160673270605122, "loss": 2.8479437828063965, "step": 8064, "token_acc": 0.3195982744251141 }, { "epoch": 4.727645851656406, "grad_norm": 0.24603809204183594, "learning_rate": 0.00018160113071547865, "loss": 2.8657913208007812, "step": 8065, "token_acc": 0.3162626321942782 }, { "epoch": 4.728232189973615, "grad_norm": 0.24323331262389142, "learning_rate": 0.00018159552795837342, "loss": 2.87576961517334, "step": 8066, "token_acc": 0.31376961614874727 }, { "epoch": 4.728818528290824, "grad_norm": 0.2523842135857536, "learning_rate": 0.00018158992443478814, "loss": 2.8928680419921875, "step": 8067, "token_acc": 0.3121880900337666 }, { "epoch": 4.729404866608033, "grad_norm": 0.2826819382569553, "learning_rate": 0.00018158432014477548, "loss": 2.848867654800415, "step": 8068, "token_acc": 0.3189370325405099 }, { "epoch": 4.729991204925242, "grad_norm": 0.2557982808002101, "learning_rate": 0.00018157871508838808, "loss": 2.86297607421875, "step": 8069, "token_acc": 0.3177658013903948 }, { "epoch": 4.730577543242451, "grad_norm": 0.25008369582151574, "learning_rate": 0.00018157310926567857, "loss": 2.8835794925689697, "step": 8070, "token_acc": 0.31495925817410103 }, { "epoch": 4.7311638815596595, "grad_norm": 0.2849336252487903, "learning_rate": 0.00018156750267669963, "loss": 2.834134340286255, "step": 8071, "token_acc": 0.31951847868974215 }, { "epoch": 4.731750219876869, "grad_norm": 0.26762375812623396, "learning_rate": 0.00018156189532150387, "loss": 2.849240303039551, "step": 8072, "token_acc": 0.31931949160056117 }, { "epoch": 4.732336558194078, "grad_norm": 0.24002315538425464, "learning_rate": 0.00018155628720014407, "loss": 2.8511886596679688, "step": 8073, "token_acc": 0.31729686880697583 }, { "epoch": 4.732922896511287, "grad_norm": 0.2635355740921285, "learning_rate": 0.00018155067831267282, "loss": 2.8659536838531494, "step": 8074, "token_acc": 0.3167993424476213 }, { "epoch": 4.733509234828496, "grad_norm": 0.2393455253509564, "learning_rate": 0.00018154506865914285, "loss": 2.8910579681396484, "step": 8075, "token_acc": 0.3137299845215431 }, { "epoch": 4.734095573145705, "grad_norm": 0.24560682147272686, "learning_rate": 0.00018153945823960683, "loss": 2.8448266983032227, "step": 8076, "token_acc": 0.318531694023175 }, { "epoch": 4.734681911462914, "grad_norm": 0.281710323013585, "learning_rate": 0.00018153384705411747, "loss": 2.8232245445251465, "step": 8077, "token_acc": 0.3231604776359348 }, { "epoch": 4.735268249780123, "grad_norm": 0.22888018458060053, "learning_rate": 0.0001815282351027275, "loss": 2.907013416290283, "step": 8078, "token_acc": 0.30971276606904946 }, { "epoch": 4.735854588097332, "grad_norm": 0.29966226721629974, "learning_rate": 0.0001815226223854896, "loss": 2.8319334983825684, "step": 8079, "token_acc": 0.3228889922068721 }, { "epoch": 4.7364409264145415, "grad_norm": 0.2616392494777424, "learning_rate": 0.00018151700890245653, "loss": 2.878106117248535, "step": 8080, "token_acc": 0.3144527511325691 }, { "epoch": 4.737027264731751, "grad_norm": 0.2718129825803422, "learning_rate": 0.00018151139465368102, "loss": 2.8504018783569336, "step": 8081, "token_acc": 0.319645171437559 }, { "epoch": 4.73761360304896, "grad_norm": 0.3660693104364194, "learning_rate": 0.0001815057796392158, "loss": 2.8621160984039307, "step": 8082, "token_acc": 0.3170808640059905 }, { "epoch": 4.738199941366168, "grad_norm": 0.25128602214835555, "learning_rate": 0.00018150016385911358, "loss": 2.853025436401367, "step": 8083, "token_acc": 0.3181481979558903 }, { "epoch": 4.738786279683377, "grad_norm": 0.400131418509681, "learning_rate": 0.00018149454731342717, "loss": 2.8442623615264893, "step": 8084, "token_acc": 0.3212362605393168 }, { "epoch": 4.739372618000586, "grad_norm": 0.2657749731207815, "learning_rate": 0.00018148893000220927, "loss": 2.859973430633545, "step": 8085, "token_acc": 0.3172285134347968 }, { "epoch": 4.739958956317795, "grad_norm": 0.33401079897119496, "learning_rate": 0.0001814833119255127, "loss": 2.8709328174591064, "step": 8086, "token_acc": 0.31419178755314375 }, { "epoch": 4.740545294635004, "grad_norm": 0.24594412668907412, "learning_rate": 0.00018147769308339022, "loss": 2.852072238922119, "step": 8087, "token_acc": 0.3178781432418798 }, { "epoch": 4.7411316329522135, "grad_norm": 0.41557281416006797, "learning_rate": 0.0001814720734758946, "loss": 2.8715970516204834, "step": 8088, "token_acc": 0.3145869188865366 }, { "epoch": 4.741717971269423, "grad_norm": 0.24645535176614186, "learning_rate": 0.00018146645310307866, "loss": 2.8728585243225098, "step": 8089, "token_acc": 0.314820069717615 }, { "epoch": 4.742304309586632, "grad_norm": 0.35845486988276215, "learning_rate": 0.00018146083196499512, "loss": 2.877509355545044, "step": 8090, "token_acc": 0.31447917524458696 }, { "epoch": 4.742890647903841, "grad_norm": 0.2273087996350966, "learning_rate": 0.00018145521006169687, "loss": 2.8547534942626953, "step": 8091, "token_acc": 0.31841609152621914 }, { "epoch": 4.743476986221049, "grad_norm": 0.3305647923242742, "learning_rate": 0.0001814495873932367, "loss": 2.8630473613739014, "step": 8092, "token_acc": 0.3179541476011058 }, { "epoch": 4.744063324538258, "grad_norm": 0.2580908344970165, "learning_rate": 0.00018144396395966737, "loss": 2.883364200592041, "step": 8093, "token_acc": 0.3130672089041096 }, { "epoch": 4.744649662855467, "grad_norm": 0.26081472447270754, "learning_rate": 0.00018143833976104178, "loss": 2.841121196746826, "step": 8094, "token_acc": 0.3202383984793332 }, { "epoch": 4.745236001172676, "grad_norm": 0.2897863667413356, "learning_rate": 0.00018143271479741267, "loss": 2.807852268218994, "step": 8095, "token_acc": 0.3253842203622233 }, { "epoch": 4.7458223394898855, "grad_norm": 0.24111356368258086, "learning_rate": 0.000181427089068833, "loss": 2.88679838180542, "step": 8096, "token_acc": 0.31266485359720153 }, { "epoch": 4.746408677807095, "grad_norm": 0.28143475111388694, "learning_rate": 0.00018142146257535554, "loss": 2.8864684104919434, "step": 8097, "token_acc": 0.31350402128057525 }, { "epoch": 4.746995016124304, "grad_norm": 0.2810418652779738, "learning_rate": 0.0001814158353170331, "loss": 2.8566131591796875, "step": 8098, "token_acc": 0.317571594524976 }, { "epoch": 4.747581354441513, "grad_norm": 0.2331550043979706, "learning_rate": 0.0001814102072939187, "loss": 2.8767473697662354, "step": 8099, "token_acc": 0.3145498247527879 }, { "epoch": 4.748167692758722, "grad_norm": 0.2753471014871126, "learning_rate": 0.00018140457850606502, "loss": 2.8601951599121094, "step": 8100, "token_acc": 0.3169262336799584 }, { "epoch": 4.748754031075931, "grad_norm": 0.238565844067442, "learning_rate": 0.00018139894895352504, "loss": 2.8498740196228027, "step": 8101, "token_acc": 0.31909064655363056 }, { "epoch": 4.74934036939314, "grad_norm": 0.28366941193718787, "learning_rate": 0.00018139331863635164, "loss": 2.8749842643737793, "step": 8102, "token_acc": 0.3138319682903494 }, { "epoch": 4.749926707710349, "grad_norm": 0.2606398332867438, "learning_rate": 0.00018138768755459768, "loss": 2.873274564743042, "step": 8103, "token_acc": 0.31175566302920843 }, { "epoch": 4.7505130460275575, "grad_norm": 0.23969999726021451, "learning_rate": 0.00018138205570831603, "loss": 2.8490233421325684, "step": 8104, "token_acc": 0.3197406748708543 }, { "epoch": 4.751099384344767, "grad_norm": 0.3193724962367442, "learning_rate": 0.0001813764230975597, "loss": 2.8728792667388916, "step": 8105, "token_acc": 0.31491849102883673 }, { "epoch": 4.751685722661976, "grad_norm": 0.24224678539709218, "learning_rate": 0.0001813707897223815, "loss": 2.849452495574951, "step": 8106, "token_acc": 0.3188687711217103 }, { "epoch": 4.752272060979185, "grad_norm": 0.34001969547209343, "learning_rate": 0.00018136515558283436, "loss": 2.8645622730255127, "step": 8107, "token_acc": 0.315676084723906 }, { "epoch": 4.752858399296394, "grad_norm": 0.26578636917641907, "learning_rate": 0.00018135952067897123, "loss": 2.843933582305908, "step": 8108, "token_acc": 0.32091989655908704 }, { "epoch": 4.753444737613603, "grad_norm": 0.26843087565298923, "learning_rate": 0.00018135388501084503, "loss": 2.8612165451049805, "step": 8109, "token_acc": 0.3179208117371677 }, { "epoch": 4.754031075930812, "grad_norm": 0.2972956692285099, "learning_rate": 0.00018134824857850873, "loss": 2.866142749786377, "step": 8110, "token_acc": 0.3174174524119551 }, { "epoch": 4.754617414248021, "grad_norm": 0.2507653572120258, "learning_rate": 0.00018134261138201522, "loss": 2.868525981903076, "step": 8111, "token_acc": 0.315902056682899 }, { "epoch": 4.75520375256523, "grad_norm": 0.291394311195834, "learning_rate": 0.00018133697342141754, "loss": 2.8417797088623047, "step": 8112, "token_acc": 0.3192974146437734 }, { "epoch": 4.7557900908824395, "grad_norm": 0.25840427164856955, "learning_rate": 0.00018133133469676855, "loss": 2.882445812225342, "step": 8113, "token_acc": 0.31343322085667996 }, { "epoch": 4.756376429199648, "grad_norm": 0.3042362722050162, "learning_rate": 0.0001813256952081213, "loss": 2.8996667861938477, "step": 8114, "token_acc": 0.31164511348735063 }, { "epoch": 4.756962767516857, "grad_norm": 0.2544756825536757, "learning_rate": 0.00018132005495552869, "loss": 2.839451789855957, "step": 8115, "token_acc": 0.31972897924079574 }, { "epoch": 4.757549105834066, "grad_norm": 0.3309493648361358, "learning_rate": 0.0001813144139390438, "loss": 2.836857795715332, "step": 8116, "token_acc": 0.3199251563397986 }, { "epoch": 4.758135444151275, "grad_norm": 0.23947347034792899, "learning_rate": 0.0001813087721587195, "loss": 2.8489277362823486, "step": 8117, "token_acc": 0.3200418476533531 }, { "epoch": 4.758721782468484, "grad_norm": 0.31324013845388626, "learning_rate": 0.0001813031296146089, "loss": 2.876556634902954, "step": 8118, "token_acc": 0.3150759275148082 }, { "epoch": 4.759308120785693, "grad_norm": 0.2164433247404135, "learning_rate": 0.00018129748630676493, "loss": 2.8348793983459473, "step": 8119, "token_acc": 0.32038794303010304 }, { "epoch": 4.759894459102902, "grad_norm": 0.2916040080351027, "learning_rate": 0.00018129184223524063, "loss": 2.8650968074798584, "step": 8120, "token_acc": 0.3159516280435683 }, { "epoch": 4.7604807974201115, "grad_norm": 0.21973361116769724, "learning_rate": 0.00018128619740008902, "loss": 2.8875927925109863, "step": 8121, "token_acc": 0.3125834794223224 }, { "epoch": 4.761067135737321, "grad_norm": 0.2531314638843724, "learning_rate": 0.00018128055180136312, "loss": 2.826516628265381, "step": 8122, "token_acc": 0.32237092760272695 }, { "epoch": 4.76165347405453, "grad_norm": 0.2454021434099326, "learning_rate": 0.00018127490543911595, "loss": 2.840968132019043, "step": 8123, "token_acc": 0.321548291508473 }, { "epoch": 4.762239812371739, "grad_norm": 0.24231611661373578, "learning_rate": 0.00018126925831340058, "loss": 2.8848531246185303, "step": 8124, "token_acc": 0.31293197738009354 }, { "epoch": 4.762826150688948, "grad_norm": 0.24081618550132425, "learning_rate": 0.00018126361042427003, "loss": 2.8327250480651855, "step": 8125, "token_acc": 0.32230493071238914 }, { "epoch": 4.763412489006156, "grad_norm": 0.23295577693478312, "learning_rate": 0.00018125796177177736, "loss": 2.877139091491699, "step": 8126, "token_acc": 0.3141819039633638 }, { "epoch": 4.763998827323365, "grad_norm": 0.2210529617814289, "learning_rate": 0.00018125231235597563, "loss": 2.8429958820343018, "step": 8127, "token_acc": 0.3206383941252591 }, { "epoch": 4.764585165640574, "grad_norm": 0.2358269984069406, "learning_rate": 0.00018124666217691796, "loss": 2.841744899749756, "step": 8128, "token_acc": 0.3189275583216902 }, { "epoch": 4.7651715039577835, "grad_norm": 0.250011870511674, "learning_rate": 0.00018124101123465734, "loss": 2.875566005706787, "step": 8129, "token_acc": 0.31670009449045955 }, { "epoch": 4.765757842274993, "grad_norm": 0.24336475462926818, "learning_rate": 0.0001812353595292469, "loss": 2.813002586364746, "step": 8130, "token_acc": 0.3259146891848563 }, { "epoch": 4.766344180592202, "grad_norm": 0.24246383004577401, "learning_rate": 0.0001812297070607397, "loss": 2.7973411083221436, "step": 8131, "token_acc": 0.3273682828205451 }, { "epoch": 4.766930518909411, "grad_norm": 0.24876944370997475, "learning_rate": 0.00018122405382918887, "loss": 2.8359577655792236, "step": 8132, "token_acc": 0.32116274075354484 }, { "epoch": 4.76751685722662, "grad_norm": 0.2485667718374598, "learning_rate": 0.00018121839983464754, "loss": 2.8358585834503174, "step": 8133, "token_acc": 0.3202875839639654 }, { "epoch": 4.768103195543829, "grad_norm": 0.2700892449211088, "learning_rate": 0.00018121274507716876, "loss": 2.8785624504089355, "step": 8134, "token_acc": 0.3127131256690775 }, { "epoch": 4.768689533861037, "grad_norm": 0.24662695940358334, "learning_rate": 0.0001812070895568057, "loss": 2.875436782836914, "step": 8135, "token_acc": 0.315440865348103 }, { "epoch": 4.7692758721782464, "grad_norm": 0.2642085880960896, "learning_rate": 0.00018120143327361144, "loss": 2.8548102378845215, "step": 8136, "token_acc": 0.31806089579778635 }, { "epoch": 4.769862210495456, "grad_norm": 0.28583099227997527, "learning_rate": 0.0001811957762276391, "loss": 2.9181082248687744, "step": 8137, "token_acc": 0.3095460629707205 }, { "epoch": 4.770448548812665, "grad_norm": 0.23415825729571144, "learning_rate": 0.0001811901184189419, "loss": 2.903862237930298, "step": 8138, "token_acc": 0.30974019332071806 }, { "epoch": 4.771034887129874, "grad_norm": 0.29193814350934794, "learning_rate": 0.00018118445984757292, "loss": 2.8313355445861816, "step": 8139, "token_acc": 0.32268198799954717 }, { "epoch": 4.771621225447083, "grad_norm": 0.2488021375262915, "learning_rate": 0.00018117880051358537, "loss": 2.858372926712036, "step": 8140, "token_acc": 0.31836297946800607 }, { "epoch": 4.772207563764292, "grad_norm": 0.25016353966451155, "learning_rate": 0.00018117314041703238, "loss": 2.821680784225464, "step": 8141, "token_acc": 0.32261071899880944 }, { "epoch": 4.772793902081501, "grad_norm": 0.2644863114110237, "learning_rate": 0.00018116747955796708, "loss": 2.8611650466918945, "step": 8142, "token_acc": 0.31577585760852694 }, { "epoch": 4.77338024039871, "grad_norm": 0.27077924963485445, "learning_rate": 0.00018116181793644272, "loss": 2.826604127883911, "step": 8143, "token_acc": 0.32209233480661464 }, { "epoch": 4.773966578715919, "grad_norm": 0.2902639271082748, "learning_rate": 0.0001811561555525124, "loss": 2.8524208068847656, "step": 8144, "token_acc": 0.31812241256524837 }, { "epoch": 4.7745529170331285, "grad_norm": 0.2338020160442722, "learning_rate": 0.0001811504924062294, "loss": 2.8528800010681152, "step": 8145, "token_acc": 0.31817058932133 }, { "epoch": 4.775139255350338, "grad_norm": 0.27317734578112846, "learning_rate": 0.00018114482849764687, "loss": 2.853255033493042, "step": 8146, "token_acc": 0.31783677342339123 }, { "epoch": 4.775725593667546, "grad_norm": 0.29942002214019947, "learning_rate": 0.00018113916382681803, "loss": 2.847193479537964, "step": 8147, "token_acc": 0.3209507347929856 }, { "epoch": 4.776311931984755, "grad_norm": 0.3152178153781653, "learning_rate": 0.00018113349839379606, "loss": 2.8592376708984375, "step": 8148, "token_acc": 0.3177396895605521 }, { "epoch": 4.776898270301964, "grad_norm": 0.2755407087772189, "learning_rate": 0.00018112783219863417, "loss": 2.8734045028686523, "step": 8149, "token_acc": 0.3146510640061333 }, { "epoch": 4.777484608619173, "grad_norm": 0.256599712003577, "learning_rate": 0.00018112216524138568, "loss": 2.8599419593811035, "step": 8150, "token_acc": 0.31715971107101276 }, { "epoch": 4.778070946936382, "grad_norm": 0.24107035830239307, "learning_rate": 0.00018111649752210372, "loss": 2.823293447494507, "step": 8151, "token_acc": 0.32303869274189645 }, { "epoch": 4.778657285253591, "grad_norm": 0.24240712571419468, "learning_rate": 0.0001811108290408416, "loss": 2.890720844268799, "step": 8152, "token_acc": 0.313693807477252 }, { "epoch": 4.7792436235708005, "grad_norm": 0.24125320199863215, "learning_rate": 0.00018110515979765252, "loss": 2.8172149658203125, "step": 8153, "token_acc": 0.3217824252180238 }, { "epoch": 4.77982996188801, "grad_norm": 0.2760180006122312, "learning_rate": 0.00018109948979258978, "loss": 2.868124485015869, "step": 8154, "token_acc": 0.3157876718641783 }, { "epoch": 4.780416300205219, "grad_norm": 0.2433756743712632, "learning_rate": 0.00018109381902570659, "loss": 2.891968250274658, "step": 8155, "token_acc": 0.31359143823345564 }, { "epoch": 4.781002638522428, "grad_norm": 0.2465303383082132, "learning_rate": 0.00018108814749705625, "loss": 2.857940912246704, "step": 8156, "token_acc": 0.3179065213933928 }, { "epoch": 4.781588976839636, "grad_norm": 0.28851049234808446, "learning_rate": 0.000181082475206692, "loss": 2.882906436920166, "step": 8157, "token_acc": 0.3131278791520403 }, { "epoch": 4.782175315156845, "grad_norm": 0.2770484626556144, "learning_rate": 0.00018107680215466722, "loss": 2.85306978225708, "step": 8158, "token_acc": 0.31886902278334955 }, { "epoch": 4.782761653474054, "grad_norm": 0.2662430167428787, "learning_rate": 0.0001810711283410351, "loss": 2.8797686100006104, "step": 8159, "token_acc": 0.3143202961322608 }, { "epoch": 4.783347991791263, "grad_norm": 0.2446041227574991, "learning_rate": 0.00018106545376584898, "loss": 2.9015705585479736, "step": 8160, "token_acc": 0.31088644525373677 }, { "epoch": 4.7839343301084725, "grad_norm": 0.2652140190043571, "learning_rate": 0.00018105977842916216, "loss": 2.868628740310669, "step": 8161, "token_acc": 0.31614125015538996 }, { "epoch": 4.784520668425682, "grad_norm": 0.27171634638394543, "learning_rate": 0.00018105410233102795, "loss": 2.8268423080444336, "step": 8162, "token_acc": 0.3212940239386894 }, { "epoch": 4.785107006742891, "grad_norm": 0.2435175237871007, "learning_rate": 0.00018104842547149967, "loss": 2.860830783843994, "step": 8163, "token_acc": 0.31832966892564535 }, { "epoch": 4.7856933450601, "grad_norm": 0.28195816557607195, "learning_rate": 0.00018104274785063064, "loss": 2.8669190406799316, "step": 8164, "token_acc": 0.3138442646000325 }, { "epoch": 4.786279683377309, "grad_norm": 0.2222731700532026, "learning_rate": 0.0001810370694684742, "loss": 2.8316445350646973, "step": 8165, "token_acc": 0.32005220588044125 }, { "epoch": 4.786866021694518, "grad_norm": 0.3236069300476552, "learning_rate": 0.0001810313903250837, "loss": 2.8675243854522705, "step": 8166, "token_acc": 0.3171432505865716 }, { "epoch": 4.787452360011727, "grad_norm": 0.36629393019166573, "learning_rate": 0.0001810257104205125, "loss": 2.8252575397491455, "step": 8167, "token_acc": 0.3230941692009805 }, { "epoch": 4.788038698328936, "grad_norm": 0.2340781079468918, "learning_rate": 0.00018102002975481393, "loss": 2.8369643688201904, "step": 8168, "token_acc": 0.3216903217519748 }, { "epoch": 4.7886250366461445, "grad_norm": 0.3923471445134877, "learning_rate": 0.0001810143483280413, "loss": 2.875633955001831, "step": 8169, "token_acc": 0.31395536869340235 }, { "epoch": 4.789211374963354, "grad_norm": 0.2586293568970939, "learning_rate": 0.0001810086661402481, "loss": 2.8146908283233643, "step": 8170, "token_acc": 0.32401377960671823 }, { "epoch": 4.789797713280563, "grad_norm": 0.3317381261266541, "learning_rate": 0.00018100298319148757, "loss": 2.8480870723724365, "step": 8171, "token_acc": 0.31824836298611914 }, { "epoch": 4.790384051597772, "grad_norm": 0.2489372070631155, "learning_rate": 0.00018099729948181325, "loss": 2.817063093185425, "step": 8172, "token_acc": 0.322509765625 }, { "epoch": 4.790970389914981, "grad_norm": 0.34581254920205595, "learning_rate": 0.0001809916150112784, "loss": 2.867819309234619, "step": 8173, "token_acc": 0.3156962615445209 }, { "epoch": 4.79155672823219, "grad_norm": 0.23278217159996728, "learning_rate": 0.00018098592977993646, "loss": 2.8557162284851074, "step": 8174, "token_acc": 0.3161999328239147 }, { "epoch": 4.792143066549399, "grad_norm": 0.3014441922280524, "learning_rate": 0.00018098024378784087, "loss": 2.84934663772583, "step": 8175, "token_acc": 0.3183370646534398 }, { "epoch": 4.792729404866608, "grad_norm": 0.25092992170602385, "learning_rate": 0.000180974557035045, "loss": 2.8727641105651855, "step": 8176, "token_acc": 0.3164565504603349 }, { "epoch": 4.793315743183817, "grad_norm": 0.2707209433815732, "learning_rate": 0.00018096886952160226, "loss": 2.8431830406188965, "step": 8177, "token_acc": 0.31908322624378727 }, { "epoch": 4.7939020815010265, "grad_norm": 0.24538217576471189, "learning_rate": 0.00018096318124756613, "loss": 2.8503355979919434, "step": 8178, "token_acc": 0.3182705029008561 }, { "epoch": 4.794488419818235, "grad_norm": 0.21997841638318055, "learning_rate": 0.00018095749221299, "loss": 2.8617210388183594, "step": 8179, "token_acc": 0.31850876194502714 }, { "epoch": 4.795074758135444, "grad_norm": 0.2965800898239356, "learning_rate": 0.00018095180241792732, "loss": 2.832221031188965, "step": 8180, "token_acc": 0.319807231373349 }, { "epoch": 4.795661096452653, "grad_norm": 0.23788493450420323, "learning_rate": 0.0001809461118624315, "loss": 2.9093899726867676, "step": 8181, "token_acc": 0.31061972680304367 }, { "epoch": 4.796247434769862, "grad_norm": 0.25070970081028615, "learning_rate": 0.0001809404205465561, "loss": 2.9116873741149902, "step": 8182, "token_acc": 0.31046585916692737 }, { "epoch": 4.796833773087071, "grad_norm": 0.28191506577997527, "learning_rate": 0.00018093472847035449, "loss": 2.9098711013793945, "step": 8183, "token_acc": 0.3081247404789029 }, { "epoch": 4.79742011140428, "grad_norm": 0.22509699417118398, "learning_rate": 0.00018092903563388015, "loss": 2.8509271144866943, "step": 8184, "token_acc": 0.31772347052512057 }, { "epoch": 4.798006449721489, "grad_norm": 0.29864058113816083, "learning_rate": 0.00018092334203718662, "loss": 2.8774447441101074, "step": 8185, "token_acc": 0.3149316025096222 }, { "epoch": 4.7985927880386985, "grad_norm": 0.2620555822535803, "learning_rate": 0.0001809176476803273, "loss": 2.8750672340393066, "step": 8186, "token_acc": 0.3139500111460899 }, { "epoch": 4.799179126355908, "grad_norm": 0.24996631011354264, "learning_rate": 0.0001809119525633557, "loss": 2.8366427421569824, "step": 8187, "token_acc": 0.3221414841258106 }, { "epoch": 4.799765464673117, "grad_norm": 0.30094249500738574, "learning_rate": 0.00018090625668632537, "loss": 2.855557441711426, "step": 8188, "token_acc": 0.31678514668893204 }, { "epoch": 4.800351802990326, "grad_norm": 0.2514099446714227, "learning_rate": 0.00018090056004928977, "loss": 2.8482189178466797, "step": 8189, "token_acc": 0.3199554933239986 }, { "epoch": 4.800938141307535, "grad_norm": 0.2526603848329596, "learning_rate": 0.00018089486265230245, "loss": 2.820018768310547, "step": 8190, "token_acc": 0.3227026264853294 }, { "epoch": 4.801524479624743, "grad_norm": 0.24290049582219164, "learning_rate": 0.00018088916449541688, "loss": 2.8114962577819824, "step": 8191, "token_acc": 0.323745375355197 }, { "epoch": 4.802110817941952, "grad_norm": 0.28825989497942844, "learning_rate": 0.0001808834655786866, "loss": 2.848357677459717, "step": 8192, "token_acc": 0.3189894573785611 }, { "epoch": 4.802697156259161, "grad_norm": 0.24082759248248137, "learning_rate": 0.00018087776590216517, "loss": 2.8723549842834473, "step": 8193, "token_acc": 0.3159647698299001 }, { "epoch": 4.8032834945763705, "grad_norm": 0.2668897304549686, "learning_rate": 0.0001808720654659061, "loss": 2.8832757472991943, "step": 8194, "token_acc": 0.3136895527165622 }, { "epoch": 4.80386983289358, "grad_norm": 0.219158410487806, "learning_rate": 0.00018086636426996292, "loss": 2.8898842334747314, "step": 8195, "token_acc": 0.31334483266914753 }, { "epoch": 4.804456171210789, "grad_norm": 0.2520977482353172, "learning_rate": 0.00018086066231438929, "loss": 2.867830514907837, "step": 8196, "token_acc": 0.3176000827445046 }, { "epoch": 4.805042509527998, "grad_norm": 0.2260520245413571, "learning_rate": 0.00018085495959923863, "loss": 2.820164203643799, "step": 8197, "token_acc": 0.32319370780107615 }, { "epoch": 4.805628847845207, "grad_norm": 0.24143640447731016, "learning_rate": 0.00018084925612456463, "loss": 2.8762216567993164, "step": 8198, "token_acc": 0.3132154620261992 }, { "epoch": 4.806215186162416, "grad_norm": 0.23377367250215608, "learning_rate": 0.0001808435518904208, "loss": 2.85361385345459, "step": 8199, "token_acc": 0.3176249882750211 }, { "epoch": 4.806801524479624, "grad_norm": 0.25714854874495435, "learning_rate": 0.0001808378468968607, "loss": 2.8985462188720703, "step": 8200, "token_acc": 0.3124442888068254 }, { "epoch": 4.807387862796833, "grad_norm": 0.25446293412333043, "learning_rate": 0.000180832141143938, "loss": 2.847727060317993, "step": 8201, "token_acc": 0.3188911693533571 }, { "epoch": 4.8079742011140425, "grad_norm": 0.22141394356354235, "learning_rate": 0.00018082643463170626, "loss": 2.8736815452575684, "step": 8202, "token_acc": 0.3144927806439756 }, { "epoch": 4.808560539431252, "grad_norm": 0.2388494738863037, "learning_rate": 0.00018082072736021906, "loss": 2.850316286087036, "step": 8203, "token_acc": 0.31842846313551926 }, { "epoch": 4.809146877748461, "grad_norm": 0.2416443549327943, "learning_rate": 0.00018081501932953005, "loss": 2.8625502586364746, "step": 8204, "token_acc": 0.3155276396614976 }, { "epoch": 4.80973321606567, "grad_norm": 0.22852947195773407, "learning_rate": 0.00018080931053969282, "loss": 2.8500723838806152, "step": 8205, "token_acc": 0.32011872666943175 }, { "epoch": 4.810319554382879, "grad_norm": 0.2640730853027657, "learning_rate": 0.00018080360099076098, "loss": 2.890326499938965, "step": 8206, "token_acc": 0.31345769654637956 }, { "epoch": 4.810905892700088, "grad_norm": 0.23338253996819766, "learning_rate": 0.00018079789068278825, "loss": 2.8558266162872314, "step": 8207, "token_acc": 0.31727379235516784 }, { "epoch": 4.811492231017297, "grad_norm": 0.26815192656086667, "learning_rate": 0.0001807921796158282, "loss": 2.851827621459961, "step": 8208, "token_acc": 0.3200605617410471 }, { "epoch": 4.812078569334506, "grad_norm": 0.2680973211187716, "learning_rate": 0.00018078646778993447, "loss": 2.8473615646362305, "step": 8209, "token_acc": 0.32104972505829915 }, { "epoch": 4.812664907651715, "grad_norm": 0.22936740359740326, "learning_rate": 0.00018078075520516076, "loss": 2.8656771183013916, "step": 8210, "token_acc": 0.315752506554218 }, { "epoch": 4.8132512459689245, "grad_norm": 0.2562788778282395, "learning_rate": 0.0001807750418615607, "loss": 2.872614860534668, "step": 8211, "token_acc": 0.31601683014809906 }, { "epoch": 4.813837584286133, "grad_norm": 0.23169224743021596, "learning_rate": 0.00018076932775918796, "loss": 2.8568992614746094, "step": 8212, "token_acc": 0.3179971944810837 }, { "epoch": 4.814423922603342, "grad_norm": 0.26156465953851854, "learning_rate": 0.0001807636128980962, "loss": 2.8486838340759277, "step": 8213, "token_acc": 0.3193243499321183 }, { "epoch": 4.815010260920551, "grad_norm": 0.24189732538383077, "learning_rate": 0.00018075789727833917, "loss": 2.8999156951904297, "step": 8214, "token_acc": 0.31061379097093383 }, { "epoch": 4.81559659923776, "grad_norm": 0.23957126575589463, "learning_rate": 0.0001807521808999705, "loss": 2.8752529621124268, "step": 8215, "token_acc": 0.3129721328658691 }, { "epoch": 4.816182937554969, "grad_norm": 0.26684385286455187, "learning_rate": 0.0001807464637630439, "loss": 2.8746471405029297, "step": 8216, "token_acc": 0.3151582578607831 }, { "epoch": 4.816769275872178, "grad_norm": 0.25725339910232115, "learning_rate": 0.0001807407458676131, "loss": 2.8593287467956543, "step": 8217, "token_acc": 0.31664378923961756 }, { "epoch": 4.817355614189387, "grad_norm": 0.27782955888054767, "learning_rate": 0.00018073502721373177, "loss": 2.883793830871582, "step": 8218, "token_acc": 0.31404211903062734 }, { "epoch": 4.8179419525065965, "grad_norm": 0.28777892804597327, "learning_rate": 0.00018072930780145367, "loss": 2.8663835525512695, "step": 8219, "token_acc": 0.3161315015619651 }, { "epoch": 4.818528290823806, "grad_norm": 0.3072605752691343, "learning_rate": 0.00018072358763083251, "loss": 2.862708568572998, "step": 8220, "token_acc": 0.31613593542834545 }, { "epoch": 4.819114629141015, "grad_norm": 0.25538760818729805, "learning_rate": 0.00018071786670192198, "loss": 2.8655238151550293, "step": 8221, "token_acc": 0.3167420582281144 }, { "epoch": 4.819700967458223, "grad_norm": 0.24224807924943095, "learning_rate": 0.0001807121450147759, "loss": 2.8618836402893066, "step": 8222, "token_acc": 0.3178090818952842 }, { "epoch": 4.820287305775432, "grad_norm": 0.2362867878307459, "learning_rate": 0.00018070642256944795, "loss": 2.843153715133667, "step": 8223, "token_acc": 0.3198684755187953 }, { "epoch": 4.820873644092641, "grad_norm": 0.22960889386403519, "learning_rate": 0.00018070069936599196, "loss": 2.8467721939086914, "step": 8224, "token_acc": 0.3194806081991808 }, { "epoch": 4.82145998240985, "grad_norm": 0.22110122169758384, "learning_rate": 0.00018069497540446162, "loss": 2.8222007751464844, "step": 8225, "token_acc": 0.3232364435438241 }, { "epoch": 4.822046320727059, "grad_norm": 0.2540249124375196, "learning_rate": 0.00018068925068491073, "loss": 2.8849048614501953, "step": 8226, "token_acc": 0.31421429030255016 }, { "epoch": 4.8226326590442685, "grad_norm": 0.2639050299425887, "learning_rate": 0.00018068352520739304, "loss": 2.9070823192596436, "step": 8227, "token_acc": 0.3109053157862116 }, { "epoch": 4.823218997361478, "grad_norm": 0.23078505436481792, "learning_rate": 0.00018067779897196237, "loss": 2.877955436706543, "step": 8228, "token_acc": 0.31353088852912886 }, { "epoch": 4.823805335678687, "grad_norm": 0.23496549610824452, "learning_rate": 0.00018067207197867247, "loss": 2.8433361053466797, "step": 8229, "token_acc": 0.3188719554139962 }, { "epoch": 4.824391673995896, "grad_norm": 0.25530349951740045, "learning_rate": 0.0001806663442275772, "loss": 2.8697896003723145, "step": 8230, "token_acc": 0.31594890132669984 }, { "epoch": 4.824978012313105, "grad_norm": 0.2694179582986932, "learning_rate": 0.0001806606157187303, "loss": 2.8228821754455566, "step": 8231, "token_acc": 0.32266536402935136 }, { "epoch": 4.825564350630314, "grad_norm": 0.2251553767729195, "learning_rate": 0.0001806548864521856, "loss": 2.8572213649749756, "step": 8232, "token_acc": 0.3171362264783538 }, { "epoch": 4.826150688947523, "grad_norm": 0.23551163610864667, "learning_rate": 0.00018064915642799692, "loss": 2.8482768535614014, "step": 8233, "token_acc": 0.3182484141168023 }, { "epoch": 4.826737027264731, "grad_norm": 0.2216757676955962, "learning_rate": 0.0001806434256462181, "loss": 2.842334270477295, "step": 8234, "token_acc": 0.31985196382496284 }, { "epoch": 4.8273233655819405, "grad_norm": 0.2566188644556497, "learning_rate": 0.000180637694106903, "loss": 2.879251718521118, "step": 8235, "token_acc": 0.3137808665962374 }, { "epoch": 4.82790970389915, "grad_norm": 0.3068648597098306, "learning_rate": 0.00018063196181010538, "loss": 2.8713197708129883, "step": 8236, "token_acc": 0.31493075513376 }, { "epoch": 4.828496042216359, "grad_norm": 0.28655843664163927, "learning_rate": 0.00018062622875587915, "loss": 2.8696413040161133, "step": 8237, "token_acc": 0.314733687501178 }, { "epoch": 4.829082380533568, "grad_norm": 0.20986594432373837, "learning_rate": 0.00018062049494427815, "loss": 2.869027614593506, "step": 8238, "token_acc": 0.3153961251311162 }, { "epoch": 4.829668718850777, "grad_norm": 0.298237725812062, "learning_rate": 0.00018061476037535624, "loss": 2.8465917110443115, "step": 8239, "token_acc": 0.320963109641183 }, { "epoch": 4.830255057167986, "grad_norm": 0.2268527458653415, "learning_rate": 0.00018060902504916725, "loss": 2.8734958171844482, "step": 8240, "token_acc": 0.31613198634769174 }, { "epoch": 4.830841395485195, "grad_norm": 0.2638291078469678, "learning_rate": 0.00018060328896576513, "loss": 2.8857178688049316, "step": 8241, "token_acc": 0.3135397334451528 }, { "epoch": 4.831427733802404, "grad_norm": 0.2696195367090812, "learning_rate": 0.00018059755212520372, "loss": 2.9038708209991455, "step": 8242, "token_acc": 0.30950549913615105 }, { "epoch": 4.8320140721196125, "grad_norm": 0.22436018974786645, "learning_rate": 0.0001805918145275369, "loss": 2.861236095428467, "step": 8243, "token_acc": 0.3171980968165824 }, { "epoch": 4.832600410436822, "grad_norm": 0.26679029840985113, "learning_rate": 0.0001805860761728186, "loss": 2.88057017326355, "step": 8244, "token_acc": 0.3141044921121205 }, { "epoch": 4.833186748754031, "grad_norm": 0.2226249569287407, "learning_rate": 0.0001805803370611027, "loss": 2.8849852085113525, "step": 8245, "token_acc": 0.3133791653015715 }, { "epoch": 4.83377308707124, "grad_norm": 0.2586694075073, "learning_rate": 0.00018057459719244312, "loss": 2.864267587661743, "step": 8246, "token_acc": 0.31807542262678806 }, { "epoch": 4.834359425388449, "grad_norm": 0.24423887136021102, "learning_rate": 0.00018056885656689376, "loss": 2.859689474105835, "step": 8247, "token_acc": 0.31816064309239034 }, { "epoch": 4.834945763705658, "grad_norm": 0.25830508285499704, "learning_rate": 0.00018056311518450854, "loss": 2.878535747528076, "step": 8248, "token_acc": 0.3135531759489391 }, { "epoch": 4.835532102022867, "grad_norm": 0.24118887532754674, "learning_rate": 0.00018055737304534147, "loss": 2.830040216445923, "step": 8249, "token_acc": 0.3210832161678282 }, { "epoch": 4.836118440340076, "grad_norm": 0.23108683381840714, "learning_rate": 0.0001805516301494464, "loss": 2.8736555576324463, "step": 8250, "token_acc": 0.3146463699258148 }, { "epoch": 4.836704778657285, "grad_norm": 0.2532612232141292, "learning_rate": 0.0001805458864968773, "loss": 2.892242431640625, "step": 8251, "token_acc": 0.3121503277524947 }, { "epoch": 4.8372911169744945, "grad_norm": 0.25013810059464525, "learning_rate": 0.00018054014208768813, "loss": 2.8634836673736572, "step": 8252, "token_acc": 0.31592235956090314 }, { "epoch": 4.837877455291704, "grad_norm": 0.2579046103567225, "learning_rate": 0.0001805343969219329, "loss": 2.83475399017334, "step": 8253, "token_acc": 0.32222596803436 }, { "epoch": 4.838463793608913, "grad_norm": 0.24573011345980175, "learning_rate": 0.0001805286509996655, "loss": 2.8755555152893066, "step": 8254, "token_acc": 0.31531732968397685 }, { "epoch": 4.839050131926121, "grad_norm": 0.2503673896450783, "learning_rate": 0.00018052290432093993, "loss": 2.8335912227630615, "step": 8255, "token_acc": 0.3232205639246548 }, { "epoch": 4.83963647024333, "grad_norm": 0.23326978696579043, "learning_rate": 0.0001805171568858102, "loss": 2.8716824054718018, "step": 8256, "token_acc": 0.3158763774722711 }, { "epoch": 4.840222808560539, "grad_norm": 0.2471043054527452, "learning_rate": 0.00018051140869433026, "loss": 2.8203985691070557, "step": 8257, "token_acc": 0.3210322026438726 }, { "epoch": 4.840809146877748, "grad_norm": 0.2425622949435863, "learning_rate": 0.00018050565974655412, "loss": 2.84857177734375, "step": 8258, "token_acc": 0.318638694353548 }, { "epoch": 4.841395485194957, "grad_norm": 0.2490927332105346, "learning_rate": 0.0001804999100425358, "loss": 2.8525524139404297, "step": 8259, "token_acc": 0.3186320091689553 }, { "epoch": 4.8419818235121665, "grad_norm": 0.23407265553562537, "learning_rate": 0.00018049415958232932, "loss": 2.8574249744415283, "step": 8260, "token_acc": 0.31782524417409186 }, { "epoch": 4.842568161829376, "grad_norm": 0.24844312271338953, "learning_rate": 0.00018048840836598867, "loss": 2.859438180923462, "step": 8261, "token_acc": 0.3174997722569982 }, { "epoch": 4.843154500146585, "grad_norm": 0.22160060909432833, "learning_rate": 0.00018048265639356785, "loss": 2.8656527996063232, "step": 8262, "token_acc": 0.3160205681183318 }, { "epoch": 4.843740838463794, "grad_norm": 0.22148171837567276, "learning_rate": 0.00018047690366512095, "loss": 2.854942798614502, "step": 8263, "token_acc": 0.31832615105466094 }, { "epoch": 4.844327176781003, "grad_norm": 0.22503645265857317, "learning_rate": 0.00018047115018070197, "loss": 2.8434882164001465, "step": 8264, "token_acc": 0.3183701004840454 }, { "epoch": 4.844913515098211, "grad_norm": 0.24966726732358113, "learning_rate": 0.000180465395940365, "loss": 2.876203775405884, "step": 8265, "token_acc": 0.3145159541595741 }, { "epoch": 4.84549985341542, "grad_norm": 0.27436033696318435, "learning_rate": 0.00018045964094416406, "loss": 2.8579604625701904, "step": 8266, "token_acc": 0.3177842341511406 }, { "epoch": 4.8460861917326294, "grad_norm": 0.34539052750169497, "learning_rate": 0.0001804538851921532, "loss": 2.8718719482421875, "step": 8267, "token_acc": 0.31489252667550405 }, { "epoch": 4.846672530049839, "grad_norm": 0.32237752319390384, "learning_rate": 0.0001804481286843865, "loss": 2.8554272651672363, "step": 8268, "token_acc": 0.3163815668712613 }, { "epoch": 4.847258868367048, "grad_norm": 0.24655865713050729, "learning_rate": 0.00018044237142091803, "loss": 2.898087501525879, "step": 8269, "token_acc": 0.31197622893000226 }, { "epoch": 4.847845206684257, "grad_norm": 0.3117384981293464, "learning_rate": 0.0001804366134018019, "loss": 2.846686601638794, "step": 8270, "token_acc": 0.32020691766466675 }, { "epoch": 4.848431545001466, "grad_norm": 0.3325263281692976, "learning_rate": 0.00018043085462709216, "loss": 2.870121479034424, "step": 8271, "token_acc": 0.315547755586316 }, { "epoch": 4.849017883318675, "grad_norm": 0.26219748966507395, "learning_rate": 0.0001804250950968429, "loss": 2.9362940788269043, "step": 8272, "token_acc": 0.3064223191332133 }, { "epoch": 4.849604221635884, "grad_norm": 0.26897445707930684, "learning_rate": 0.00018041933481110828, "loss": 2.8543896675109863, "step": 8273, "token_acc": 0.31782677425687333 }, { "epoch": 4.850190559953093, "grad_norm": 0.2553256807560734, "learning_rate": 0.0001804135737699424, "loss": 2.8379414081573486, "step": 8274, "token_acc": 0.3211551675253453 }, { "epoch": 4.850776898270302, "grad_norm": 0.3114082036782669, "learning_rate": 0.00018040781197339932, "loss": 2.8789148330688477, "step": 8275, "token_acc": 0.3135999916265877 }, { "epoch": 4.8513632365875115, "grad_norm": 0.2730560220550665, "learning_rate": 0.0001804020494215332, "loss": 2.8654305934906006, "step": 8276, "token_acc": 0.3151163156404961 }, { "epoch": 4.85194957490472, "grad_norm": 0.2470158961694744, "learning_rate": 0.00018039628611439814, "loss": 2.8697290420532227, "step": 8277, "token_acc": 0.3130341601108178 }, { "epoch": 4.852535913221929, "grad_norm": 0.2565056776298764, "learning_rate": 0.00018039052205204837, "loss": 2.837745189666748, "step": 8278, "token_acc": 0.32126750687596867 }, { "epoch": 4.853122251539138, "grad_norm": 0.21918859869281512, "learning_rate": 0.00018038475723453792, "loss": 2.843376636505127, "step": 8279, "token_acc": 0.3194130065729334 }, { "epoch": 4.853708589856347, "grad_norm": 0.24356065018212725, "learning_rate": 0.000180378991661921, "loss": 2.8585290908813477, "step": 8280, "token_acc": 0.31841282409689176 }, { "epoch": 4.854294928173556, "grad_norm": 0.2367040909788465, "learning_rate": 0.00018037322533425178, "loss": 2.901815414428711, "step": 8281, "token_acc": 0.31010659551705916 }, { "epoch": 4.854881266490765, "grad_norm": 0.2623951539818655, "learning_rate": 0.0001803674582515844, "loss": 2.885009527206421, "step": 8282, "token_acc": 0.31317041973990556 }, { "epoch": 4.855467604807974, "grad_norm": 0.24151255188138437, "learning_rate": 0.0001803616904139731, "loss": 2.8508739471435547, "step": 8283, "token_acc": 0.31892035402882596 }, { "epoch": 4.8560539431251835, "grad_norm": 0.24142413826738005, "learning_rate": 0.00018035592182147193, "loss": 2.8432466983795166, "step": 8284, "token_acc": 0.32071092316911376 }, { "epoch": 4.856640281442393, "grad_norm": 0.25038561087397676, "learning_rate": 0.0001803501524741352, "loss": 2.845156669616699, "step": 8285, "token_acc": 0.3183360282386619 }, { "epoch": 4.857226619759601, "grad_norm": 0.2688844123631264, "learning_rate": 0.00018034438237201706, "loss": 2.9217746257781982, "step": 8286, "token_acc": 0.3069184300314486 }, { "epoch": 4.85781295807681, "grad_norm": 0.26061201656189176, "learning_rate": 0.0001803386115151717, "loss": 2.879915952682495, "step": 8287, "token_acc": 0.3138979157554078 }, { "epoch": 4.858399296394019, "grad_norm": 0.2434241639900745, "learning_rate": 0.00018033283990365336, "loss": 2.8732175827026367, "step": 8288, "token_acc": 0.3150662841596854 }, { "epoch": 4.858985634711228, "grad_norm": 0.2636781934330705, "learning_rate": 0.0001803270675375162, "loss": 2.8783469200134277, "step": 8289, "token_acc": 0.3143435427363254 }, { "epoch": 4.859571973028437, "grad_norm": 0.2412022578869104, "learning_rate": 0.00018032129441681455, "loss": 2.8917031288146973, "step": 8290, "token_acc": 0.3122230485575284 }, { "epoch": 4.860158311345646, "grad_norm": 0.251784973091972, "learning_rate": 0.0001803155205416025, "loss": 2.8607735633850098, "step": 8291, "token_acc": 0.31629122311909796 }, { "epoch": 4.8607446496628555, "grad_norm": 0.31277765136428076, "learning_rate": 0.0001803097459119344, "loss": 2.8644590377807617, "step": 8292, "token_acc": 0.316609756480179 }, { "epoch": 4.861330987980065, "grad_norm": 0.23496105136535844, "learning_rate": 0.00018030397052786444, "loss": 2.8517866134643555, "step": 8293, "token_acc": 0.3193561599387224 }, { "epoch": 4.861917326297274, "grad_norm": 0.2514984031302117, "learning_rate": 0.00018029819438944688, "loss": 2.8404793739318848, "step": 8294, "token_acc": 0.321150203397199 }, { "epoch": 4.862503664614483, "grad_norm": 0.2345558256257672, "learning_rate": 0.00018029241749673602, "loss": 2.8161051273345947, "step": 8295, "token_acc": 0.324529119428893 }, { "epoch": 4.863090002931692, "grad_norm": 0.24003916718476534, "learning_rate": 0.00018028663984978607, "loss": 2.8749234676361084, "step": 8296, "token_acc": 0.3157562037722393 }, { "epoch": 4.863676341248901, "grad_norm": 0.22372755736968827, "learning_rate": 0.00018028086144865132, "loss": 2.8817644119262695, "step": 8297, "token_acc": 0.31306175947675097 }, { "epoch": 4.86426267956611, "grad_norm": 0.24911554432421226, "learning_rate": 0.00018027508229338609, "loss": 2.8640637397766113, "step": 8298, "token_acc": 0.3152502706489206 }, { "epoch": 4.864849017883318, "grad_norm": 0.2365267375596786, "learning_rate": 0.0001802693023840446, "loss": 2.862915515899658, "step": 8299, "token_acc": 0.3156112499933655 }, { "epoch": 4.8654353562005275, "grad_norm": 0.2465057550603483, "learning_rate": 0.0001802635217206812, "loss": 2.845059871673584, "step": 8300, "token_acc": 0.32128665342970675 }, { "epoch": 4.866021694517737, "grad_norm": 0.23719947427105276, "learning_rate": 0.00018025774030335016, "loss": 2.88051700592041, "step": 8301, "token_acc": 0.31389478687591243 }, { "epoch": 4.866608032834946, "grad_norm": 0.22523795178569206, "learning_rate": 0.00018025195813210578, "loss": 2.847609758377075, "step": 8302, "token_acc": 0.31831767605746764 }, { "epoch": 4.867194371152155, "grad_norm": 0.24587668926707795, "learning_rate": 0.00018024617520700243, "loss": 2.8776755332946777, "step": 8303, "token_acc": 0.31404801879583955 }, { "epoch": 4.867780709469364, "grad_norm": 0.2318335054382226, "learning_rate": 0.00018024039152809436, "loss": 2.867398738861084, "step": 8304, "token_acc": 0.3170485157085939 }, { "epoch": 4.868367047786573, "grad_norm": 0.24106518459400422, "learning_rate": 0.00018023460709543597, "loss": 2.8649697303771973, "step": 8305, "token_acc": 0.3154857752804883 }, { "epoch": 4.868953386103782, "grad_norm": 0.2692996784923357, "learning_rate": 0.00018022882190908156, "loss": 2.882925510406494, "step": 8306, "token_acc": 0.31291861699721385 }, { "epoch": 4.869539724420991, "grad_norm": 0.24226530823021974, "learning_rate": 0.00018022303596908548, "loss": 2.8986101150512695, "step": 8307, "token_acc": 0.3103310125558258 }, { "epoch": 4.8701260627381995, "grad_norm": 0.26374124541814953, "learning_rate": 0.00018021724927550205, "loss": 2.8671793937683105, "step": 8308, "token_acc": 0.315154242592974 }, { "epoch": 4.870712401055409, "grad_norm": 0.2436113019715859, "learning_rate": 0.00018021146182838567, "loss": 2.839139223098755, "step": 8309, "token_acc": 0.3204626156089057 }, { "epoch": 4.871298739372618, "grad_norm": 0.23228069491711262, "learning_rate": 0.00018020567362779071, "loss": 2.8331470489501953, "step": 8310, "token_acc": 0.3216310001430174 }, { "epoch": 4.871885077689827, "grad_norm": 0.2463145013589297, "learning_rate": 0.0001801998846737715, "loss": 2.858738422393799, "step": 8311, "token_acc": 0.3177758586188375 }, { "epoch": 4.872471416007036, "grad_norm": 0.23723800238275622, "learning_rate": 0.00018019409496638247, "loss": 2.8994221687316895, "step": 8312, "token_acc": 0.3099905309345034 }, { "epoch": 4.873057754324245, "grad_norm": 0.250814793198358, "learning_rate": 0.00018018830450567793, "loss": 2.9055447578430176, "step": 8313, "token_acc": 0.3108170054316789 }, { "epoch": 4.873644092641454, "grad_norm": 0.27398209307191984, "learning_rate": 0.0001801825132917124, "loss": 2.879744052886963, "step": 8314, "token_acc": 0.3138493930217388 }, { "epoch": 4.874230430958663, "grad_norm": 0.22359184827465498, "learning_rate": 0.00018017672132454018, "loss": 2.8784992694854736, "step": 8315, "token_acc": 0.3132211215806287 }, { "epoch": 4.874816769275872, "grad_norm": 0.2762913441351406, "learning_rate": 0.00018017092860421566, "loss": 2.8213250637054443, "step": 8316, "token_acc": 0.3228874011997838 }, { "epoch": 4.8754031075930815, "grad_norm": 0.27911287184463435, "learning_rate": 0.00018016513513079334, "loss": 2.8378982543945312, "step": 8317, "token_acc": 0.3210342899160099 }, { "epoch": 4.875989445910291, "grad_norm": 0.3467151263096626, "learning_rate": 0.00018015934090432757, "loss": 2.879188060760498, "step": 8318, "token_acc": 0.31426505794368764 }, { "epoch": 4.8765757842275, "grad_norm": 0.41651856958443784, "learning_rate": 0.00018015354592487283, "loss": 2.894289970397949, "step": 8319, "token_acc": 0.31249780912858593 }, { "epoch": 4.877162122544708, "grad_norm": 0.25184072688833115, "learning_rate": 0.00018014775019248354, "loss": 2.9014859199523926, "step": 8320, "token_acc": 0.3118942919352773 }, { "epoch": 4.877748460861917, "grad_norm": 0.4366234877037817, "learning_rate": 0.00018014195370721412, "loss": 2.869117021560669, "step": 8321, "token_acc": 0.3143454485283689 }, { "epoch": 4.878334799179126, "grad_norm": 0.3519327963883528, "learning_rate": 0.00018013615646911903, "loss": 2.82499361038208, "step": 8322, "token_acc": 0.3220378060179883 }, { "epoch": 4.878921137496335, "grad_norm": 0.33397018448864185, "learning_rate": 0.00018013035847825278, "loss": 2.8657753467559814, "step": 8323, "token_acc": 0.31717150526355425 }, { "epoch": 4.879507475813544, "grad_norm": 0.3009519254263361, "learning_rate": 0.00018012455973466973, "loss": 2.842866897583008, "step": 8324, "token_acc": 0.32038674467318284 }, { "epoch": 4.8800938141307535, "grad_norm": 0.3443367633797972, "learning_rate": 0.00018011876023842443, "loss": 2.871098041534424, "step": 8325, "token_acc": 0.31643414811547776 }, { "epoch": 4.880680152447963, "grad_norm": 0.24497043106893787, "learning_rate": 0.00018011295998957136, "loss": 2.9212915897369385, "step": 8326, "token_acc": 0.3088777353944991 }, { "epoch": 4.881266490765172, "grad_norm": 0.316984883994554, "learning_rate": 0.00018010715898816496, "loss": 2.8661885261535645, "step": 8327, "token_acc": 0.31717082662757545 }, { "epoch": 4.881852829082381, "grad_norm": 0.24788825320846958, "learning_rate": 0.00018010135723425975, "loss": 2.8803391456604004, "step": 8328, "token_acc": 0.3138902779346055 }, { "epoch": 4.88243916739959, "grad_norm": 0.2602679117072649, "learning_rate": 0.00018009555472791024, "loss": 2.87166690826416, "step": 8329, "token_acc": 0.315174309717123 }, { "epoch": 4.883025505716798, "grad_norm": 0.31036588749503774, "learning_rate": 0.00018008975146917094, "loss": 2.848505973815918, "step": 8330, "token_acc": 0.31965453121420234 }, { "epoch": 4.883611844034007, "grad_norm": 0.21788158773523006, "learning_rate": 0.00018008394745809632, "loss": 2.8551535606384277, "step": 8331, "token_acc": 0.31942335256912363 }, { "epoch": 4.884198182351216, "grad_norm": 0.26274104727704695, "learning_rate": 0.00018007814269474092, "loss": 2.8808937072753906, "step": 8332, "token_acc": 0.31517946944933367 }, { "epoch": 4.8847845206684255, "grad_norm": 0.25641079607513045, "learning_rate": 0.00018007233717915927, "loss": 2.819014549255371, "step": 8333, "token_acc": 0.3236232315436507 }, { "epoch": 4.885370858985635, "grad_norm": 0.23555712937131118, "learning_rate": 0.00018006653091140595, "loss": 2.8311595916748047, "step": 8334, "token_acc": 0.3205550882216173 }, { "epoch": 4.885957197302844, "grad_norm": 0.29136000202512463, "learning_rate": 0.00018006072389153545, "loss": 2.8847479820251465, "step": 8335, "token_acc": 0.31392240313407 }, { "epoch": 4.886543535620053, "grad_norm": 0.23515596007198872, "learning_rate": 0.00018005491611960232, "loss": 2.8684260845184326, "step": 8336, "token_acc": 0.31563893266020926 }, { "epoch": 4.887129873937262, "grad_norm": 0.23892637852319623, "learning_rate": 0.00018004910759566114, "loss": 2.8785219192504883, "step": 8337, "token_acc": 0.31416866447779945 }, { "epoch": 4.887716212254471, "grad_norm": 0.24738364982189454, "learning_rate": 0.00018004329831976644, "loss": 2.8637099266052246, "step": 8338, "token_acc": 0.3154155394301029 }, { "epoch": 4.88830255057168, "grad_norm": 0.2241361403139087, "learning_rate": 0.00018003748829197285, "loss": 2.840609073638916, "step": 8339, "token_acc": 0.3190341879260843 }, { "epoch": 4.888888888888889, "grad_norm": 0.24769831876794188, "learning_rate": 0.00018003167751233487, "loss": 2.830899238586426, "step": 8340, "token_acc": 0.32139984662289806 }, { "epoch": 4.889475227206098, "grad_norm": 0.23949859887013486, "learning_rate": 0.00018002586598090713, "loss": 2.8124194145202637, "step": 8341, "token_acc": 0.3246290072423339 }, { "epoch": 4.890061565523307, "grad_norm": 0.22859410466642532, "learning_rate": 0.00018002005369774425, "loss": 2.8836545944213867, "step": 8342, "token_acc": 0.31433519146810085 }, { "epoch": 4.890647903840516, "grad_norm": 0.2505889991202275, "learning_rate": 0.00018001424066290076, "loss": 2.8768460750579834, "step": 8343, "token_acc": 0.3155217826476321 }, { "epoch": 4.891234242157725, "grad_norm": 0.22030788690810008, "learning_rate": 0.0001800084268764313, "loss": 2.869615077972412, "step": 8344, "token_acc": 0.31552486632613047 }, { "epoch": 4.891820580474934, "grad_norm": 0.24817859520213717, "learning_rate": 0.0001800026123383905, "loss": 2.893939971923828, "step": 8345, "token_acc": 0.31317855796643645 }, { "epoch": 4.892406918792143, "grad_norm": 0.2185594769012512, "learning_rate": 0.00017999679704883297, "loss": 2.810187816619873, "step": 8346, "token_acc": 0.32569696807155407 }, { "epoch": 4.892993257109352, "grad_norm": 0.24143949083851032, "learning_rate": 0.0001799909810078133, "loss": 2.9196364879608154, "step": 8347, "token_acc": 0.30799483145836964 }, { "epoch": 4.893579595426561, "grad_norm": 0.225846336782677, "learning_rate": 0.00017998516421538615, "loss": 2.890047073364258, "step": 8348, "token_acc": 0.31326497975655865 }, { "epoch": 4.89416593374377, "grad_norm": 0.23405898896626698, "learning_rate": 0.0001799793466716062, "loss": 2.9187352657318115, "step": 8349, "token_acc": 0.30899945336809354 }, { "epoch": 4.8947522720609795, "grad_norm": 0.23345743570027597, "learning_rate": 0.00017997352837652804, "loss": 2.862542152404785, "step": 8350, "token_acc": 0.3158329101066531 }, { "epoch": 4.895338610378188, "grad_norm": 0.2288312764465444, "learning_rate": 0.00017996770933020634, "loss": 2.874232530593872, "step": 8351, "token_acc": 0.3149623414307114 }, { "epoch": 4.895924948695397, "grad_norm": 0.25099056091449495, "learning_rate": 0.00017996188953269576, "loss": 2.8935036659240723, "step": 8352, "token_acc": 0.31226081970573205 }, { "epoch": 4.896511287012606, "grad_norm": 0.22608767067381705, "learning_rate": 0.00017995606898405103, "loss": 2.8853354454040527, "step": 8353, "token_acc": 0.3130003311316868 }, { "epoch": 4.897097625329815, "grad_norm": 0.2761988276982564, "learning_rate": 0.00017995024768432673, "loss": 2.8745832443237305, "step": 8354, "token_acc": 0.3148744664663632 }, { "epoch": 4.897683963647024, "grad_norm": 0.23446419449030897, "learning_rate": 0.0001799444256335776, "loss": 2.8428549766540527, "step": 8355, "token_acc": 0.3192100895769309 }, { "epoch": 4.898270301964233, "grad_norm": 0.26382609452481925, "learning_rate": 0.0001799386028318583, "loss": 2.8720126152038574, "step": 8356, "token_acc": 0.315113459402122 }, { "epoch": 4.898856640281442, "grad_norm": 0.23617571610873392, "learning_rate": 0.00017993277927922356, "loss": 2.889610528945923, "step": 8357, "token_acc": 0.31346922801624144 }, { "epoch": 4.8994429785986515, "grad_norm": 0.22977684025777068, "learning_rate": 0.00017992695497572806, "loss": 2.8615188598632812, "step": 8358, "token_acc": 0.3171974488538655 }, { "epoch": 4.900029316915861, "grad_norm": 0.24129330942038432, "learning_rate": 0.00017992112992142655, "loss": 2.806392192840576, "step": 8359, "token_acc": 0.3253658904919245 }, { "epoch": 4.90061565523307, "grad_norm": 0.23626231719995006, "learning_rate": 0.0001799153041163737, "loss": 2.8367161750793457, "step": 8360, "token_acc": 0.31884134132819025 }, { "epoch": 4.901201993550279, "grad_norm": 0.2542463727851749, "learning_rate": 0.00017990947756062423, "loss": 2.8421549797058105, "step": 8361, "token_acc": 0.320737015813615 }, { "epoch": 4.901788331867488, "grad_norm": 0.23367954446336806, "learning_rate": 0.00017990365025423292, "loss": 2.8626556396484375, "step": 8362, "token_acc": 0.3170893159541577 }, { "epoch": 4.902374670184696, "grad_norm": 0.24107456560833737, "learning_rate": 0.0001798978221972545, "loss": 2.89695143699646, "step": 8363, "token_acc": 0.3130193758162096 }, { "epoch": 4.902961008501905, "grad_norm": 0.22205478432796627, "learning_rate": 0.0001798919933897437, "loss": 2.9388632774353027, "step": 8364, "token_acc": 0.3057149181629969 }, { "epoch": 4.903547346819114, "grad_norm": 0.24583170317791017, "learning_rate": 0.00017988616383175524, "loss": 2.882878541946411, "step": 8365, "token_acc": 0.3132292641588874 }, { "epoch": 4.9041336851363235, "grad_norm": 0.22301532725874598, "learning_rate": 0.00017988033352334397, "loss": 2.8714468479156494, "step": 8366, "token_acc": 0.3151011658205903 }, { "epoch": 4.904720023453533, "grad_norm": 0.25011571479305245, "learning_rate": 0.0001798745024645646, "loss": 2.8726320266723633, "step": 8367, "token_acc": 0.31377489705738637 }, { "epoch": 4.905306361770742, "grad_norm": 0.29320008303632084, "learning_rate": 0.00017986867065547188, "loss": 2.8680896759033203, "step": 8368, "token_acc": 0.3152449922550408 }, { "epoch": 4.905892700087951, "grad_norm": 0.31867725285958265, "learning_rate": 0.00017986283809612064, "loss": 2.8753085136413574, "step": 8369, "token_acc": 0.31594593034543916 }, { "epoch": 4.90647903840516, "grad_norm": 0.25513645561589104, "learning_rate": 0.00017985700478656563, "loss": 2.8706867694854736, "step": 8370, "token_acc": 0.31672513385909845 }, { "epoch": 4.907065376722369, "grad_norm": 0.2539813547600626, "learning_rate": 0.0001798511707268617, "loss": 2.8828749656677246, "step": 8371, "token_acc": 0.31298156865500365 }, { "epoch": 4.907651715039578, "grad_norm": 0.23029249829891968, "learning_rate": 0.00017984533591706363, "loss": 2.866711139678955, "step": 8372, "token_acc": 0.3161076544338378 }, { "epoch": 4.908238053356786, "grad_norm": 0.26006752445352266, "learning_rate": 0.0001798395003572262, "loss": 2.8342995643615723, "step": 8373, "token_acc": 0.3208169752339252 }, { "epoch": 4.9088243916739955, "grad_norm": 0.22817906499243035, "learning_rate": 0.00017983366404740426, "loss": 2.8753013610839844, "step": 8374, "token_acc": 0.3165643232801957 }, { "epoch": 4.909410729991205, "grad_norm": 0.23486876239497756, "learning_rate": 0.0001798278269876526, "loss": 2.878453254699707, "step": 8375, "token_acc": 0.31363540814543356 }, { "epoch": 4.909997068308414, "grad_norm": 0.2253808898672687, "learning_rate": 0.0001798219891780261, "loss": 2.8972384929656982, "step": 8376, "token_acc": 0.312015641483405 }, { "epoch": 4.910583406625623, "grad_norm": 0.2379170768290947, "learning_rate": 0.00017981615061857955, "loss": 2.873216152191162, "step": 8377, "token_acc": 0.315789062683024 }, { "epoch": 4.911169744942832, "grad_norm": 0.29709065303173665, "learning_rate": 0.00017981031130936785, "loss": 2.8706111907958984, "step": 8378, "token_acc": 0.3143550165380375 }, { "epoch": 4.911756083260041, "grad_norm": 0.29621275029561955, "learning_rate": 0.00017980447125044583, "loss": 2.8557510375976562, "step": 8379, "token_acc": 0.3173814272458698 }, { "epoch": 4.91234242157725, "grad_norm": 0.2633445086959949, "learning_rate": 0.00017979863044186828, "loss": 2.860452651977539, "step": 8380, "token_acc": 0.3178431764336483 }, { "epoch": 4.912928759894459, "grad_norm": 0.2778937269590066, "learning_rate": 0.0001797927888836902, "loss": 2.8566553592681885, "step": 8381, "token_acc": 0.3170729862408569 }, { "epoch": 4.913515098211668, "grad_norm": 0.3038834088678048, "learning_rate": 0.00017978694657596632, "loss": 2.866913318634033, "step": 8382, "token_acc": 0.3164103019312951 }, { "epoch": 4.9141014365288775, "grad_norm": 0.21938384011073933, "learning_rate": 0.00017978110351875163, "loss": 2.8766119480133057, "step": 8383, "token_acc": 0.3162657461365683 }, { "epoch": 4.914687774846087, "grad_norm": 0.23538344192937924, "learning_rate": 0.00017977525971210097, "loss": 2.865816116333008, "step": 8384, "token_acc": 0.3175139161755075 }, { "epoch": 4.915274113163295, "grad_norm": 0.22459320113229408, "learning_rate": 0.00017976941515606925, "loss": 2.8219075202941895, "step": 8385, "token_acc": 0.3227600541023244 }, { "epoch": 4.915860451480504, "grad_norm": 0.22151718451969166, "learning_rate": 0.00017976356985071137, "loss": 2.8622217178344727, "step": 8386, "token_acc": 0.315430179801714 }, { "epoch": 4.916446789797713, "grad_norm": 0.22233044291478105, "learning_rate": 0.0001797577237960822, "loss": 2.915844678878784, "step": 8387, "token_acc": 0.3091933477977636 }, { "epoch": 4.917033128114922, "grad_norm": 0.21736400094775632, "learning_rate": 0.00017975187699223675, "loss": 2.8749098777770996, "step": 8388, "token_acc": 0.3158443814259652 }, { "epoch": 4.917619466432131, "grad_norm": 0.23973876173722167, "learning_rate": 0.00017974602943922983, "loss": 2.84849214553833, "step": 8389, "token_acc": 0.31763861390043624 }, { "epoch": 4.91820580474934, "grad_norm": 0.22841312207726472, "learning_rate": 0.00017974018113711644, "loss": 2.865987777709961, "step": 8390, "token_acc": 0.3173935988741524 }, { "epoch": 4.9187921430665495, "grad_norm": 0.23677695696859857, "learning_rate": 0.0001797343320859515, "loss": 2.889751434326172, "step": 8391, "token_acc": 0.312426859705264 }, { "epoch": 4.919378481383759, "grad_norm": 0.22697427161858655, "learning_rate": 0.00017972848228578996, "loss": 2.9122982025146484, "step": 8392, "token_acc": 0.30895203409335203 }, { "epoch": 4.919964819700968, "grad_norm": 0.22834015936063518, "learning_rate": 0.00017972263173668678, "loss": 2.8649489879608154, "step": 8393, "token_acc": 0.31620692809261847 }, { "epoch": 4.920551158018176, "grad_norm": 0.21463997859439704, "learning_rate": 0.00017971678043869686, "loss": 2.8691301345825195, "step": 8394, "token_acc": 0.3162274038298106 }, { "epoch": 4.921137496335385, "grad_norm": 0.2264994949144399, "learning_rate": 0.00017971092839187526, "loss": 2.8167340755462646, "step": 8395, "token_acc": 0.32415269732146684 }, { "epoch": 4.921723834652594, "grad_norm": 0.2257552581259529, "learning_rate": 0.00017970507559627685, "loss": 2.872645378112793, "step": 8396, "token_acc": 0.31694903346508424 }, { "epoch": 4.922310172969803, "grad_norm": 0.23328961657902544, "learning_rate": 0.00017969922205195667, "loss": 2.872405767440796, "step": 8397, "token_acc": 0.3151759810855178 }, { "epoch": 4.9228965112870124, "grad_norm": 0.2192088463765428, "learning_rate": 0.0001796933677589697, "loss": 2.832584857940674, "step": 8398, "token_acc": 0.3236575967595576 }, { "epoch": 4.923482849604222, "grad_norm": 0.24260608661816396, "learning_rate": 0.0001796875127173709, "loss": 2.893548011779785, "step": 8399, "token_acc": 0.31123765388631874 }, { "epoch": 4.924069187921431, "grad_norm": 0.27769296241341496, "learning_rate": 0.00017968165692721535, "loss": 2.824563980102539, "step": 8400, "token_acc": 0.321175642063783 }, { "epoch": 4.92465552623864, "grad_norm": 0.29811134098093284, "learning_rate": 0.000179675800388558, "loss": 2.9068756103515625, "step": 8401, "token_acc": 0.3093563886026312 }, { "epoch": 4.925241864555849, "grad_norm": 0.249000364576747, "learning_rate": 0.00017966994310145384, "loss": 2.878631114959717, "step": 8402, "token_acc": 0.31276594077405906 }, { "epoch": 4.925828202873058, "grad_norm": 0.2787394495925771, "learning_rate": 0.00017966408506595792, "loss": 2.867664337158203, "step": 8403, "token_acc": 0.31646316988851914 }, { "epoch": 4.926414541190267, "grad_norm": 0.30255537886443484, "learning_rate": 0.0001796582262821253, "loss": 2.871713161468506, "step": 8404, "token_acc": 0.3152598118525957 }, { "epoch": 4.927000879507476, "grad_norm": 0.24076229661336646, "learning_rate": 0.00017965236675001096, "loss": 2.8511435985565186, "step": 8405, "token_acc": 0.3198485288556914 }, { "epoch": 4.927587217824685, "grad_norm": 0.2755282150328066, "learning_rate": 0.00017964650646967, "loss": 2.8566789627075195, "step": 8406, "token_acc": 0.31816040007601654 }, { "epoch": 4.928173556141894, "grad_norm": 0.2126960649711986, "learning_rate": 0.0001796406454411574, "loss": 2.8522043228149414, "step": 8407, "token_acc": 0.3195865765629008 }, { "epoch": 4.928759894459103, "grad_norm": 0.31397777047049447, "learning_rate": 0.00017963478366452828, "loss": 2.8467164039611816, "step": 8408, "token_acc": 0.31936727804450593 }, { "epoch": 4.929346232776312, "grad_norm": 0.31244737987162124, "learning_rate": 0.00017962892113983765, "loss": 2.8915839195251465, "step": 8409, "token_acc": 0.3129559683344544 }, { "epoch": 4.929932571093521, "grad_norm": 0.2533921326833803, "learning_rate": 0.0001796230578671406, "loss": 2.893911361694336, "step": 8410, "token_acc": 0.3120797965016751 }, { "epoch": 4.93051890941073, "grad_norm": 0.2861462755250293, "learning_rate": 0.00017961719384649224, "loss": 2.869845151901245, "step": 8411, "token_acc": 0.3168968683968855 }, { "epoch": 4.931105247727939, "grad_norm": 0.25865957627690706, "learning_rate": 0.00017961132907794763, "loss": 2.8767058849334717, "step": 8412, "token_acc": 0.31476828809345864 }, { "epoch": 4.931691586045148, "grad_norm": 0.32565273995163213, "learning_rate": 0.00017960546356156188, "loss": 2.857372999191284, "step": 8413, "token_acc": 0.3187572705645626 }, { "epoch": 4.932277924362357, "grad_norm": 0.2552855438475177, "learning_rate": 0.00017959959729739003, "loss": 2.849339008331299, "step": 8414, "token_acc": 0.31983503128716967 }, { "epoch": 4.9328642626795665, "grad_norm": 0.2738336538961474, "learning_rate": 0.00017959373028548722, "loss": 2.884718179702759, "step": 8415, "token_acc": 0.3155745781014763 }, { "epoch": 4.933450600996775, "grad_norm": 0.24417145570321924, "learning_rate": 0.0001795878625259086, "loss": 2.891413927078247, "step": 8416, "token_acc": 0.3125787147391112 }, { "epoch": 4.934036939313984, "grad_norm": 0.24757218137037865, "learning_rate": 0.00017958199401870925, "loss": 2.8760080337524414, "step": 8417, "token_acc": 0.31498449208280016 }, { "epoch": 4.934623277631193, "grad_norm": 0.2595627487152961, "learning_rate": 0.0001795761247639443, "loss": 2.8534088134765625, "step": 8418, "token_acc": 0.3180557606346564 }, { "epoch": 4.935209615948402, "grad_norm": 0.2619144923431915, "learning_rate": 0.0001795702547616689, "loss": 2.832172155380249, "step": 8419, "token_acc": 0.3205300226327968 }, { "epoch": 4.935795954265611, "grad_norm": 0.26102097932421314, "learning_rate": 0.00017956438401193814, "loss": 2.8744659423828125, "step": 8420, "token_acc": 0.31579796297982493 }, { "epoch": 4.93638229258282, "grad_norm": 0.22910570857373616, "learning_rate": 0.00017955851251480725, "loss": 2.8690361976623535, "step": 8421, "token_acc": 0.3152141926262244 }, { "epoch": 4.936968630900029, "grad_norm": 0.2614473539724336, "learning_rate": 0.0001795526402703313, "loss": 2.8879055976867676, "step": 8422, "token_acc": 0.3136906731666895 }, { "epoch": 4.9375549692172385, "grad_norm": 0.23684964577174047, "learning_rate": 0.00017954676727856552, "loss": 2.886958122253418, "step": 8423, "token_acc": 0.312734063595016 }, { "epoch": 4.938141307534448, "grad_norm": 0.2490443299057755, "learning_rate": 0.00017954089353956504, "loss": 2.8789305686950684, "step": 8424, "token_acc": 0.31382580580441544 }, { "epoch": 4.938727645851657, "grad_norm": 0.2036345704375411, "learning_rate": 0.00017953501905338507, "loss": 2.8779430389404297, "step": 8425, "token_acc": 0.3141000326904217 }, { "epoch": 4.939313984168866, "grad_norm": 0.24906080426303961, "learning_rate": 0.00017952914382008076, "loss": 2.8541882038116455, "step": 8426, "token_acc": 0.31917883152960863 }, { "epoch": 4.939900322486075, "grad_norm": 0.2364522383358862, "learning_rate": 0.00017952326783970732, "loss": 2.869612693786621, "step": 8427, "token_acc": 0.31597029226021267 }, { "epoch": 4.940486660803283, "grad_norm": 0.23946703920374435, "learning_rate": 0.00017951739111231994, "loss": 2.8637654781341553, "step": 8428, "token_acc": 0.3156103529313132 }, { "epoch": 4.941072999120492, "grad_norm": 0.23108771649079995, "learning_rate": 0.00017951151363797377, "loss": 2.8711001873016357, "step": 8429, "token_acc": 0.3157841457716867 }, { "epoch": 4.941659337437701, "grad_norm": 0.3026029264217738, "learning_rate": 0.00017950563541672412, "loss": 2.8523011207580566, "step": 8430, "token_acc": 0.31890253244355815 }, { "epoch": 4.9422456757549105, "grad_norm": 0.2747685719009976, "learning_rate": 0.00017949975644862616, "loss": 2.83609676361084, "step": 8431, "token_acc": 0.31888526270645706 }, { "epoch": 4.94283201407212, "grad_norm": 0.23844616839270108, "learning_rate": 0.0001794938767337351, "loss": 2.894364356994629, "step": 8432, "token_acc": 0.3114953606689112 }, { "epoch": 4.943418352389329, "grad_norm": 0.3558042910109727, "learning_rate": 0.0001794879962721062, "loss": 2.909543991088867, "step": 8433, "token_acc": 0.30960725013793244 }, { "epoch": 4.944004690706538, "grad_norm": 0.2882064391548642, "learning_rate": 0.0001794821150637947, "loss": 2.8687398433685303, "step": 8434, "token_acc": 0.31630840641947633 }, { "epoch": 4.944591029023747, "grad_norm": 0.24634742643211946, "learning_rate": 0.0001794762331088558, "loss": 2.895592451095581, "step": 8435, "token_acc": 0.31194721216172105 }, { "epoch": 4.945177367340956, "grad_norm": 0.31088095814704864, "learning_rate": 0.00017947035040734478, "loss": 2.857819080352783, "step": 8436, "token_acc": 0.3178397192953762 }, { "epoch": 4.945763705658165, "grad_norm": 0.2160652368025357, "learning_rate": 0.00017946446695931695, "loss": 2.825084686279297, "step": 8437, "token_acc": 0.321468530905637 }, { "epoch": 4.946350043975373, "grad_norm": 0.2958035164682766, "learning_rate": 0.00017945858276482749, "loss": 2.8763270378112793, "step": 8438, "token_acc": 0.31381781670879066 }, { "epoch": 4.9469363822925825, "grad_norm": 0.241168953514544, "learning_rate": 0.00017945269782393173, "loss": 2.8931961059570312, "step": 8439, "token_acc": 0.31105080738332574 }, { "epoch": 4.947522720609792, "grad_norm": 0.2526440014816673, "learning_rate": 0.00017944681213668493, "loss": 2.8589744567871094, "step": 8440, "token_acc": 0.31835809013814836 }, { "epoch": 4.948109058927001, "grad_norm": 0.27369265265725334, "learning_rate": 0.00017944092570314243, "loss": 2.8843092918395996, "step": 8441, "token_acc": 0.3135505218982029 }, { "epoch": 4.94869539724421, "grad_norm": 0.24230636499144806, "learning_rate": 0.00017943503852335942, "loss": 2.8540053367614746, "step": 8442, "token_acc": 0.31693669449785866 }, { "epoch": 4.949281735561419, "grad_norm": 0.27081961341012084, "learning_rate": 0.0001794291505973913, "loss": 2.8884458541870117, "step": 8443, "token_acc": 0.3129586281265616 }, { "epoch": 4.949868073878628, "grad_norm": 0.22472966445766054, "learning_rate": 0.00017942326192529334, "loss": 2.8744518756866455, "step": 8444, "token_acc": 0.3157451186693931 }, { "epoch": 4.950454412195837, "grad_norm": 0.27107364537960604, "learning_rate": 0.00017941737250712082, "loss": 2.852600574493408, "step": 8445, "token_acc": 0.3170322417046412 }, { "epoch": 4.951040750513046, "grad_norm": 0.22211175928585705, "learning_rate": 0.00017941148234292914, "loss": 2.904472827911377, "step": 8446, "token_acc": 0.3110918589831365 }, { "epoch": 4.951627088830255, "grad_norm": 0.22791913004449532, "learning_rate": 0.0001794055914327736, "loss": 2.8633878231048584, "step": 8447, "token_acc": 0.3169164226131051 }, { "epoch": 4.9522134271474645, "grad_norm": 0.21537409102024568, "learning_rate": 0.00017939969977670951, "loss": 2.8988394737243652, "step": 8448, "token_acc": 0.3098271769590448 }, { "epoch": 4.952799765464674, "grad_norm": 0.22624407070317581, "learning_rate": 0.00017939380737479223, "loss": 2.922788619995117, "step": 8449, "token_acc": 0.3086223190323975 }, { "epoch": 4.953386103781882, "grad_norm": 0.24076528414204681, "learning_rate": 0.00017938791422707713, "loss": 2.867844581604004, "step": 8450, "token_acc": 0.31615657307135514 }, { "epoch": 4.953972442099091, "grad_norm": 0.2304792364918695, "learning_rate": 0.00017938202033361954, "loss": 2.8729777336120605, "step": 8451, "token_acc": 0.3142697275614504 }, { "epoch": 4.9545587804163, "grad_norm": 0.24467403822596323, "learning_rate": 0.00017937612569447485, "loss": 2.8968539237976074, "step": 8452, "token_acc": 0.31223510492049905 }, { "epoch": 4.955145118733509, "grad_norm": 0.22995302914132748, "learning_rate": 0.0001793702303096984, "loss": 2.873523712158203, "step": 8453, "token_acc": 0.31530301679677053 }, { "epoch": 4.955731457050718, "grad_norm": 0.2585564071769646, "learning_rate": 0.00017936433417934563, "loss": 2.8777623176574707, "step": 8454, "token_acc": 0.31463056509732606 }, { "epoch": 4.956317795367927, "grad_norm": 0.26014784343606256, "learning_rate": 0.00017935843730347185, "loss": 2.876589059829712, "step": 8455, "token_acc": 0.3132975738355324 }, { "epoch": 4.9569041336851365, "grad_norm": 0.23828140401566972, "learning_rate": 0.00017935253968213245, "loss": 2.8952813148498535, "step": 8456, "token_acc": 0.30933137023347596 }, { "epoch": 4.957490472002346, "grad_norm": 0.24482416436640467, "learning_rate": 0.00017934664131538295, "loss": 2.831240653991699, "step": 8457, "token_acc": 0.32196208624705663 }, { "epoch": 4.958076810319555, "grad_norm": 0.24906862139323488, "learning_rate": 0.00017934074220327863, "loss": 2.854583263397217, "step": 8458, "token_acc": 0.3174457628527493 }, { "epoch": 4.958663148636763, "grad_norm": 0.2149633932163617, "learning_rate": 0.00017933484234587495, "loss": 2.86299991607666, "step": 8459, "token_acc": 0.3183635436079501 }, { "epoch": 4.959249486953972, "grad_norm": 0.24466946577680426, "learning_rate": 0.00017932894174322735, "loss": 2.8612112998962402, "step": 8460, "token_acc": 0.31658846924874273 }, { "epoch": 4.959835825271181, "grad_norm": 0.2220752400546345, "learning_rate": 0.00017932304039539122, "loss": 2.8488755226135254, "step": 8461, "token_acc": 0.31957833941288866 }, { "epoch": 4.96042216358839, "grad_norm": 0.24354049725874863, "learning_rate": 0.000179317138302422, "loss": 2.86130428314209, "step": 8462, "token_acc": 0.3160565163239888 }, { "epoch": 4.961008501905599, "grad_norm": 0.220288200553323, "learning_rate": 0.0001793112354643752, "loss": 2.8464696407318115, "step": 8463, "token_acc": 0.31817335588066464 }, { "epoch": 4.9615948402228085, "grad_norm": 0.23742862660385866, "learning_rate": 0.00017930533188130617, "loss": 2.9064717292785645, "step": 8464, "token_acc": 0.3126001662420071 }, { "epoch": 4.962181178540018, "grad_norm": 0.2572505141016793, "learning_rate": 0.00017929942755327044, "loss": 2.84448504447937, "step": 8465, "token_acc": 0.3185073701447532 }, { "epoch": 4.962767516857227, "grad_norm": 0.24229069797076158, "learning_rate": 0.00017929352248032343, "loss": 2.9053921699523926, "step": 8466, "token_acc": 0.30928349844632513 }, { "epoch": 4.963353855174436, "grad_norm": 0.23707694155670558, "learning_rate": 0.00017928761666252064, "loss": 2.869821071624756, "step": 8467, "token_acc": 0.3155895990169047 }, { "epoch": 4.963940193491645, "grad_norm": 0.23993120982386987, "learning_rate": 0.0001792817100999175, "loss": 2.8758597373962402, "step": 8468, "token_acc": 0.31520305561996226 }, { "epoch": 4.964526531808854, "grad_norm": 0.21953219632247756, "learning_rate": 0.00017927580279256954, "loss": 2.866121768951416, "step": 8469, "token_acc": 0.3176150154250998 }, { "epoch": 4.965112870126063, "grad_norm": 0.21213184587429856, "learning_rate": 0.00017926989474053223, "loss": 2.8481290340423584, "step": 8470, "token_acc": 0.3193162721716748 }, { "epoch": 4.965699208443271, "grad_norm": 0.23950240981379395, "learning_rate": 0.00017926398594386112, "loss": 2.8559980392456055, "step": 8471, "token_acc": 0.3187008151112976 }, { "epoch": 4.9662855467604805, "grad_norm": 0.25205350140802885, "learning_rate": 0.00017925807640261162, "loss": 2.8302392959594727, "step": 8472, "token_acc": 0.32067642947722197 }, { "epoch": 4.96687188507769, "grad_norm": 0.3087549449807468, "learning_rate": 0.00017925216611683933, "loss": 2.9145708084106445, "step": 8473, "token_acc": 0.30979842020102183 }, { "epoch": 4.967458223394899, "grad_norm": 0.3097235146306694, "learning_rate": 0.00017924625508659972, "loss": 2.836543083190918, "step": 8474, "token_acc": 0.32098026693571324 }, { "epoch": 4.968044561712108, "grad_norm": 0.24463342483970554, "learning_rate": 0.00017924034331194834, "loss": 2.8545455932617188, "step": 8475, "token_acc": 0.3191926742664922 }, { "epoch": 4.968630900029317, "grad_norm": 0.2502253549220813, "learning_rate": 0.00017923443079294072, "loss": 2.8544154167175293, "step": 8476, "token_acc": 0.3188576140766107 }, { "epoch": 4.969217238346526, "grad_norm": 0.2987540742125401, "learning_rate": 0.00017922851752963238, "loss": 2.8679263591766357, "step": 8477, "token_acc": 0.3162451161834259 }, { "epoch": 4.969803576663735, "grad_norm": 0.33210370238558434, "learning_rate": 0.00017922260352207884, "loss": 2.8839192390441895, "step": 8478, "token_acc": 0.3128733221695333 }, { "epoch": 4.970389914980944, "grad_norm": 0.2609264166965845, "learning_rate": 0.00017921668877033574, "loss": 2.8879973888397217, "step": 8479, "token_acc": 0.3138327486581299 }, { "epoch": 4.970976253298153, "grad_norm": 0.277628001224917, "learning_rate": 0.00017921077327445859, "loss": 2.8682703971862793, "step": 8480, "token_acc": 0.31493226457873885 }, { "epoch": 4.971562591615362, "grad_norm": 0.27607377640150704, "learning_rate": 0.00017920485703450296, "loss": 2.846822500228882, "step": 8481, "token_acc": 0.3198889767548332 }, { "epoch": 4.972148929932571, "grad_norm": 0.23867597857746908, "learning_rate": 0.00017919894005052442, "loss": 2.8481671810150146, "step": 8482, "token_acc": 0.3195922858936277 }, { "epoch": 4.97273526824978, "grad_norm": 0.2557897899228066, "learning_rate": 0.00017919302232257856, "loss": 2.879589796066284, "step": 8483, "token_acc": 0.31282720475688913 }, { "epoch": 4.973321606566989, "grad_norm": 0.23800072107441336, "learning_rate": 0.000179187103850721, "loss": 2.861189842224121, "step": 8484, "token_acc": 0.3163650763050535 }, { "epoch": 4.973907944884198, "grad_norm": 0.24673257717820166, "learning_rate": 0.00017918118463500725, "loss": 2.9156742095947266, "step": 8485, "token_acc": 0.30818791742100377 }, { "epoch": 4.974494283201407, "grad_norm": 0.2389108879643748, "learning_rate": 0.00017917526467549298, "loss": 2.903752326965332, "step": 8486, "token_acc": 0.31066099278639764 }, { "epoch": 4.975080621518616, "grad_norm": 0.22200841917961095, "learning_rate": 0.00017916934397223383, "loss": 2.8764190673828125, "step": 8487, "token_acc": 0.31592107667979585 }, { "epoch": 4.975666959835825, "grad_norm": 0.24631251858129105, "learning_rate": 0.00017916342252528535, "loss": 2.8493995666503906, "step": 8488, "token_acc": 0.3204835589941973 }, { "epoch": 4.9762532981530345, "grad_norm": 0.2417674400421058, "learning_rate": 0.00017915750033470319, "loss": 2.840669631958008, "step": 8489, "token_acc": 0.3202059784758217 }, { "epoch": 4.976839636470244, "grad_norm": 0.22371602763045198, "learning_rate": 0.000179151577400543, "loss": 2.8537039756774902, "step": 8490, "token_acc": 0.3183531075421707 }, { "epoch": 4.977425974787453, "grad_norm": 0.24517790025225092, "learning_rate": 0.00017914565372286037, "loss": 2.8857383728027344, "step": 8491, "token_acc": 0.31477932810347925 }, { "epoch": 4.978012313104662, "grad_norm": 0.2303879716774955, "learning_rate": 0.00017913972930171096, "loss": 2.833596706390381, "step": 8492, "token_acc": 0.3208731919515907 }, { "epoch": 4.97859865142187, "grad_norm": 0.21952604240108176, "learning_rate": 0.00017913380413715047, "loss": 2.842550754547119, "step": 8493, "token_acc": 0.3202233053791323 }, { "epoch": 4.979184989739079, "grad_norm": 0.2364741868635869, "learning_rate": 0.00017912787822923454, "loss": 2.855903387069702, "step": 8494, "token_acc": 0.31731397366540776 }, { "epoch": 4.979771328056288, "grad_norm": 0.22864873124504112, "learning_rate": 0.0001791219515780188, "loss": 2.8198933601379395, "step": 8495, "token_acc": 0.3221293922170605 }, { "epoch": 4.980357666373497, "grad_norm": 0.21768791713483765, "learning_rate": 0.0001791160241835589, "loss": 2.8832879066467285, "step": 8496, "token_acc": 0.31440234389619276 }, { "epoch": 4.9809440046907065, "grad_norm": 0.2262613718135984, "learning_rate": 0.0001791100960459106, "loss": 2.8716835975646973, "step": 8497, "token_acc": 0.31583811089801006 }, { "epoch": 4.981530343007916, "grad_norm": 0.22683619389339266, "learning_rate": 0.00017910416716512956, "loss": 2.891462802886963, "step": 8498, "token_acc": 0.3127765486725664 }, { "epoch": 4.982116681325125, "grad_norm": 0.22339521389395867, "learning_rate": 0.00017909823754127144, "loss": 2.864078998565674, "step": 8499, "token_acc": 0.31737311127806994 }, { "epoch": 4.982703019642334, "grad_norm": 0.21080402882781651, "learning_rate": 0.000179092307174392, "loss": 2.932692527770996, "step": 8500, "token_acc": 0.3065689085808342 }, { "epoch": 4.983289357959543, "grad_norm": 0.22892508220793364, "learning_rate": 0.00017908637606454687, "loss": 2.857616901397705, "step": 8501, "token_acc": 0.31639023347591605 }, { "epoch": 4.983875696276751, "grad_norm": 0.2229368963671994, "learning_rate": 0.00017908044421179183, "loss": 2.8588857650756836, "step": 8502, "token_acc": 0.31685495846735895 }, { "epoch": 4.98446203459396, "grad_norm": 0.22444567030341322, "learning_rate": 0.0001790745116161826, "loss": 2.9332284927368164, "step": 8503, "token_acc": 0.3062027184304597 }, { "epoch": 4.985048372911169, "grad_norm": 0.21454941745025957, "learning_rate": 0.00017906857827777484, "loss": 2.924492597579956, "step": 8504, "token_acc": 0.30870076843493577 }, { "epoch": 4.9856347112283785, "grad_norm": 0.22136202269508384, "learning_rate": 0.00017906264419662436, "loss": 2.8622660636901855, "step": 8505, "token_acc": 0.31729759709019617 }, { "epoch": 4.986221049545588, "grad_norm": 0.23783258256232354, "learning_rate": 0.00017905670937278686, "loss": 2.856257438659668, "step": 8506, "token_acc": 0.3188564712107952 }, { "epoch": 4.986807387862797, "grad_norm": 0.2361640521773867, "learning_rate": 0.0001790507738063181, "loss": 2.9143805503845215, "step": 8507, "token_acc": 0.3091109254383891 }, { "epoch": 4.987393726180006, "grad_norm": 0.23269977030368877, "learning_rate": 0.00017904483749727387, "loss": 2.8381435871124268, "step": 8508, "token_acc": 0.31944902693813504 }, { "epoch": 4.987980064497215, "grad_norm": 0.22367574763668133, "learning_rate": 0.0001790389004457099, "loss": 2.915377378463745, "step": 8509, "token_acc": 0.30884221800094314 }, { "epoch": 4.988566402814424, "grad_norm": 0.2348708422805005, "learning_rate": 0.00017903296265168198, "loss": 2.8656444549560547, "step": 8510, "token_acc": 0.31724034063736073 }, { "epoch": 4.989152741131633, "grad_norm": 0.26292205820699793, "learning_rate": 0.00017902702411524586, "loss": 2.8722805976867676, "step": 8511, "token_acc": 0.31745583734262767 }, { "epoch": 4.989739079448842, "grad_norm": 0.21981841451273185, "learning_rate": 0.00017902108483645735, "loss": 2.853205680847168, "step": 8512, "token_acc": 0.3180597554688642 }, { "epoch": 4.990325417766051, "grad_norm": 0.2410120888170991, "learning_rate": 0.0001790151448153722, "loss": 2.8568387031555176, "step": 8513, "token_acc": 0.3163631315234814 }, { "epoch": 4.99091175608326, "grad_norm": 0.22686685412599306, "learning_rate": 0.00017900920405204625, "loss": 2.843190908432007, "step": 8514, "token_acc": 0.31800718704159325 }, { "epoch": 4.991498094400469, "grad_norm": 0.26593674023775077, "learning_rate": 0.0001790032625465353, "loss": 2.8981356620788574, "step": 8515, "token_acc": 0.3115141115009113 }, { "epoch": 4.992084432717678, "grad_norm": 0.27346868142257325, "learning_rate": 0.00017899732029889515, "loss": 2.8425588607788086, "step": 8516, "token_acc": 0.3186945150668945 }, { "epoch": 4.992670771034887, "grad_norm": 0.25475811112702484, "learning_rate": 0.00017899137730918163, "loss": 2.87961483001709, "step": 8517, "token_acc": 0.3149162699935498 }, { "epoch": 4.993257109352096, "grad_norm": 0.22113435032695664, "learning_rate": 0.00017898543357745058, "loss": 2.889873504638672, "step": 8518, "token_acc": 0.31485450657801634 }, { "epoch": 4.993843447669305, "grad_norm": 0.2862201794274265, "learning_rate": 0.00017897948910375777, "loss": 2.9130234718322754, "step": 8519, "token_acc": 0.30897796339593564 }, { "epoch": 4.994429785986514, "grad_norm": 0.38162279243154706, "learning_rate": 0.00017897354388815914, "loss": 2.885221242904663, "step": 8520, "token_acc": 0.3136089243663102 }, { "epoch": 4.995016124303723, "grad_norm": 0.30584510964405864, "learning_rate": 0.00017896759793071046, "loss": 2.899193525314331, "step": 8521, "token_acc": 0.31160074602868354 }, { "epoch": 4.9956024626209325, "grad_norm": 0.2549578283332126, "learning_rate": 0.0001789616512314676, "loss": 2.8359553813934326, "step": 8522, "token_acc": 0.32050980883473756 }, { "epoch": 4.996188800938142, "grad_norm": 0.2853655740064938, "learning_rate": 0.00017895570379048643, "loss": 2.8635950088500977, "step": 8523, "token_acc": 0.3154783371878066 }, { "epoch": 4.99677513925535, "grad_norm": 0.24804008976000905, "learning_rate": 0.00017894975560782284, "loss": 2.880258083343506, "step": 8524, "token_acc": 0.31414378998963766 }, { "epoch": 4.997361477572559, "grad_norm": 0.2827029743125223, "learning_rate": 0.00017894380668353265, "loss": 2.8957061767578125, "step": 8525, "token_acc": 0.3110220092605236 }, { "epoch": 4.997947815889768, "grad_norm": 0.2388138794035119, "learning_rate": 0.00017893785701767178, "loss": 2.8899502754211426, "step": 8526, "token_acc": 0.313573874509014 }, { "epoch": 4.998534154206977, "grad_norm": 0.2946185368856333, "learning_rate": 0.00017893190661029613, "loss": 2.8956902027130127, "step": 8527, "token_acc": 0.31157830606702114 }, { "epoch": 4.999120492524186, "grad_norm": 0.24657747915649159, "learning_rate": 0.00017892595546146155, "loss": 2.89015793800354, "step": 8528, "token_acc": 0.31426744274271345 }, { "epoch": 4.999706830841395, "grad_norm": 0.27516565593646075, "learning_rate": 0.000178920003571224, "loss": 2.901970148086548, "step": 8529, "token_acc": 0.31077075614465377 }, { "epoch": 5.0, "grad_norm": 0.31457004306043534, "learning_rate": 0.00017891405093963938, "loss": 2.883554458618164, "step": 8530, "token_acc": 0.31363783959302954 }, { "epoch": 5.0, "eval_loss": 3.110069751739502, "eval_runtime": 16.7889, "eval_samples_per_second": 15.248, "eval_steps_per_second": 1.906, "eval_token_acc": 0.28490550317586005, "step": 8530 }, { "epoch": 5.000586338317209, "grad_norm": 0.5728609097865577, "learning_rate": 0.00017890809756676354, "loss": 2.6137020587921143, "step": 8531, "token_acc": 0.35633464052287583 }, { "epoch": 5.001172676634418, "grad_norm": 0.48984419222165093, "learning_rate": 0.0001789021434526525, "loss": 2.6457982063293457, "step": 8532, "token_acc": 0.3505687640110476 }, { "epoch": 5.001759014951627, "grad_norm": 0.32993140654176834, "learning_rate": 0.00017889618859736212, "loss": 2.6383183002471924, "step": 8533, "token_acc": 0.3525478246148637 }, { "epoch": 5.0023453532688364, "grad_norm": 0.4905807086283444, "learning_rate": 0.00017889023300094836, "loss": 2.6150574684143066, "step": 8534, "token_acc": 0.3546726494610107 }, { "epoch": 5.002931691586046, "grad_norm": 0.479577916950639, "learning_rate": 0.00017888427666346718, "loss": 2.6390600204467773, "step": 8535, "token_acc": 0.3519423687815845 }, { "epoch": 5.003518029903254, "grad_norm": 0.4011623420757777, "learning_rate": 0.0001788783195849745, "loss": 2.5844533443450928, "step": 8536, "token_acc": 0.36083323525950334 }, { "epoch": 5.004104368220463, "grad_norm": 0.3927691188525506, "learning_rate": 0.0001788723617655263, "loss": 2.583338499069214, "step": 8537, "token_acc": 0.36307227177435214 }, { "epoch": 5.004690706537672, "grad_norm": 0.3590091098008287, "learning_rate": 0.00017886640320517855, "loss": 2.593388795852661, "step": 8538, "token_acc": 0.3605190318959975 }, { "epoch": 5.005277044854881, "grad_norm": 0.32283351775874786, "learning_rate": 0.00017886044390398725, "loss": 2.531052589416504, "step": 8539, "token_acc": 0.3719200999544699 }, { "epoch": 5.00586338317209, "grad_norm": 0.3543052571384308, "learning_rate": 0.0001788544838620083, "loss": 2.5684967041015625, "step": 8540, "token_acc": 0.363936745090672 }, { "epoch": 5.006449721489299, "grad_norm": 0.34444625581534105, "learning_rate": 0.00017884852307929774, "loss": 2.6395487785339355, "step": 8541, "token_acc": 0.35311911163804965 }, { "epoch": 5.0070360598065085, "grad_norm": 0.3173895031108944, "learning_rate": 0.00017884256155591157, "loss": 2.5861763954162598, "step": 8542, "token_acc": 0.3615610123450149 }, { "epoch": 5.007622398123718, "grad_norm": 0.3378104142526924, "learning_rate": 0.00017883659929190574, "loss": 2.571347713470459, "step": 8543, "token_acc": 0.3641806503433498 }, { "epoch": 5.008208736440927, "grad_norm": 0.2951923512088088, "learning_rate": 0.00017883063628733634, "loss": 2.5819907188415527, "step": 8544, "token_acc": 0.36197110489288087 }, { "epoch": 5.008795074758136, "grad_norm": 0.3176423457440417, "learning_rate": 0.00017882467254225933, "loss": 2.5647072792053223, "step": 8545, "token_acc": 0.364050285877353 }, { "epoch": 5.009381413075345, "grad_norm": 0.31497685666115965, "learning_rate": 0.0001788187080567307, "loss": 2.520672082901001, "step": 8546, "token_acc": 0.3703714562139025 }, { "epoch": 5.009967751392553, "grad_norm": 0.32198843957165213, "learning_rate": 0.00017881274283080656, "loss": 2.5758109092712402, "step": 8547, "token_acc": 0.3640082017132113 }, { "epoch": 5.010554089709762, "grad_norm": 0.30913375868976933, "learning_rate": 0.00017880677686454288, "loss": 2.5549397468566895, "step": 8548, "token_acc": 0.365419957284061 }, { "epoch": 5.011140428026971, "grad_norm": 0.3625104583515587, "learning_rate": 0.00017880081015799574, "loss": 2.539494514465332, "step": 8549, "token_acc": 0.36929375006444826 }, { "epoch": 5.0117267663441805, "grad_norm": 0.32693962659638726, "learning_rate": 0.00017879484271122117, "loss": 2.5783021450042725, "step": 8550, "token_acc": 0.36188182326225726 }, { "epoch": 5.01231310466139, "grad_norm": 0.3178441155562443, "learning_rate": 0.00017878887452427522, "loss": 2.600250244140625, "step": 8551, "token_acc": 0.36005084972567913 }, { "epoch": 5.012899442978599, "grad_norm": 0.3882108261774225, "learning_rate": 0.00017878290559721397, "loss": 2.6201887130737305, "step": 8552, "token_acc": 0.3541683938585958 }, { "epoch": 5.013485781295808, "grad_norm": 0.4052815662621842, "learning_rate": 0.00017877693593009347, "loss": 2.5477776527404785, "step": 8553, "token_acc": 0.3685016961076129 }, { "epoch": 5.014072119613017, "grad_norm": 0.3056425027642402, "learning_rate": 0.00017877096552296981, "loss": 2.5621085166931152, "step": 8554, "token_acc": 0.36622811220787754 }, { "epoch": 5.014658457930226, "grad_norm": 0.4570553853885685, "learning_rate": 0.0001787649943758991, "loss": 2.5541880130767822, "step": 8555, "token_acc": 0.36701937786843447 }, { "epoch": 5.015244796247435, "grad_norm": 0.3727821164379915, "learning_rate": 0.00017875902248893738, "loss": 2.574573516845703, "step": 8556, "token_acc": 0.3636656700896919 }, { "epoch": 5.015831134564644, "grad_norm": 0.3657681865774627, "learning_rate": 0.00017875304986214078, "loss": 2.542238473892212, "step": 8557, "token_acc": 0.36913995063051736 }, { "epoch": 5.0164174728818525, "grad_norm": 0.39731488657565095, "learning_rate": 0.0001787470764955654, "loss": 2.5839080810546875, "step": 8558, "token_acc": 0.3638529012748418 }, { "epoch": 5.017003811199062, "grad_norm": 0.33660857904428493, "learning_rate": 0.00017874110238926737, "loss": 2.544034004211426, "step": 8559, "token_acc": 0.367727235739279 }, { "epoch": 5.017590149516271, "grad_norm": 0.41833580572489215, "learning_rate": 0.00017873512754330279, "loss": 2.579230785369873, "step": 8560, "token_acc": 0.3633607306921452 }, { "epoch": 5.01817648783348, "grad_norm": 0.3237486028640243, "learning_rate": 0.00017872915195772773, "loss": 2.568049430847168, "step": 8561, "token_acc": 0.3659882754861951 }, { "epoch": 5.018762826150689, "grad_norm": 0.42757355794515894, "learning_rate": 0.0001787231756325984, "loss": 2.5780301094055176, "step": 8562, "token_acc": 0.36309642822354493 }, { "epoch": 5.019349164467898, "grad_norm": 0.3081906038258494, "learning_rate": 0.00017871719856797093, "loss": 2.5651447772979736, "step": 8563, "token_acc": 0.36332374183373456 }, { "epoch": 5.019935502785107, "grad_norm": 0.35387805977751463, "learning_rate": 0.00017871122076390145, "loss": 2.54543399810791, "step": 8564, "token_acc": 0.3679917701521052 }, { "epoch": 5.020521841102316, "grad_norm": 0.30383619501387427, "learning_rate": 0.00017870524222044612, "loss": 2.562528133392334, "step": 8565, "token_acc": 0.36429510454488545 }, { "epoch": 5.021108179419525, "grad_norm": 0.3420187628771937, "learning_rate": 0.00017869926293766108, "loss": 2.5655436515808105, "step": 8566, "token_acc": 0.36507346482573905 }, { "epoch": 5.0216945177367345, "grad_norm": 0.29282344403323773, "learning_rate": 0.0001786932829156025, "loss": 2.5570621490478516, "step": 8567, "token_acc": 0.3657955982797875 }, { "epoch": 5.022280856053943, "grad_norm": 0.32013063447416545, "learning_rate": 0.00017868730215432662, "loss": 2.598223924636841, "step": 8568, "token_acc": 0.3594857061823453 }, { "epoch": 5.022867194371152, "grad_norm": 0.30598285640345363, "learning_rate": 0.00017868132065388954, "loss": 2.520106792449951, "step": 8569, "token_acc": 0.3725451929106915 }, { "epoch": 5.023453532688361, "grad_norm": 0.3073116277605857, "learning_rate": 0.00017867533841434745, "loss": 2.5541653633117676, "step": 8570, "token_acc": 0.36786315199102637 }, { "epoch": 5.02403987100557, "grad_norm": 0.29053430852209117, "learning_rate": 0.0001786693554357566, "loss": 2.5661065578460693, "step": 8571, "token_acc": 0.3645743720601509 }, { "epoch": 5.024626209322779, "grad_norm": 0.29984573422101274, "learning_rate": 0.00017866337171817316, "loss": 2.585926055908203, "step": 8572, "token_acc": 0.3621052465450031 }, { "epoch": 5.025212547639988, "grad_norm": 0.28891046101653295, "learning_rate": 0.00017865738726165336, "loss": 2.5682687759399414, "step": 8573, "token_acc": 0.36491833424589604 }, { "epoch": 5.025798885957197, "grad_norm": 0.3021542829649355, "learning_rate": 0.00017865140206625336, "loss": 2.53190279006958, "step": 8574, "token_acc": 0.3712162155336569 }, { "epoch": 5.0263852242744065, "grad_norm": 0.312655598616484, "learning_rate": 0.00017864541613202945, "loss": 2.587083578109741, "step": 8575, "token_acc": 0.3616598341611508 }, { "epoch": 5.026971562591616, "grad_norm": 0.3079385900072447, "learning_rate": 0.00017863942945903785, "loss": 2.570455551147461, "step": 8576, "token_acc": 0.364129285413986 }, { "epoch": 5.027557900908825, "grad_norm": 0.2999212972382153, "learning_rate": 0.00017863344204733473, "loss": 2.5599498748779297, "step": 8577, "token_acc": 0.3676264649122575 }, { "epoch": 5.028144239226034, "grad_norm": 0.2942709482203419, "learning_rate": 0.00017862745389697642, "loss": 2.5489354133605957, "step": 8578, "token_acc": 0.36726496863519614 }, { "epoch": 5.028730577543242, "grad_norm": 0.3145912647504632, "learning_rate": 0.0001786214650080191, "loss": 2.5287702083587646, "step": 8579, "token_acc": 0.3714973994552239 }, { "epoch": 5.029316915860451, "grad_norm": 0.30320402319450795, "learning_rate": 0.00017861547538051907, "loss": 2.5303704738616943, "step": 8580, "token_acc": 0.3704164391319297 }, { "epoch": 5.02990325417766, "grad_norm": 0.33014856000935666, "learning_rate": 0.00017860948501453262, "loss": 2.4881644248962402, "step": 8581, "token_acc": 0.3780443920603298 }, { "epoch": 5.030489592494869, "grad_norm": 0.31657615073264067, "learning_rate": 0.00017860349391011596, "loss": 2.5896806716918945, "step": 8582, "token_acc": 0.36262776159341104 }, { "epoch": 5.0310759308120785, "grad_norm": 0.3184797158939612, "learning_rate": 0.00017859750206732536, "loss": 2.5678181648254395, "step": 8583, "token_acc": 0.36518014721708064 }, { "epoch": 5.031662269129288, "grad_norm": 0.31608551573702154, "learning_rate": 0.00017859150948621716, "loss": 2.550880193710327, "step": 8584, "token_acc": 0.36687682308722147 }, { "epoch": 5.032248607446497, "grad_norm": 0.3069700936872596, "learning_rate": 0.00017858551616684767, "loss": 2.5597996711730957, "step": 8585, "token_acc": 0.36549414089328924 }, { "epoch": 5.032834945763706, "grad_norm": 0.291911346593309, "learning_rate": 0.0001785795221092731, "loss": 2.5623950958251953, "step": 8586, "token_acc": 0.3652140187207813 }, { "epoch": 5.033421284080915, "grad_norm": 0.30898162104127186, "learning_rate": 0.00017857352731354985, "loss": 2.579223871231079, "step": 8587, "token_acc": 0.36094642168509833 }, { "epoch": 5.034007622398124, "grad_norm": 0.30719860280750216, "learning_rate": 0.00017856753177973418, "loss": 2.569281816482544, "step": 8588, "token_acc": 0.36469552760325946 }, { "epoch": 5.034593960715333, "grad_norm": 0.36774744395893666, "learning_rate": 0.00017856153550788238, "loss": 2.5600554943084717, "step": 8589, "token_acc": 0.3651454555415548 }, { "epoch": 5.035180299032541, "grad_norm": 0.4711026081836451, "learning_rate": 0.0001785555384980509, "loss": 2.5066380500793457, "step": 8590, "token_acc": 0.3751918659426043 }, { "epoch": 5.0357666373497505, "grad_norm": 0.46413117270997145, "learning_rate": 0.0001785495407502959, "loss": 2.570627212524414, "step": 8591, "token_acc": 0.3641189212980271 }, { "epoch": 5.03635297566696, "grad_norm": 0.30770741546561603, "learning_rate": 0.00017854354226467387, "loss": 2.547548294067383, "step": 8592, "token_acc": 0.36684605095343265 }, { "epoch": 5.036939313984169, "grad_norm": 0.45656417698670165, "learning_rate": 0.00017853754304124109, "loss": 2.5522119998931885, "step": 8593, "token_acc": 0.3670502440033844 }, { "epoch": 5.037525652301378, "grad_norm": 0.35576314169194634, "learning_rate": 0.00017853154308005388, "loss": 2.5898637771606445, "step": 8594, "token_acc": 0.3613680664462577 }, { "epoch": 5.038111990618587, "grad_norm": 0.35255739567077987, "learning_rate": 0.0001785255423811687, "loss": 2.5680136680603027, "step": 8595, "token_acc": 0.36405543015573627 }, { "epoch": 5.038698328935796, "grad_norm": 0.31752485866672336, "learning_rate": 0.00017851954094464184, "loss": 2.5736083984375, "step": 8596, "token_acc": 0.3617938429137447 }, { "epoch": 5.039284667253005, "grad_norm": 0.3395376046681421, "learning_rate": 0.0001785135387705297, "loss": 2.560234546661377, "step": 8597, "token_acc": 0.3658538539607927 }, { "epoch": 5.039871005570214, "grad_norm": 0.28802426219906957, "learning_rate": 0.00017850753585888865, "loss": 2.5121490955352783, "step": 8598, "token_acc": 0.3743807047117836 }, { "epoch": 5.040457343887423, "grad_norm": 0.36636400170871986, "learning_rate": 0.00017850153220977513, "loss": 2.5468177795410156, "step": 8599, "token_acc": 0.36772802947669475 }, { "epoch": 5.0410436822046325, "grad_norm": 0.31131145818896805, "learning_rate": 0.00017849552782324546, "loss": 2.536012649536133, "step": 8600, "token_acc": 0.3699290127170291 }, { "epoch": 5.041630020521841, "grad_norm": 0.3760346193784607, "learning_rate": 0.0001784895226993561, "loss": 2.5302774906158447, "step": 8601, "token_acc": 0.37090719553026724 }, { "epoch": 5.04221635883905, "grad_norm": 0.30104487128374796, "learning_rate": 0.00017848351683816342, "loss": 2.52775239944458, "step": 8602, "token_acc": 0.3723042518885429 }, { "epoch": 5.042802697156259, "grad_norm": 0.31366689481289206, "learning_rate": 0.00017847751023972386, "loss": 2.5727272033691406, "step": 8603, "token_acc": 0.36326769226234484 }, { "epoch": 5.043389035473468, "grad_norm": 0.2980917869380582, "learning_rate": 0.00017847150290409384, "loss": 2.5655388832092285, "step": 8604, "token_acc": 0.36521995090592757 }, { "epoch": 5.043975373790677, "grad_norm": 0.3176571924594715, "learning_rate": 0.00017846549483132982, "loss": 2.550281524658203, "step": 8605, "token_acc": 0.36648691729400135 }, { "epoch": 5.044561712107886, "grad_norm": 0.2863134135082776, "learning_rate": 0.0001784594860214882, "loss": 2.5486721992492676, "step": 8606, "token_acc": 0.36800546518522337 }, { "epoch": 5.045148050425095, "grad_norm": 0.34134556344303274, "learning_rate": 0.00017845347647462543, "loss": 2.544363021850586, "step": 8607, "token_acc": 0.3675474307178717 }, { "epoch": 5.0457343887423045, "grad_norm": 0.29889869901782373, "learning_rate": 0.00017844746619079794, "loss": 2.5942368507385254, "step": 8608, "token_acc": 0.3584032202211559 }, { "epoch": 5.046320727059514, "grad_norm": 0.3895689990148726, "learning_rate": 0.00017844145517006225, "loss": 2.5189552307128906, "step": 8609, "token_acc": 0.3722706538266599 }, { "epoch": 5.046907065376723, "grad_norm": 0.33741442406630434, "learning_rate": 0.00017843544341247477, "loss": 2.5026745796203613, "step": 8610, "token_acc": 0.3759538437846822 }, { "epoch": 5.047493403693931, "grad_norm": 0.3264569704237778, "learning_rate": 0.00017842943091809198, "loss": 2.549659013748169, "step": 8611, "token_acc": 0.367681353191754 }, { "epoch": 5.04807974201114, "grad_norm": 0.32571507145597456, "learning_rate": 0.0001784234176869704, "loss": 2.5807695388793945, "step": 8612, "token_acc": 0.36172193549864046 }, { "epoch": 5.048666080328349, "grad_norm": 0.3175754922077994, "learning_rate": 0.0001784174037191665, "loss": 2.5562191009521484, "step": 8613, "token_acc": 0.36770156359722256 }, { "epoch": 5.049252418645558, "grad_norm": 0.3352716692092544, "learning_rate": 0.00017841138901473672, "loss": 2.545306921005249, "step": 8614, "token_acc": 0.36915870930364886 }, { "epoch": 5.049838756962767, "grad_norm": 0.3342022595469454, "learning_rate": 0.00017840537357373762, "loss": 2.5136969089508057, "step": 8615, "token_acc": 0.373111929260103 }, { "epoch": 5.0504250952799765, "grad_norm": 0.3698572166437671, "learning_rate": 0.0001783993573962257, "loss": 2.524839401245117, "step": 8616, "token_acc": 0.37144148097751994 }, { "epoch": 5.051011433597186, "grad_norm": 0.3496972254293288, "learning_rate": 0.00017839334048225743, "loss": 2.544816732406616, "step": 8617, "token_acc": 0.3684737768090188 }, { "epoch": 5.051597771914395, "grad_norm": 0.323435957493382, "learning_rate": 0.00017838732283188938, "loss": 2.5452558994293213, "step": 8618, "token_acc": 0.36836540278792274 }, { "epoch": 5.052184110231604, "grad_norm": 0.3186981183050785, "learning_rate": 0.00017838130444517808, "loss": 2.5675978660583496, "step": 8619, "token_acc": 0.3624714464477968 }, { "epoch": 5.052770448548813, "grad_norm": 0.338347275583856, "learning_rate": 0.00017837528532218, "loss": 2.5589938163757324, "step": 8620, "token_acc": 0.3647268723153395 }, { "epoch": 5.053356786866022, "grad_norm": 0.2964264313375623, "learning_rate": 0.00017836926546295175, "loss": 2.507209300994873, "step": 8621, "token_acc": 0.3749760574827484 }, { "epoch": 5.05394312518323, "grad_norm": 0.3702286715287716, "learning_rate": 0.00017836324486754986, "loss": 2.546562910079956, "step": 8622, "token_acc": 0.3686370070416226 }, { "epoch": 5.054529463500439, "grad_norm": 0.32738272997183826, "learning_rate": 0.0001783572235360309, "loss": 2.550058126449585, "step": 8623, "token_acc": 0.3673816680056365 }, { "epoch": 5.0551158018176485, "grad_norm": 0.3489811917549848, "learning_rate": 0.0001783512014684514, "loss": 2.575343370437622, "step": 8624, "token_acc": 0.36435302500254474 }, { "epoch": 5.055702140134858, "grad_norm": 0.3272732798578958, "learning_rate": 0.0001783451786648679, "loss": 2.59786057472229, "step": 8625, "token_acc": 0.36059966259922793 }, { "epoch": 5.056288478452067, "grad_norm": 0.3351515724565755, "learning_rate": 0.00017833915512533704, "loss": 2.536472797393799, "step": 8626, "token_acc": 0.36967911959708966 }, { "epoch": 5.056874816769276, "grad_norm": 0.3490249113357389, "learning_rate": 0.00017833313084991543, "loss": 2.577605724334717, "step": 8627, "token_acc": 0.3643636419277674 }, { "epoch": 5.057461155086485, "grad_norm": 0.31281084362226147, "learning_rate": 0.00017832710583865955, "loss": 2.552957773208618, "step": 8628, "token_acc": 0.36781288939453743 }, { "epoch": 5.058047493403694, "grad_norm": 0.38519994362273724, "learning_rate": 0.0001783210800916261, "loss": 2.5694937705993652, "step": 8629, "token_acc": 0.3632950524154789 }, { "epoch": 5.058633831720903, "grad_norm": 0.3170205677129762, "learning_rate": 0.00017831505360887162, "loss": 2.5812439918518066, "step": 8630, "token_acc": 0.36106454892693246 }, { "epoch": 5.059220170038112, "grad_norm": 0.3221587995706993, "learning_rate": 0.00017830902639045273, "loss": 2.587827444076538, "step": 8631, "token_acc": 0.36089932258233914 }, { "epoch": 5.059806508355321, "grad_norm": 0.3127017424375763, "learning_rate": 0.0001783029984364261, "loss": 2.5396838188171387, "step": 8632, "token_acc": 0.3683606510661558 }, { "epoch": 5.06039284667253, "grad_norm": 0.3092481145452716, "learning_rate": 0.00017829696974684827, "loss": 2.5201659202575684, "step": 8633, "token_acc": 0.3729968371112283 }, { "epoch": 5.060979184989739, "grad_norm": 0.31156518954728046, "learning_rate": 0.00017829094032177593, "loss": 2.5538747310638428, "step": 8634, "token_acc": 0.3665296579810702 }, { "epoch": 5.061565523306948, "grad_norm": 0.3265156067724412, "learning_rate": 0.0001782849101612657, "loss": 2.5675978660583496, "step": 8635, "token_acc": 0.3631898249996081 }, { "epoch": 5.062151861624157, "grad_norm": 0.3201273192078522, "learning_rate": 0.00017827887926537424, "loss": 2.55073881149292, "step": 8636, "token_acc": 0.36664392640455706 }, { "epoch": 5.062738199941366, "grad_norm": 0.3125211010127739, "learning_rate": 0.0001782728476341582, "loss": 2.5284056663513184, "step": 8637, "token_acc": 0.36952214647906245 }, { "epoch": 5.063324538258575, "grad_norm": 0.2988053014494796, "learning_rate": 0.0001782668152676742, "loss": 2.556939125061035, "step": 8638, "token_acc": 0.3656246057301466 }, { "epoch": 5.063910876575784, "grad_norm": 0.2985445757361666, "learning_rate": 0.00017826078216597898, "loss": 2.5389156341552734, "step": 8639, "token_acc": 0.3697985923318158 }, { "epoch": 5.064497214892993, "grad_norm": 0.3145072900062383, "learning_rate": 0.0001782547483291291, "loss": 2.5682849884033203, "step": 8640, "token_acc": 0.36383810159093655 }, { "epoch": 5.0650835532102025, "grad_norm": 0.33045525848793245, "learning_rate": 0.00017824871375718136, "loss": 2.604620933532715, "step": 8641, "token_acc": 0.3586817901801636 }, { "epoch": 5.065669891527412, "grad_norm": 0.2974922110161493, "learning_rate": 0.0001782426784501924, "loss": 2.598081111907959, "step": 8642, "token_acc": 0.35895287524857966 }, { "epoch": 5.066256229844621, "grad_norm": 0.33630052166895597, "learning_rate": 0.00017823664240821893, "loss": 2.5367367267608643, "step": 8643, "token_acc": 0.369738694642249 }, { "epoch": 5.066842568161829, "grad_norm": 0.30004110187418354, "learning_rate": 0.00017823060563131756, "loss": 2.5762908458709717, "step": 8644, "token_acc": 0.3645476514627047 }, { "epoch": 5.067428906479038, "grad_norm": 0.3910657069692267, "learning_rate": 0.00017822456811954513, "loss": 2.5280518531799316, "step": 8645, "token_acc": 0.370581977225265 }, { "epoch": 5.068015244796247, "grad_norm": 0.32994213495225766, "learning_rate": 0.00017821852987295826, "loss": 2.533412456512451, "step": 8646, "token_acc": 0.36998954478582774 }, { "epoch": 5.068601583113456, "grad_norm": 0.3347223141051749, "learning_rate": 0.0001782124908916137, "loss": 2.6010608673095703, "step": 8647, "token_acc": 0.35866107576633893 }, { "epoch": 5.069187921430665, "grad_norm": 0.38568755970071855, "learning_rate": 0.0001782064511755682, "loss": 2.5580105781555176, "step": 8648, "token_acc": 0.36583961760377187 }, { "epoch": 5.0697742597478745, "grad_norm": 0.28649006945836664, "learning_rate": 0.00017820041072487845, "loss": 2.530031204223633, "step": 8649, "token_acc": 0.3707557090370909 }, { "epoch": 5.070360598065084, "grad_norm": 0.3972715558454441, "learning_rate": 0.00017819436953960124, "loss": 2.551689624786377, "step": 8650, "token_acc": 0.36756097747601835 }, { "epoch": 5.070946936382293, "grad_norm": 0.3177109451791595, "learning_rate": 0.0001781883276197933, "loss": 2.5293281078338623, "step": 8651, "token_acc": 0.3711145039800849 }, { "epoch": 5.071533274699502, "grad_norm": 0.3297786870337855, "learning_rate": 0.00017818228496551135, "loss": 2.5782697200775146, "step": 8652, "token_acc": 0.3618049174760221 }, { "epoch": 5.072119613016711, "grad_norm": 0.3028362519003831, "learning_rate": 0.0001781762415768122, "loss": 2.5293474197387695, "step": 8653, "token_acc": 0.3712482075024053 }, { "epoch": 5.07270595133392, "grad_norm": 0.33525613729127424, "learning_rate": 0.00017817019745375263, "loss": 2.538595199584961, "step": 8654, "token_acc": 0.3687129390891122 }, { "epoch": 5.073292289651128, "grad_norm": 0.2934633273275737, "learning_rate": 0.00017816415259638938, "loss": 2.625231981277466, "step": 8655, "token_acc": 0.3549068410762937 }, { "epoch": 5.073878627968337, "grad_norm": 0.408788390016289, "learning_rate": 0.0001781581070047792, "loss": 2.5804429054260254, "step": 8656, "token_acc": 0.36280263362538234 }, { "epoch": 5.0744649662855466, "grad_norm": 0.37506886856840405, "learning_rate": 0.00017815206067897898, "loss": 2.5608997344970703, "step": 8657, "token_acc": 0.3667965521313024 }, { "epoch": 5.075051304602756, "grad_norm": 0.3487901006393067, "learning_rate": 0.00017814601361904544, "loss": 2.5515003204345703, "step": 8658, "token_acc": 0.3668162192892153 }, { "epoch": 5.075637642919965, "grad_norm": 0.3611737601651554, "learning_rate": 0.0001781399658250354, "loss": 2.596212387084961, "step": 8659, "token_acc": 0.3593859509709155 }, { "epoch": 5.076223981237174, "grad_norm": 0.3177250888477601, "learning_rate": 0.00017813391729700568, "loss": 2.595973014831543, "step": 8660, "token_acc": 0.3601860020585208 }, { "epoch": 5.076810319554383, "grad_norm": 0.32189341550380085, "learning_rate": 0.0001781278680350131, "loss": 2.5558865070343018, "step": 8661, "token_acc": 0.36595420601315753 }, { "epoch": 5.077396657871592, "grad_norm": 0.30434027519695955, "learning_rate": 0.00017812181803911447, "loss": 2.570974349975586, "step": 8662, "token_acc": 0.3638679735987597 }, { "epoch": 5.077982996188801, "grad_norm": 0.3656942903457676, "learning_rate": 0.00017811576730936664, "loss": 2.564648151397705, "step": 8663, "token_acc": 0.36480222829243114 }, { "epoch": 5.07856933450601, "grad_norm": 0.2980314602520659, "learning_rate": 0.00017810971584582643, "loss": 2.528481960296631, "step": 8664, "token_acc": 0.3704219930510654 }, { "epoch": 5.0791556728232194, "grad_norm": 0.3214652020003212, "learning_rate": 0.00017810366364855068, "loss": 2.565074920654297, "step": 8665, "token_acc": 0.36418697708257547 }, { "epoch": 5.079742011140428, "grad_norm": 0.3085935181115178, "learning_rate": 0.00017809761071759629, "loss": 2.5683186054229736, "step": 8666, "token_acc": 0.36621876838292206 }, { "epoch": 5.080328349457637, "grad_norm": 0.3585548289825254, "learning_rate": 0.00017809155705302007, "loss": 2.55531907081604, "step": 8667, "token_acc": 0.3646511154638651 }, { "epoch": 5.080914687774846, "grad_norm": 0.34396413090760825, "learning_rate": 0.0001780855026548789, "loss": 2.5789380073547363, "step": 8668, "token_acc": 0.36284301187108964 }, { "epoch": 5.081501026092055, "grad_norm": 0.3115431730015309, "learning_rate": 0.00017807944752322964, "loss": 2.5804362297058105, "step": 8669, "token_acc": 0.3636655055243306 }, { "epoch": 5.082087364409264, "grad_norm": 0.32452169809234144, "learning_rate": 0.0001780733916581292, "loss": 2.555271863937378, "step": 8670, "token_acc": 0.3665360597526632 }, { "epoch": 5.082673702726473, "grad_norm": 0.30057334771173677, "learning_rate": 0.00017806733505963443, "loss": 2.573197841644287, "step": 8671, "token_acc": 0.36405204673202785 }, { "epoch": 5.083260041043682, "grad_norm": 0.3087657974315778, "learning_rate": 0.00017806127772780226, "loss": 2.5896239280700684, "step": 8672, "token_acc": 0.3589368227145959 }, { "epoch": 5.0838463793608915, "grad_norm": 0.29099831044817276, "learning_rate": 0.00017805521966268958, "loss": 2.536803722381592, "step": 8673, "token_acc": 0.37012067586433034 }, { "epoch": 5.084432717678101, "grad_norm": 0.33520015533913267, "learning_rate": 0.0001780491608643533, "loss": 2.573042392730713, "step": 8674, "token_acc": 0.36412847443242097 }, { "epoch": 5.08501905599531, "grad_norm": 0.3224108045187601, "learning_rate": 0.0001780431013328503, "loss": 2.5509583950042725, "step": 8675, "token_acc": 0.36753521671929934 }, { "epoch": 5.085605394312518, "grad_norm": 0.3413275571569484, "learning_rate": 0.00017803704106823755, "loss": 2.621654510498047, "step": 8676, "token_acc": 0.35722091581062326 }, { "epoch": 5.086191732629727, "grad_norm": 0.3586229044437792, "learning_rate": 0.00017803098007057195, "loss": 2.5988285541534424, "step": 8677, "token_acc": 0.36091511131343346 }, { "epoch": 5.086778070946936, "grad_norm": 0.30801630044444706, "learning_rate": 0.00017802491833991045, "loss": 2.528641700744629, "step": 8678, "token_acc": 0.36859016142019096 }, { "epoch": 5.087364409264145, "grad_norm": 0.31741918840181327, "learning_rate": 0.00017801885587630996, "loss": 2.580700397491455, "step": 8679, "token_acc": 0.36228125132735844 }, { "epoch": 5.087950747581354, "grad_norm": 0.31135155478888293, "learning_rate": 0.00017801279267982745, "loss": 2.5882132053375244, "step": 8680, "token_acc": 0.3621735633592015 }, { "epoch": 5.0885370858985635, "grad_norm": 0.3061114253666468, "learning_rate": 0.0001780067287505199, "loss": 2.566697120666504, "step": 8681, "token_acc": 0.3645383560254693 }, { "epoch": 5.089123424215773, "grad_norm": 0.3515887983679542, "learning_rate": 0.00017800066408844422, "loss": 2.5126171112060547, "step": 8682, "token_acc": 0.3735695560423256 }, { "epoch": 5.089709762532982, "grad_norm": 0.3014504110772663, "learning_rate": 0.00017799459869365745, "loss": 2.5652589797973633, "step": 8683, "token_acc": 0.3654255599472991 }, { "epoch": 5.090296100850191, "grad_norm": 0.37302211570990335, "learning_rate": 0.00017798853256621649, "loss": 2.5386734008789062, "step": 8684, "token_acc": 0.369287898762999 }, { "epoch": 5.0908824391674, "grad_norm": 0.3070045874687679, "learning_rate": 0.00017798246570617832, "loss": 2.5354185104370117, "step": 8685, "token_acc": 0.3696374773786095 }, { "epoch": 5.091468777484609, "grad_norm": 0.30846844724496125, "learning_rate": 0.00017797639811360005, "loss": 2.5793769359588623, "step": 8686, "token_acc": 0.3612555666139245 }, { "epoch": 5.092055115801817, "grad_norm": 0.335196762448663, "learning_rate": 0.00017797032978853852, "loss": 2.539492607116699, "step": 8687, "token_acc": 0.3669445766183255 }, { "epoch": 5.092641454119026, "grad_norm": 0.27926329461377664, "learning_rate": 0.0001779642607310509, "loss": 2.5332818031311035, "step": 8688, "token_acc": 0.3695707471344244 }, { "epoch": 5.0932277924362355, "grad_norm": 0.3304190403700646, "learning_rate": 0.00017795819094119404, "loss": 2.592761278152466, "step": 8689, "token_acc": 0.359917911521154 }, { "epoch": 5.093814130753445, "grad_norm": 0.3199046823182537, "learning_rate": 0.000177952120419025, "loss": 2.544893503189087, "step": 8690, "token_acc": 0.3676867722832576 }, { "epoch": 5.094400469070654, "grad_norm": 0.3338379341983684, "learning_rate": 0.0001779460491646009, "loss": 2.569002628326416, "step": 8691, "token_acc": 0.3664240572519387 }, { "epoch": 5.094986807387863, "grad_norm": 0.29806898084584, "learning_rate": 0.00017793997717797865, "loss": 2.5985050201416016, "step": 8692, "token_acc": 0.359388820654344 }, { "epoch": 5.095573145705072, "grad_norm": 0.2974730596146133, "learning_rate": 0.0001779339044592154, "loss": 2.564746379852295, "step": 8693, "token_acc": 0.3647777427285075 }, { "epoch": 5.096159484022281, "grad_norm": 0.3227208190439189, "learning_rate": 0.00017792783100836808, "loss": 2.56030011177063, "step": 8694, "token_acc": 0.3664799773793805 }, { "epoch": 5.09674582233949, "grad_norm": 0.28585466777550367, "learning_rate": 0.0001779217568254938, "loss": 2.5541584491729736, "step": 8695, "token_acc": 0.366164249293557 }, { "epoch": 5.097332160656699, "grad_norm": 0.34662690247403183, "learning_rate": 0.00017791568191064964, "loss": 2.5726568698883057, "step": 8696, "token_acc": 0.3624830807611667 }, { "epoch": 5.097918498973908, "grad_norm": 0.33546528285006866, "learning_rate": 0.00017790960626389262, "loss": 2.605684280395508, "step": 8697, "token_acc": 0.35868411284817303 }, { "epoch": 5.098504837291117, "grad_norm": 0.33887671212101234, "learning_rate": 0.00017790352988527984, "loss": 2.556511163711548, "step": 8698, "token_acc": 0.3666915958963187 }, { "epoch": 5.099091175608326, "grad_norm": 0.30233365861987865, "learning_rate": 0.00017789745277486837, "loss": 2.5629310607910156, "step": 8699, "token_acc": 0.3647650137312814 }, { "epoch": 5.099677513925535, "grad_norm": 0.3652666452882459, "learning_rate": 0.0001778913749327153, "loss": 2.596416473388672, "step": 8700, "token_acc": 0.3605429436862847 }, { "epoch": 5.100263852242744, "grad_norm": 0.32776413258426074, "learning_rate": 0.00017788529635887773, "loss": 2.598938226699829, "step": 8701, "token_acc": 0.3587423341398699 }, { "epoch": 5.100850190559953, "grad_norm": 0.3117711264051812, "learning_rate": 0.00017787921705341274, "loss": 2.5848286151885986, "step": 8702, "token_acc": 0.36257501189820984 }, { "epoch": 5.101436528877162, "grad_norm": 0.3002138322325676, "learning_rate": 0.0001778731370163775, "loss": 2.5925133228302, "step": 8703, "token_acc": 0.36055757188401166 }, { "epoch": 5.102022867194371, "grad_norm": 0.3153448951658653, "learning_rate": 0.00017786705624782902, "loss": 2.578900098800659, "step": 8704, "token_acc": 0.36218541938166493 }, { "epoch": 5.10260920551158, "grad_norm": 0.30419673902434613, "learning_rate": 0.00017786097474782446, "loss": 2.5859649181365967, "step": 8705, "token_acc": 0.3618694623141846 }, { "epoch": 5.1031955438287895, "grad_norm": 0.31647094422421734, "learning_rate": 0.000177854892516421, "loss": 2.566540479660034, "step": 8706, "token_acc": 0.3630619323802391 }, { "epoch": 5.103781882145999, "grad_norm": 0.3161751532697215, "learning_rate": 0.0001778488095536757, "loss": 2.6177268028259277, "step": 8707, "token_acc": 0.35553262048334794 }, { "epoch": 5.104368220463208, "grad_norm": 0.29940245202558324, "learning_rate": 0.0001778427258596458, "loss": 2.6232423782348633, "step": 8708, "token_acc": 0.35296298846799695 }, { "epoch": 5.104954558780416, "grad_norm": 0.3142022718993205, "learning_rate": 0.00017783664143438833, "loss": 2.5876476764678955, "step": 8709, "token_acc": 0.3624279574029616 }, { "epoch": 5.105540897097625, "grad_norm": 0.30494911699308014, "learning_rate": 0.0001778305562779605, "loss": 2.563300609588623, "step": 8710, "token_acc": 0.3638596314222979 }, { "epoch": 5.106127235414834, "grad_norm": 0.3663480910734714, "learning_rate": 0.0001778244703904195, "loss": 2.5931406021118164, "step": 8711, "token_acc": 0.3606730973883842 }, { "epoch": 5.106713573732043, "grad_norm": 0.4010463540965783, "learning_rate": 0.00017781838377182245, "loss": 2.560307502746582, "step": 8712, "token_acc": 0.36487027841909403 }, { "epoch": 5.107299912049252, "grad_norm": 0.3690635820645635, "learning_rate": 0.00017781229642222657, "loss": 2.5877881050109863, "step": 8713, "token_acc": 0.36101509339496446 }, { "epoch": 5.1078862503664615, "grad_norm": 0.2849674264066636, "learning_rate": 0.00017780620834168898, "loss": 2.5788164138793945, "step": 8714, "token_acc": 0.3633517976588629 }, { "epoch": 5.108472588683671, "grad_norm": 0.3776444929012261, "learning_rate": 0.00017780011953026694, "loss": 2.54604434967041, "step": 8715, "token_acc": 0.36879391116277954 }, { "epoch": 5.10905892700088, "grad_norm": 0.32689300791328374, "learning_rate": 0.0001777940299880176, "loss": 2.5166871547698975, "step": 8716, "token_acc": 0.37301462699845617 }, { "epoch": 5.109645265318089, "grad_norm": 0.31670023753219606, "learning_rate": 0.0001777879397149982, "loss": 2.5536739826202393, "step": 8717, "token_acc": 0.36645080288112075 }, { "epoch": 5.110231603635298, "grad_norm": 0.36461519502981615, "learning_rate": 0.0001777818487112659, "loss": 2.583094835281372, "step": 8718, "token_acc": 0.3624467264262818 }, { "epoch": 5.110817941952506, "grad_norm": 0.35026827218716855, "learning_rate": 0.00017777575697687793, "loss": 2.5645411014556885, "step": 8719, "token_acc": 0.3659068151027861 }, { "epoch": 5.111404280269715, "grad_norm": 0.302309715251391, "learning_rate": 0.00017776966451189157, "loss": 2.5638270378112793, "step": 8720, "token_acc": 0.36238349316096446 }, { "epoch": 5.111990618586924, "grad_norm": 0.3741235231061588, "learning_rate": 0.00017776357131636398, "loss": 2.5800561904907227, "step": 8721, "token_acc": 0.361576339351697 }, { "epoch": 5.1125769569041335, "grad_norm": 0.3209821568203822, "learning_rate": 0.00017775747739035241, "loss": 2.579256057739258, "step": 8722, "token_acc": 0.3625130713531254 }, { "epoch": 5.113163295221343, "grad_norm": 0.2948416511263344, "learning_rate": 0.00017775138273391417, "loss": 2.537633180618286, "step": 8723, "token_acc": 0.3691275516973161 }, { "epoch": 5.113749633538552, "grad_norm": 0.34246840772290155, "learning_rate": 0.00017774528734710644, "loss": 2.5742411613464355, "step": 8724, "token_acc": 0.3615608684284888 }, { "epoch": 5.114335971855761, "grad_norm": 0.30519530059903244, "learning_rate": 0.0001777391912299865, "loss": 2.603969097137451, "step": 8725, "token_acc": 0.35845489878674464 }, { "epoch": 5.11492231017297, "grad_norm": 0.3383554662467581, "learning_rate": 0.00017773309438261158, "loss": 2.5867156982421875, "step": 8726, "token_acc": 0.3613432339021906 }, { "epoch": 5.115508648490179, "grad_norm": 0.3114679358121218, "learning_rate": 0.00017772699680503902, "loss": 2.608220100402832, "step": 8727, "token_acc": 0.3577511502671921 }, { "epoch": 5.116094986807388, "grad_norm": 0.30103817547590217, "learning_rate": 0.00017772089849732602, "loss": 2.5776171684265137, "step": 8728, "token_acc": 0.36263352858922393 }, { "epoch": 5.116681325124597, "grad_norm": 0.31370309327297613, "learning_rate": 0.00017771479945952995, "loss": 2.6162428855895996, "step": 8729, "token_acc": 0.3558424654350257 }, { "epoch": 5.1172676634418055, "grad_norm": 0.29058125739962826, "learning_rate": 0.00017770869969170806, "loss": 2.5584490299224854, "step": 8730, "token_acc": 0.3662462359205193 }, { "epoch": 5.117854001759015, "grad_norm": 0.31493409034253317, "learning_rate": 0.00017770259919391764, "loss": 2.5770998001098633, "step": 8731, "token_acc": 0.3623192660158731 }, { "epoch": 5.118440340076224, "grad_norm": 0.3038403243885083, "learning_rate": 0.00017769649796621598, "loss": 2.5806236267089844, "step": 8732, "token_acc": 0.3616266885963227 }, { "epoch": 5.119026678393433, "grad_norm": 0.29478892089536085, "learning_rate": 0.00017769039600866048, "loss": 2.545267105102539, "step": 8733, "token_acc": 0.3689115357848509 }, { "epoch": 5.119613016710642, "grad_norm": 0.3171312202486041, "learning_rate": 0.00017768429332130835, "loss": 2.564260482788086, "step": 8734, "token_acc": 0.36449064385236885 }, { "epoch": 5.120199355027851, "grad_norm": 0.33122265835743825, "learning_rate": 0.000177678189904217, "loss": 2.5714006423950195, "step": 8735, "token_acc": 0.3638228935891728 }, { "epoch": 5.12078569334506, "grad_norm": 0.3101457697606461, "learning_rate": 0.00017767208575744368, "loss": 2.5785865783691406, "step": 8736, "token_acc": 0.3613113670413507 }, { "epoch": 5.121372031662269, "grad_norm": 0.2995124932122777, "learning_rate": 0.00017766598088104582, "loss": 2.5694262981414795, "step": 8737, "token_acc": 0.3641655666357351 }, { "epoch": 5.121958369979478, "grad_norm": 0.35917739514183444, "learning_rate": 0.0001776598752750807, "loss": 2.6244006156921387, "step": 8738, "token_acc": 0.3561718949247641 }, { "epoch": 5.1225447082966875, "grad_norm": 0.4305897999245486, "learning_rate": 0.00017765376893960573, "loss": 2.578237533569336, "step": 8739, "token_acc": 0.36265919693563503 }, { "epoch": 5.123131046613897, "grad_norm": 0.33349056553527523, "learning_rate": 0.00017764766187467824, "loss": 2.5885679721832275, "step": 8740, "token_acc": 0.3598215051917961 }, { "epoch": 5.123717384931105, "grad_norm": 0.33115078912184726, "learning_rate": 0.00017764155408035557, "loss": 2.5867807865142822, "step": 8741, "token_acc": 0.35987577606834437 }, { "epoch": 5.124303723248314, "grad_norm": 0.3572592378302158, "learning_rate": 0.00017763544555669512, "loss": 2.542055130004883, "step": 8742, "token_acc": 0.3676556543508734 }, { "epoch": 5.124890061565523, "grad_norm": 0.3053746428260833, "learning_rate": 0.0001776293363037543, "loss": 2.561729907989502, "step": 8743, "token_acc": 0.36527112461216915 }, { "epoch": 5.125476399882732, "grad_norm": 0.3921018654877444, "learning_rate": 0.00017762322632159045, "loss": 2.5706028938293457, "step": 8744, "token_acc": 0.3639261434277703 }, { "epoch": 5.126062738199941, "grad_norm": 0.2972537049353717, "learning_rate": 0.00017761711561026103, "loss": 2.562030792236328, "step": 8745, "token_acc": 0.3640063079556931 }, { "epoch": 5.12664907651715, "grad_norm": 0.31986615885829506, "learning_rate": 0.00017761100416982336, "loss": 2.5650229454040527, "step": 8746, "token_acc": 0.3642552491329643 }, { "epoch": 5.1272354148343595, "grad_norm": 0.28885815264670817, "learning_rate": 0.0001776048920003349, "loss": 2.5682590007781982, "step": 8747, "token_acc": 0.3644066170524787 }, { "epoch": 5.127821753151569, "grad_norm": 0.32369732605998486, "learning_rate": 0.00017759877910185302, "loss": 2.581594228744507, "step": 8748, "token_acc": 0.3608302674264765 }, { "epoch": 5.128408091468778, "grad_norm": 0.3195838715460639, "learning_rate": 0.0001775926654744352, "loss": 2.5679264068603516, "step": 8749, "token_acc": 0.3627362075743677 }, { "epoch": 5.128994429785987, "grad_norm": 0.3012263462150962, "learning_rate": 0.00017758655111813887, "loss": 2.5967321395874023, "step": 8750, "token_acc": 0.36043907906773 }, { "epoch": 5.129580768103196, "grad_norm": 0.32407003391363237, "learning_rate": 0.00017758043603302142, "loss": 2.563599109649658, "step": 8751, "token_acc": 0.36445512926400964 }, { "epoch": 5.130167106420404, "grad_norm": 0.3267574191454591, "learning_rate": 0.0001775743202191403, "loss": 2.5634918212890625, "step": 8752, "token_acc": 0.36589412014304085 }, { "epoch": 5.130753444737613, "grad_norm": 0.3287773192051845, "learning_rate": 0.000177568203676553, "loss": 2.5854454040527344, "step": 8753, "token_acc": 0.36014038985961017 }, { "epoch": 5.131339783054822, "grad_norm": 0.3249219670465738, "learning_rate": 0.00017756208640531696, "loss": 2.585137367248535, "step": 8754, "token_acc": 0.3610645870407202 }, { "epoch": 5.1319261213720315, "grad_norm": 0.2962890653235747, "learning_rate": 0.0001775559684054896, "loss": 2.542698621749878, "step": 8755, "token_acc": 0.3692114295252303 }, { "epoch": 5.132512459689241, "grad_norm": 0.32202942764946535, "learning_rate": 0.00017754984967712845, "loss": 2.552659034729004, "step": 8756, "token_acc": 0.3645587925957089 }, { "epoch": 5.13309879800645, "grad_norm": 0.3033781641562856, "learning_rate": 0.00017754373022029095, "loss": 2.561004161834717, "step": 8757, "token_acc": 0.3646738485815804 }, { "epoch": 5.133685136323659, "grad_norm": 0.295879427398369, "learning_rate": 0.0001775376100350346, "loss": 2.5950076580047607, "step": 8758, "token_acc": 0.3604670113146048 }, { "epoch": 5.134271474640868, "grad_norm": 0.2853022923052908, "learning_rate": 0.00017753148912141685, "loss": 2.5576281547546387, "step": 8759, "token_acc": 0.3652652020682325 }, { "epoch": 5.134857812958077, "grad_norm": 0.32569828329035605, "learning_rate": 0.0001775253674794953, "loss": 2.5961480140686035, "step": 8760, "token_acc": 0.35876917647436274 }, { "epoch": 5.135444151275286, "grad_norm": 0.3056343109718411, "learning_rate": 0.00017751924510932737, "loss": 2.54693865776062, "step": 8761, "token_acc": 0.36730257827270274 }, { "epoch": 5.136030489592494, "grad_norm": 0.31537849159211484, "learning_rate": 0.00017751312201097057, "loss": 2.595202922821045, "step": 8762, "token_acc": 0.36051313660528544 }, { "epoch": 5.1366168279097035, "grad_norm": 0.3241377418695354, "learning_rate": 0.00017750699818448244, "loss": 2.5486183166503906, "step": 8763, "token_acc": 0.3653852168473728 }, { "epoch": 5.137203166226913, "grad_norm": 0.29797362908082, "learning_rate": 0.00017750087362992053, "loss": 2.559046506881714, "step": 8764, "token_acc": 0.36490117648260817 }, { "epoch": 5.137789504544122, "grad_norm": 0.32222572259453336, "learning_rate": 0.0001774947483473423, "loss": 2.5956671237945557, "step": 8765, "token_acc": 0.35944647230051285 }, { "epoch": 5.138375842861331, "grad_norm": 0.30616928295049667, "learning_rate": 0.00017748862233680539, "loss": 2.5945863723754883, "step": 8766, "token_acc": 0.3596494927979417 }, { "epoch": 5.13896218117854, "grad_norm": 0.285442120245395, "learning_rate": 0.00017748249559836724, "loss": 2.583724021911621, "step": 8767, "token_acc": 0.36120989528158454 }, { "epoch": 5.139548519495749, "grad_norm": 0.31688485292108787, "learning_rate": 0.0001774763681320855, "loss": 2.5831120014190674, "step": 8768, "token_acc": 0.36177994192633817 }, { "epoch": 5.140134857812958, "grad_norm": 0.2938573762736049, "learning_rate": 0.00017747023993801766, "loss": 2.5536818504333496, "step": 8769, "token_acc": 0.3677388268997609 }, { "epoch": 5.140721196130167, "grad_norm": 0.2894950733824776, "learning_rate": 0.00017746411101622132, "loss": 2.6125760078430176, "step": 8770, "token_acc": 0.35678827392169044 }, { "epoch": 5.141307534447376, "grad_norm": 0.31760653282216933, "learning_rate": 0.00017745798136675403, "loss": 2.5950801372528076, "step": 8771, "token_acc": 0.35999667768050186 }, { "epoch": 5.1418938727645855, "grad_norm": 0.2935906730979065, "learning_rate": 0.00017745185098967336, "loss": 2.6049084663391113, "step": 8772, "token_acc": 0.35716771095640787 }, { "epoch": 5.142480211081795, "grad_norm": 0.2913134922172613, "learning_rate": 0.00017744571988503692, "loss": 2.5761826038360596, "step": 8773, "token_acc": 0.361777729049813 }, { "epoch": 5.143066549399003, "grad_norm": 0.34141863448886867, "learning_rate": 0.00017743958805290232, "loss": 2.5550057888031006, "step": 8774, "token_acc": 0.365270122979476 }, { "epoch": 5.143652887716212, "grad_norm": 0.3533291356073863, "learning_rate": 0.00017743345549332715, "loss": 2.564237594604492, "step": 8775, "token_acc": 0.3638222203292374 }, { "epoch": 5.144239226033421, "grad_norm": 0.30645873015559016, "learning_rate": 0.00017742732220636903, "loss": 2.5772275924682617, "step": 8776, "token_acc": 0.36307858683837363 }, { "epoch": 5.14482556435063, "grad_norm": 0.3115476745810554, "learning_rate": 0.00017742118819208549, "loss": 2.5988450050354004, "step": 8777, "token_acc": 0.3596710495986723 }, { "epoch": 5.145411902667839, "grad_norm": 0.30881392874079755, "learning_rate": 0.00017741505345053425, "loss": 2.556070327758789, "step": 8778, "token_acc": 0.3657164291072768 }, { "epoch": 5.145998240985048, "grad_norm": 0.2916190589256628, "learning_rate": 0.0001774089179817729, "loss": 2.571113109588623, "step": 8779, "token_acc": 0.36330131284864725 }, { "epoch": 5.1465845793022575, "grad_norm": 0.31259452400825366, "learning_rate": 0.00017740278178585904, "loss": 2.5893185138702393, "step": 8780, "token_acc": 0.36085242992117766 }, { "epoch": 5.147170917619467, "grad_norm": 0.33231795934897307, "learning_rate": 0.0001773966448628504, "loss": 2.555454969406128, "step": 8781, "token_acc": 0.3668878168061756 }, { "epoch": 5.147757255936676, "grad_norm": 0.3117555494702881, "learning_rate": 0.00017739050721280453, "loss": 2.598276138305664, "step": 8782, "token_acc": 0.3588670225604107 }, { "epoch": 5.148343594253885, "grad_norm": 0.3163486423367551, "learning_rate": 0.00017738436883577916, "loss": 2.582850694656372, "step": 8783, "token_acc": 0.36111750928454045 }, { "epoch": 5.148929932571093, "grad_norm": 0.330811759561692, "learning_rate": 0.00017737822973183193, "loss": 2.556408166885376, "step": 8784, "token_acc": 0.36490600189230327 }, { "epoch": 5.149516270888302, "grad_norm": 0.30144330573473926, "learning_rate": 0.0001773720899010205, "loss": 2.577692985534668, "step": 8785, "token_acc": 0.36193092025086554 }, { "epoch": 5.150102609205511, "grad_norm": 0.33176113659805917, "learning_rate": 0.00017736594934340252, "loss": 2.580148220062256, "step": 8786, "token_acc": 0.3612634119467982 }, { "epoch": 5.15068894752272, "grad_norm": 0.32186644720849794, "learning_rate": 0.00017735980805903568, "loss": 2.6092586517333984, "step": 8787, "token_acc": 0.35700732474799035 }, { "epoch": 5.1512752858399296, "grad_norm": 0.33291530727762836, "learning_rate": 0.00017735366604797772, "loss": 2.551642417907715, "step": 8788, "token_acc": 0.3665850532278243 }, { "epoch": 5.151861624157139, "grad_norm": 0.2965315835537992, "learning_rate": 0.00017734752331028633, "loss": 2.6178078651428223, "step": 8789, "token_acc": 0.3554844437963959 }, { "epoch": 5.152447962474348, "grad_norm": 0.30189433523374826, "learning_rate": 0.00017734137984601914, "loss": 2.5936150550842285, "step": 8790, "token_acc": 0.3599041554693602 }, { "epoch": 5.153034300791557, "grad_norm": 0.29855256049129747, "learning_rate": 0.00017733523565523392, "loss": 2.5673632621765137, "step": 8791, "token_acc": 0.36487070575554154 }, { "epoch": 5.153620639108766, "grad_norm": 0.30893572873219743, "learning_rate": 0.00017732909073798835, "loss": 2.5712413787841797, "step": 8792, "token_acc": 0.3629407022308646 }, { "epoch": 5.154206977425975, "grad_norm": 0.30286228945309135, "learning_rate": 0.0001773229450943402, "loss": 2.5810327529907227, "step": 8793, "token_acc": 0.36093151619455166 }, { "epoch": 5.154793315743184, "grad_norm": 0.306931201183645, "learning_rate": 0.0001773167987243472, "loss": 2.6005775928497314, "step": 8794, "token_acc": 0.35822223638879813 }, { "epoch": 5.1553796540603924, "grad_norm": 0.29258145459147067, "learning_rate": 0.000177310651628067, "loss": 2.5784718990325928, "step": 8795, "token_acc": 0.36182931297791593 }, { "epoch": 5.155965992377602, "grad_norm": 0.3141084331489179, "learning_rate": 0.00017730450380555742, "loss": 2.6128878593444824, "step": 8796, "token_acc": 0.35605835786299217 }, { "epoch": 5.156552330694811, "grad_norm": 0.31054693928880134, "learning_rate": 0.00017729835525687624, "loss": 2.5845413208007812, "step": 8797, "token_acc": 0.36081458367548036 }, { "epoch": 5.15713866901202, "grad_norm": 0.3728608191400777, "learning_rate": 0.00017729220598208115, "loss": 2.5960464477539062, "step": 8798, "token_acc": 0.3593628723862373 }, { "epoch": 5.157725007329229, "grad_norm": 0.3280141891409224, "learning_rate": 0.0001772860559812299, "loss": 2.5909268856048584, "step": 8799, "token_acc": 0.3592768122673003 }, { "epoch": 5.158311345646438, "grad_norm": 0.29396126943774803, "learning_rate": 0.00017727990525438035, "loss": 2.581202507019043, "step": 8800, "token_acc": 0.35971804380666594 }, { "epoch": 5.158897683963647, "grad_norm": 0.36120109212497215, "learning_rate": 0.0001772737538015902, "loss": 2.5689618587493896, "step": 8801, "token_acc": 0.36294901671409696 }, { "epoch": 5.159484022280856, "grad_norm": 0.36765344780659887, "learning_rate": 0.00017726760162291728, "loss": 2.5400218963623047, "step": 8802, "token_acc": 0.3680063062402447 }, { "epoch": 5.160070360598065, "grad_norm": 0.3100161329390006, "learning_rate": 0.00017726144871841934, "loss": 2.6082160472869873, "step": 8803, "token_acc": 0.3576020079403022 }, { "epoch": 5.1606566989152745, "grad_norm": 0.4177507104184087, "learning_rate": 0.0001772552950881542, "loss": 2.605971336364746, "step": 8804, "token_acc": 0.35833385595750655 }, { "epoch": 5.161243037232484, "grad_norm": 0.3990429071196709, "learning_rate": 0.0001772491407321797, "loss": 2.6020851135253906, "step": 8805, "token_acc": 0.3592093295133146 }, { "epoch": 5.161829375549692, "grad_norm": 0.33071246389774106, "learning_rate": 0.00017724298565055356, "loss": 2.573862075805664, "step": 8806, "token_acc": 0.36170981117565754 }, { "epoch": 5.162415713866901, "grad_norm": 0.450611524325082, "learning_rate": 0.0001772368298433337, "loss": 2.611372947692871, "step": 8807, "token_acc": 0.3566550355592254 }, { "epoch": 5.16300205218411, "grad_norm": 0.3021141315810496, "learning_rate": 0.00017723067331057787, "loss": 2.5904293060302734, "step": 8808, "token_acc": 0.36050947390329197 }, { "epoch": 5.163588390501319, "grad_norm": 0.430433773307704, "learning_rate": 0.00017722451605234397, "loss": 2.6565680503845215, "step": 8809, "token_acc": 0.34969225114791097 }, { "epoch": 5.164174728818528, "grad_norm": 0.2891182835747003, "learning_rate": 0.00017721835806868978, "loss": 2.6297552585601807, "step": 8810, "token_acc": 0.3548627483564077 }, { "epoch": 5.164761067135737, "grad_norm": 0.3943084610503009, "learning_rate": 0.0001772121993596732, "loss": 2.5754218101501465, "step": 8811, "token_acc": 0.3622248607178738 }, { "epoch": 5.1653474054529465, "grad_norm": 0.29385971396078264, "learning_rate": 0.00017720603992535204, "loss": 2.607400417327881, "step": 8812, "token_acc": 0.3582610137780738 }, { "epoch": 5.165933743770156, "grad_norm": 0.3803703001390518, "learning_rate": 0.00017719987976578413, "loss": 2.5949554443359375, "step": 8813, "token_acc": 0.3599474234100637 }, { "epoch": 5.166520082087365, "grad_norm": 0.2817534171566281, "learning_rate": 0.0001771937188810274, "loss": 2.5874056816101074, "step": 8814, "token_acc": 0.35904198157695777 }, { "epoch": 5.167106420404574, "grad_norm": 0.37286566017244677, "learning_rate": 0.00017718755727113973, "loss": 2.584026575088501, "step": 8815, "token_acc": 0.36073057010817544 }, { "epoch": 5.167692758721783, "grad_norm": 0.2767348433203727, "learning_rate": 0.00017718139493617894, "loss": 2.5618467330932617, "step": 8816, "token_acc": 0.3646959025242918 }, { "epoch": 5.168279097038991, "grad_norm": 0.31617510034402707, "learning_rate": 0.00017717523187620295, "loss": 2.576890230178833, "step": 8817, "token_acc": 0.36311764720596956 }, { "epoch": 5.1688654353562, "grad_norm": 0.2993267402690325, "learning_rate": 0.00017716906809126965, "loss": 2.6144163608551025, "step": 8818, "token_acc": 0.35484430319468313 }, { "epoch": 5.169451773673409, "grad_norm": 0.30884772192954396, "learning_rate": 0.00017716290358143696, "loss": 2.640666961669922, "step": 8819, "token_acc": 0.35258329384934994 }, { "epoch": 5.1700381119906185, "grad_norm": 0.30624398544873566, "learning_rate": 0.00017715673834676275, "loss": 2.5840091705322266, "step": 8820, "token_acc": 0.3616628905632541 }, { "epoch": 5.170624450307828, "grad_norm": 0.29040072899697433, "learning_rate": 0.000177150572387305, "loss": 2.5670838356018066, "step": 8821, "token_acc": 0.3621655671736207 }, { "epoch": 5.171210788625037, "grad_norm": 0.2942055327510666, "learning_rate": 0.00017714440570312153, "loss": 2.6134395599365234, "step": 8822, "token_acc": 0.3565820197345011 }, { "epoch": 5.171797126942246, "grad_norm": 0.28897145725059237, "learning_rate": 0.00017713823829427035, "loss": 2.556884765625, "step": 8823, "token_acc": 0.3666879684270477 }, { "epoch": 5.172383465259455, "grad_norm": 0.312512093815707, "learning_rate": 0.00017713207016080933, "loss": 2.616426944732666, "step": 8824, "token_acc": 0.35600277226084076 }, { "epoch": 5.172969803576664, "grad_norm": 0.2907733524965378, "learning_rate": 0.00017712590130279646, "loss": 2.5543813705444336, "step": 8825, "token_acc": 0.3653676071271561 }, { "epoch": 5.173556141893873, "grad_norm": 0.3118605923579699, "learning_rate": 0.00017711973172028972, "loss": 2.571045398712158, "step": 8826, "token_acc": 0.36339021488882 }, { "epoch": 5.174142480211081, "grad_norm": 0.29930764117352127, "learning_rate": 0.00017711356141334697, "loss": 2.593981981277466, "step": 8827, "token_acc": 0.3587100040703273 }, { "epoch": 5.1747288185282905, "grad_norm": 0.31468243985456906, "learning_rate": 0.00017710739038202624, "loss": 2.589202880859375, "step": 8828, "token_acc": 0.35976129798661804 }, { "epoch": 5.1753151568455, "grad_norm": 0.28822486603463027, "learning_rate": 0.00017710121862638548, "loss": 2.5864434242248535, "step": 8829, "token_acc": 0.3585260387290844 }, { "epoch": 5.175901495162709, "grad_norm": 0.2916709461688628, "learning_rate": 0.00017709504614648268, "loss": 2.575636148452759, "step": 8830, "token_acc": 0.3625404574772155 }, { "epoch": 5.176487833479918, "grad_norm": 0.29313010955158697, "learning_rate": 0.0001770888729423758, "loss": 2.638294219970703, "step": 8831, "token_acc": 0.35295063811834193 }, { "epoch": 5.177074171797127, "grad_norm": 0.2863528101969556, "learning_rate": 0.00017708269901412283, "loss": 2.594176769256592, "step": 8832, "token_acc": 0.3589286928149004 }, { "epoch": 5.177660510114336, "grad_norm": 0.3018080878559346, "learning_rate": 0.00017707652436178178, "loss": 2.571104049682617, "step": 8833, "token_acc": 0.36333263841001545 }, { "epoch": 5.178246848431545, "grad_norm": 0.27555627140482153, "learning_rate": 0.00017707034898541065, "loss": 2.596005916595459, "step": 8834, "token_acc": 0.35710715563555123 }, { "epoch": 5.178833186748754, "grad_norm": 0.29011786870315326, "learning_rate": 0.0001770641728850674, "loss": 2.590555191040039, "step": 8835, "token_acc": 0.360935813807727 }, { "epoch": 5.179419525065963, "grad_norm": 0.2978453480192051, "learning_rate": 0.00017705799606081016, "loss": 2.5733537673950195, "step": 8836, "token_acc": 0.3622193171331898 }, { "epoch": 5.1800058633831725, "grad_norm": 0.2882035133423909, "learning_rate": 0.00017705181851269687, "loss": 2.589637279510498, "step": 8837, "token_acc": 0.3615395762220648 }, { "epoch": 5.180592201700381, "grad_norm": 0.2946360984321885, "learning_rate": 0.00017704564024078554, "loss": 2.6022963523864746, "step": 8838, "token_acc": 0.3582858418195219 }, { "epoch": 5.18117854001759, "grad_norm": 0.27955000171512157, "learning_rate": 0.00017703946124513425, "loss": 2.5820934772491455, "step": 8839, "token_acc": 0.3621006415868494 }, { "epoch": 5.181764878334799, "grad_norm": 0.3066909011153294, "learning_rate": 0.00017703328152580104, "loss": 2.5948734283447266, "step": 8840, "token_acc": 0.3582165529612342 }, { "epoch": 5.182351216652008, "grad_norm": 0.30894457038666073, "learning_rate": 0.00017702710108284396, "loss": 2.569955825805664, "step": 8841, "token_acc": 0.36253553005579536 }, { "epoch": 5.182937554969217, "grad_norm": 0.3286767495803109, "learning_rate": 0.00017702091991632102, "loss": 2.62277889251709, "step": 8842, "token_acc": 0.35459366717591206 }, { "epoch": 5.183523893286426, "grad_norm": 0.28186909976088925, "learning_rate": 0.00017701473802629036, "loss": 2.647202968597412, "step": 8843, "token_acc": 0.35108093479368313 }, { "epoch": 5.184110231603635, "grad_norm": 0.33795708720267514, "learning_rate": 0.00017700855541281002, "loss": 2.604660987854004, "step": 8844, "token_acc": 0.3571048115050009 }, { "epoch": 5.1846965699208445, "grad_norm": 0.35268808542920493, "learning_rate": 0.0001770023720759381, "loss": 2.6267058849334717, "step": 8845, "token_acc": 0.35550304476913713 }, { "epoch": 5.185282908238054, "grad_norm": 0.3063645011330831, "learning_rate": 0.0001769961880157326, "loss": 2.568591356277466, "step": 8846, "token_acc": 0.36352294739779245 }, { "epoch": 5.185869246555263, "grad_norm": 0.30271917779320245, "learning_rate": 0.0001769900032322517, "loss": 2.573857307434082, "step": 8847, "token_acc": 0.3631904065127848 }, { "epoch": 5.186455584872472, "grad_norm": 0.33362757904837365, "learning_rate": 0.00017698381772555344, "loss": 2.576220989227295, "step": 8848, "token_acc": 0.3620993745386737 }, { "epoch": 5.18704192318968, "grad_norm": 0.36497869626235757, "learning_rate": 0.00017697763149569594, "loss": 2.564157009124756, "step": 8849, "token_acc": 0.36568953268392107 }, { "epoch": 5.187628261506889, "grad_norm": 0.2918568341756316, "learning_rate": 0.00017697144454273736, "loss": 2.6090922355651855, "step": 8850, "token_acc": 0.3575259054652645 }, { "epoch": 5.188214599824098, "grad_norm": 0.34911531373807103, "learning_rate": 0.00017696525686673576, "loss": 2.5933568477630615, "step": 8851, "token_acc": 0.35864319742622036 }, { "epoch": 5.188800938141307, "grad_norm": 0.35652312535143926, "learning_rate": 0.0001769590684677493, "loss": 2.5977320671081543, "step": 8852, "token_acc": 0.35852609281221803 }, { "epoch": 5.1893872764585165, "grad_norm": 0.2866128622186048, "learning_rate": 0.00017695287934583605, "loss": 2.5706992149353027, "step": 8853, "token_acc": 0.36342019509511414 }, { "epoch": 5.189973614775726, "grad_norm": 0.3137836019576745, "learning_rate": 0.00017694668950105427, "loss": 2.5903472900390625, "step": 8854, "token_acc": 0.36089131872686003 }, { "epoch": 5.190559953092935, "grad_norm": 0.2860848218727202, "learning_rate": 0.00017694049893346198, "loss": 2.6254711151123047, "step": 8855, "token_acc": 0.35468721538936715 }, { "epoch": 5.191146291410144, "grad_norm": 0.30521531759588744, "learning_rate": 0.00017693430764311737, "loss": 2.6389904022216797, "step": 8856, "token_acc": 0.3527080231245728 }, { "epoch": 5.191732629727353, "grad_norm": 0.2848142272217059, "learning_rate": 0.00017692811563007862, "loss": 2.5652620792388916, "step": 8857, "token_acc": 0.36416562497490573 }, { "epoch": 5.192318968044562, "grad_norm": 0.29476194500343117, "learning_rate": 0.0001769219228944039, "loss": 2.583707809448242, "step": 8858, "token_acc": 0.361143427393411 }, { "epoch": 5.192905306361771, "grad_norm": 0.28541014284583993, "learning_rate": 0.00017691572943615138, "loss": 2.606368064880371, "step": 8859, "token_acc": 0.35718983510170443 }, { "epoch": 5.193491644678979, "grad_norm": 0.2955340961949114, "learning_rate": 0.0001769095352553792, "loss": 2.570476531982422, "step": 8860, "token_acc": 0.36220984633551995 }, { "epoch": 5.1940779829961885, "grad_norm": 0.2969794728079262, "learning_rate": 0.00017690334035214557, "loss": 2.607548475265503, "step": 8861, "token_acc": 0.357132392816846 }, { "epoch": 5.194664321313398, "grad_norm": 0.28862586173203547, "learning_rate": 0.0001768971447265087, "loss": 2.635871648788452, "step": 8862, "token_acc": 0.3509638447188416 }, { "epoch": 5.195250659630607, "grad_norm": 0.32862825027999903, "learning_rate": 0.00017689094837852677, "loss": 2.5731353759765625, "step": 8863, "token_acc": 0.36267274232784913 }, { "epoch": 5.195836997947816, "grad_norm": 0.29088187435086743, "learning_rate": 0.000176884751308258, "loss": 2.6089284420013428, "step": 8864, "token_acc": 0.35545232261749066 }, { "epoch": 5.196423336265025, "grad_norm": 0.2874926762518735, "learning_rate": 0.00017687855351576057, "loss": 2.601158618927002, "step": 8865, "token_acc": 0.356766678138264 }, { "epoch": 5.197009674582234, "grad_norm": 0.31700415639942564, "learning_rate": 0.00017687235500109277, "loss": 2.6067328453063965, "step": 8866, "token_acc": 0.35643780853915474 }, { "epoch": 5.197596012899443, "grad_norm": 0.30873287677401, "learning_rate": 0.00017686615576431274, "loss": 2.59761905670166, "step": 8867, "token_acc": 0.3590050524268561 }, { "epoch": 5.198182351216652, "grad_norm": 0.2928075524909576, "learning_rate": 0.00017685995580547879, "loss": 2.578472375869751, "step": 8868, "token_acc": 0.36317113871745293 }, { "epoch": 5.198768689533861, "grad_norm": 0.28916122797735777, "learning_rate": 0.00017685375512464907, "loss": 2.6061248779296875, "step": 8869, "token_acc": 0.3570543892905847 }, { "epoch": 5.19935502785107, "grad_norm": 0.2872368567456281, "learning_rate": 0.00017684755372188193, "loss": 2.603356122970581, "step": 8870, "token_acc": 0.35796114028741405 }, { "epoch": 5.199941366168279, "grad_norm": 0.2870500951548041, "learning_rate": 0.00017684135159723555, "loss": 2.618990421295166, "step": 8871, "token_acc": 0.35474290243958934 }, { "epoch": 5.200527704485488, "grad_norm": 0.2864276564424642, "learning_rate": 0.00017683514875076824, "loss": 2.593994140625, "step": 8872, "token_acc": 0.3575860782527263 }, { "epoch": 5.201114042802697, "grad_norm": 0.30050104589767135, "learning_rate": 0.00017682894518253824, "loss": 2.6247172355651855, "step": 8873, "token_acc": 0.3544306401713776 }, { "epoch": 5.201700381119906, "grad_norm": 0.31646686185320194, "learning_rate": 0.00017682274089260381, "loss": 2.574474334716797, "step": 8874, "token_acc": 0.36330769007307406 }, { "epoch": 5.202286719437115, "grad_norm": 0.3041857573712074, "learning_rate": 0.00017681653588102328, "loss": 2.611551523208618, "step": 8875, "token_acc": 0.3572504070717219 }, { "epoch": 5.202873057754324, "grad_norm": 0.2862174831405846, "learning_rate": 0.0001768103301478549, "loss": 2.610865354537964, "step": 8876, "token_acc": 0.3547432254344782 }, { "epoch": 5.203459396071533, "grad_norm": 0.3188465053380257, "learning_rate": 0.00017680412369315692, "loss": 2.581068277359009, "step": 8877, "token_acc": 0.361868049689842 }, { "epoch": 5.2040457343887425, "grad_norm": 0.3717357775555192, "learning_rate": 0.00017679791651698776, "loss": 2.5799295902252197, "step": 8878, "token_acc": 0.3610729059350797 }, { "epoch": 5.204632072705952, "grad_norm": 0.3597770921147461, "learning_rate": 0.00017679170861940562, "loss": 2.5837433338165283, "step": 8879, "token_acc": 0.3607960022719101 }, { "epoch": 5.205218411023161, "grad_norm": 0.296606164147987, "learning_rate": 0.00017678550000046887, "loss": 2.591031074523926, "step": 8880, "token_acc": 0.3598082188928337 }, { "epoch": 5.205804749340369, "grad_norm": 0.3167138416859688, "learning_rate": 0.00017677929066023583, "loss": 2.581468105316162, "step": 8881, "token_acc": 0.36107384298758644 }, { "epoch": 5.206391087657578, "grad_norm": 0.3184406849672734, "learning_rate": 0.0001767730805987648, "loss": 2.59531569480896, "step": 8882, "token_acc": 0.3606766693542849 }, { "epoch": 5.206977425974787, "grad_norm": 0.29010467295338416, "learning_rate": 0.00017676686981611415, "loss": 2.60360050201416, "step": 8883, "token_acc": 0.35864521591871296 }, { "epoch": 5.207563764291996, "grad_norm": 0.3588415896014432, "learning_rate": 0.00017676065831234217, "loss": 2.6245317459106445, "step": 8884, "token_acc": 0.3544902608373391 }, { "epoch": 5.208150102609205, "grad_norm": 0.32009161546581305, "learning_rate": 0.00017675444608750723, "loss": 2.6115505695343018, "step": 8885, "token_acc": 0.35673445774794477 }, { "epoch": 5.2087364409264145, "grad_norm": 0.29866476570579265, "learning_rate": 0.00017674823314166776, "loss": 2.5770421028137207, "step": 8886, "token_acc": 0.3622060242799817 }, { "epoch": 5.209322779243624, "grad_norm": 0.3093734670709904, "learning_rate": 0.00017674201947488202, "loss": 2.6146249771118164, "step": 8887, "token_acc": 0.3581273560657985 }, { "epoch": 5.209909117560833, "grad_norm": 0.30152675815007357, "learning_rate": 0.00017673580508720843, "loss": 2.6202807426452637, "step": 8888, "token_acc": 0.35608605830429246 }, { "epoch": 5.210495455878042, "grad_norm": 0.37814267926313366, "learning_rate": 0.00017672958997870533, "loss": 2.572105884552002, "step": 8889, "token_acc": 0.3629826109470191 }, { "epoch": 5.211081794195251, "grad_norm": 0.3292304444912722, "learning_rate": 0.00017672337414943113, "loss": 2.6440553665161133, "step": 8890, "token_acc": 0.35229318054058517 }, { "epoch": 5.21166813251246, "grad_norm": 0.3034593574859272, "learning_rate": 0.00017671715759944422, "loss": 2.6002702713012695, "step": 8891, "token_acc": 0.3573376683410891 }, { "epoch": 5.212254470829668, "grad_norm": 0.32872101784817487, "learning_rate": 0.00017671094032880303, "loss": 2.6020944118499756, "step": 8892, "token_acc": 0.35633485014670335 }, { "epoch": 5.212840809146877, "grad_norm": 0.30040987379298434, "learning_rate": 0.00017670472233756587, "loss": 2.6131582260131836, "step": 8893, "token_acc": 0.35709080695208845 }, { "epoch": 5.2134271474640865, "grad_norm": 0.27812740170405953, "learning_rate": 0.00017669850362579123, "loss": 2.6491641998291016, "step": 8894, "token_acc": 0.35002195191239893 }, { "epoch": 5.214013485781296, "grad_norm": 0.3040233825752894, "learning_rate": 0.0001766922841935375, "loss": 2.606353521347046, "step": 8895, "token_acc": 0.35625909656073784 }, { "epoch": 5.214599824098505, "grad_norm": 0.2886028500828905, "learning_rate": 0.00017668606404086312, "loss": 2.6165714263916016, "step": 8896, "token_acc": 0.353889548075522 }, { "epoch": 5.215186162415714, "grad_norm": 0.351504967624042, "learning_rate": 0.0001766798431678265, "loss": 2.586872100830078, "step": 8897, "token_acc": 0.36083460106100385 }, { "epoch": 5.215772500732923, "grad_norm": 0.3114049150986594, "learning_rate": 0.00017667362157448605, "loss": 2.5593318939208984, "step": 8898, "token_acc": 0.3648464233338765 }, { "epoch": 5.216358839050132, "grad_norm": 0.32620783118979163, "learning_rate": 0.00017666739926090028, "loss": 2.607499122619629, "step": 8899, "token_acc": 0.35629975221315047 }, { "epoch": 5.216945177367341, "grad_norm": 0.30508239708023666, "learning_rate": 0.00017666117622712758, "loss": 2.579393148422241, "step": 8900, "token_acc": 0.36147835898746034 }, { "epoch": 5.21753151568455, "grad_norm": 0.31512461675205666, "learning_rate": 0.00017665495247322642, "loss": 2.58910870552063, "step": 8901, "token_acc": 0.3610934162221625 }, { "epoch": 5.218117854001759, "grad_norm": 0.3331883047453537, "learning_rate": 0.00017664872799925534, "loss": 2.5961084365844727, "step": 8902, "token_acc": 0.3587714874122641 }, { "epoch": 5.218704192318968, "grad_norm": 0.3050923596830108, "learning_rate": 0.00017664250280527267, "loss": 2.617009162902832, "step": 8903, "token_acc": 0.35557874902549336 }, { "epoch": 5.219290530636177, "grad_norm": 0.36307336990497646, "learning_rate": 0.000176636276891337, "loss": 2.5787949562072754, "step": 8904, "token_acc": 0.36222543912020566 }, { "epoch": 5.219876868953386, "grad_norm": 0.29439756799236233, "learning_rate": 0.00017663005025750676, "loss": 2.621690273284912, "step": 8905, "token_acc": 0.35518129927726677 }, { "epoch": 5.220463207270595, "grad_norm": 0.3206589952322598, "learning_rate": 0.00017662382290384047, "loss": 2.614262104034424, "step": 8906, "token_acc": 0.35550557153984325 }, { "epoch": 5.221049545587804, "grad_norm": 0.290530713150827, "learning_rate": 0.00017661759483039664, "loss": 2.612776756286621, "step": 8907, "token_acc": 0.35589317746435584 }, { "epoch": 5.221635883905013, "grad_norm": 0.34661912581747684, "learning_rate": 0.00017661136603723372, "loss": 2.6172826290130615, "step": 8908, "token_acc": 0.3540361247947455 }, { "epoch": 5.222222222222222, "grad_norm": 0.29911326096454727, "learning_rate": 0.00017660513652441027, "loss": 2.6312520503997803, "step": 8909, "token_acc": 0.3538855231229024 }, { "epoch": 5.222808560539431, "grad_norm": 0.35431828256202935, "learning_rate": 0.00017659890629198477, "loss": 2.629631757736206, "step": 8910, "token_acc": 0.35414256984171644 }, { "epoch": 5.2233948988566405, "grad_norm": 0.3186983119913448, "learning_rate": 0.00017659267534001578, "loss": 2.5746936798095703, "step": 8911, "token_acc": 0.3635192314630136 }, { "epoch": 5.22398123717385, "grad_norm": 0.3242624801407607, "learning_rate": 0.00017658644366856178, "loss": 2.6207313537597656, "step": 8912, "token_acc": 0.35678353936952406 }, { "epoch": 5.224567575491059, "grad_norm": 0.3494445124773035, "learning_rate": 0.00017658021127768135, "loss": 2.6116943359375, "step": 8913, "token_acc": 0.3559525710740102 }, { "epoch": 5.225153913808267, "grad_norm": 0.2867130796835485, "learning_rate": 0.00017657397816743306, "loss": 2.574814558029175, "step": 8914, "token_acc": 0.3633493914144188 }, { "epoch": 5.225740252125476, "grad_norm": 0.3653283472460645, "learning_rate": 0.00017656774433787544, "loss": 2.633619546890259, "step": 8915, "token_acc": 0.35268917986190523 }, { "epoch": 5.226326590442685, "grad_norm": 0.31533882348416115, "learning_rate": 0.000176561509789067, "loss": 2.603806972503662, "step": 8916, "token_acc": 0.35738972272445113 }, { "epoch": 5.226912928759894, "grad_norm": 0.3077074753000578, "learning_rate": 0.00017655527452106634, "loss": 2.5943570137023926, "step": 8917, "token_acc": 0.35786196600948844 }, { "epoch": 5.227499267077103, "grad_norm": 0.29959809495308665, "learning_rate": 0.0001765490385339321, "loss": 2.6193180084228516, "step": 8918, "token_acc": 0.35545160294751554 }, { "epoch": 5.2280856053943126, "grad_norm": 0.3205902709839929, "learning_rate": 0.00017654280182772273, "loss": 2.6344871520996094, "step": 8919, "token_acc": 0.35142441415224385 }, { "epoch": 5.228671943711522, "grad_norm": 0.30876965397672074, "learning_rate": 0.0001765365644024969, "loss": 2.6367740631103516, "step": 8920, "token_acc": 0.3536425476682808 }, { "epoch": 5.229258282028731, "grad_norm": 0.2831767469074054, "learning_rate": 0.00017653032625831316, "loss": 2.608541488647461, "step": 8921, "token_acc": 0.3571820719483057 }, { "epoch": 5.22984462034594, "grad_norm": 0.2817990266436051, "learning_rate": 0.00017652408739523016, "loss": 2.6234054565429688, "step": 8922, "token_acc": 0.3560192886773779 }, { "epoch": 5.230430958663149, "grad_norm": 0.2711107440558047, "learning_rate": 0.00017651784781330646, "loss": 2.607964038848877, "step": 8923, "token_acc": 0.35711216597782053 }, { "epoch": 5.231017296980358, "grad_norm": 0.3003357545094554, "learning_rate": 0.0001765116075126007, "loss": 2.638132333755493, "step": 8924, "token_acc": 0.35305141192722445 }, { "epoch": 5.231603635297566, "grad_norm": 0.2898127458216129, "learning_rate": 0.00017650536649317148, "loss": 2.6328983306884766, "step": 8925, "token_acc": 0.3540484888741282 }, { "epoch": 5.2321899736147754, "grad_norm": 0.3086776949497406, "learning_rate": 0.00017649912475507744, "loss": 2.5847232341766357, "step": 8926, "token_acc": 0.36117247465967744 }, { "epoch": 5.232776311931985, "grad_norm": 0.2773391206690136, "learning_rate": 0.00017649288229837722, "loss": 2.593325614929199, "step": 8927, "token_acc": 0.35931855840408705 }, { "epoch": 5.233362650249194, "grad_norm": 0.3068693167303299, "learning_rate": 0.00017648663912312942, "loss": 2.5795040130615234, "step": 8928, "token_acc": 0.36141258824166167 }, { "epoch": 5.233948988566403, "grad_norm": 0.31083938762054275, "learning_rate": 0.00017648039522939272, "loss": 2.614588737487793, "step": 8929, "token_acc": 0.3560164822692763 }, { "epoch": 5.234535326883612, "grad_norm": 0.2985515531873977, "learning_rate": 0.00017647415061722575, "loss": 2.598916530609131, "step": 8930, "token_acc": 0.3579850153377618 }, { "epoch": 5.235121665200821, "grad_norm": 0.2796026369684912, "learning_rate": 0.00017646790528668722, "loss": 2.585221290588379, "step": 8931, "token_acc": 0.36120348832645627 }, { "epoch": 5.23570800351803, "grad_norm": 0.28737463006291303, "learning_rate": 0.00017646165923783572, "loss": 2.5958662033081055, "step": 8932, "token_acc": 0.3610374016617763 }, { "epoch": 5.236294341835239, "grad_norm": 0.29868012936335064, "learning_rate": 0.00017645541247073002, "loss": 2.647989511489868, "step": 8933, "token_acc": 0.35169541810963306 }, { "epoch": 5.236880680152448, "grad_norm": 0.28741195022807337, "learning_rate": 0.00017644916498542868, "loss": 2.6246585845947266, "step": 8934, "token_acc": 0.3543490921853422 }, { "epoch": 5.237467018469657, "grad_norm": 0.3024716302951172, "learning_rate": 0.0001764429167819905, "loss": 2.6007468700408936, "step": 8935, "token_acc": 0.3576647541763084 }, { "epoch": 5.238053356786866, "grad_norm": 0.31823492549558297, "learning_rate": 0.00017643666786047412, "loss": 2.591785430908203, "step": 8936, "token_acc": 0.35987065225964193 }, { "epoch": 5.238639695104075, "grad_norm": 0.29093448477113826, "learning_rate": 0.00017643041822093823, "loss": 2.5966320037841797, "step": 8937, "token_acc": 0.35865443847313977 }, { "epoch": 5.239226033421284, "grad_norm": 0.3602176128493178, "learning_rate": 0.00017642416786344155, "loss": 2.60294508934021, "step": 8938, "token_acc": 0.35823805012264603 }, { "epoch": 5.239812371738493, "grad_norm": 0.2863918895449606, "learning_rate": 0.00017641791678804284, "loss": 2.643946409225464, "step": 8939, "token_acc": 0.35186155596170804 }, { "epoch": 5.240398710055702, "grad_norm": 0.3250231607642417, "learning_rate": 0.0001764116649948007, "loss": 2.5882511138916016, "step": 8940, "token_acc": 0.361252967463573 }, { "epoch": 5.240985048372911, "grad_norm": 0.3373707245106521, "learning_rate": 0.000176405412483774, "loss": 2.576939344406128, "step": 8941, "token_acc": 0.36355262645788194 }, { "epoch": 5.24157138669012, "grad_norm": 0.28231517263118694, "learning_rate": 0.00017639915925502138, "loss": 2.6037158966064453, "step": 8942, "token_acc": 0.35765800556300653 }, { "epoch": 5.2421577250073295, "grad_norm": 0.44315721684516396, "learning_rate": 0.00017639290530860162, "loss": 2.593046188354492, "step": 8943, "token_acc": 0.36015892780453285 }, { "epoch": 5.242744063324539, "grad_norm": 0.3456474183860975, "learning_rate": 0.00017638665064457343, "loss": 2.6263513565063477, "step": 8944, "token_acc": 0.35402143321065166 }, { "epoch": 5.243330401641748, "grad_norm": 0.33997836123243064, "learning_rate": 0.0001763803952629956, "loss": 2.622840404510498, "step": 8945, "token_acc": 0.3549798662726088 }, { "epoch": 5.243916739958956, "grad_norm": 0.39243144263794494, "learning_rate": 0.00017637413916392689, "loss": 2.602046012878418, "step": 8946, "token_acc": 0.3579908845589954 }, { "epoch": 5.244503078276165, "grad_norm": 0.28812177340128425, "learning_rate": 0.00017636788234742605, "loss": 2.586763858795166, "step": 8947, "token_acc": 0.3614600878751822 }, { "epoch": 5.245089416593374, "grad_norm": 0.31665677809463044, "learning_rate": 0.00017636162481355188, "loss": 2.6205544471740723, "step": 8948, "token_acc": 0.35475883624407617 }, { "epoch": 5.245675754910583, "grad_norm": 0.28631333361964334, "learning_rate": 0.00017635536656236312, "loss": 2.600924491882324, "step": 8949, "token_acc": 0.35767624254346186 }, { "epoch": 5.246262093227792, "grad_norm": 0.3416097768998052, "learning_rate": 0.00017634910759391857, "loss": 2.590449333190918, "step": 8950, "token_acc": 0.35946761632975355 }, { "epoch": 5.2468484315450015, "grad_norm": 0.2864851848347396, "learning_rate": 0.00017634284790827705, "loss": 2.591109275817871, "step": 8951, "token_acc": 0.3590155204883434 }, { "epoch": 5.247434769862211, "grad_norm": 0.32009030196872096, "learning_rate": 0.00017633658750549738, "loss": 2.631446123123169, "step": 8952, "token_acc": 0.3518361376765615 }, { "epoch": 5.24802110817942, "grad_norm": 0.30715287919072826, "learning_rate": 0.00017633032638563828, "loss": 2.626551628112793, "step": 8953, "token_acc": 0.35466651493952034 }, { "epoch": 5.248607446496629, "grad_norm": 0.30191879835284363, "learning_rate": 0.00017632406454875867, "loss": 2.6372604370117188, "step": 8954, "token_acc": 0.3513582966226138 }, { "epoch": 5.249193784813838, "grad_norm": 0.285357260078069, "learning_rate": 0.0001763178019949173, "loss": 2.6059539318084717, "step": 8955, "token_acc": 0.3571936905836741 }, { "epoch": 5.249780123131047, "grad_norm": 0.298223730356294, "learning_rate": 0.000176311538724173, "loss": 2.639087677001953, "step": 8956, "token_acc": 0.35286021741216467 }, { "epoch": 5.250366461448255, "grad_norm": 0.2866783997004973, "learning_rate": 0.00017630527473658464, "loss": 2.562894344329834, "step": 8957, "token_acc": 0.36472233722942343 }, { "epoch": 5.250952799765464, "grad_norm": 0.2944375517389073, "learning_rate": 0.00017629901003221105, "loss": 2.5701937675476074, "step": 8958, "token_acc": 0.36303990255988505 }, { "epoch": 5.2515391380826735, "grad_norm": 0.28913217326901264, "learning_rate": 0.00017629274461111106, "loss": 2.6354010105133057, "step": 8959, "token_acc": 0.3527658874318681 }, { "epoch": 5.252125476399883, "grad_norm": 0.287896308732696, "learning_rate": 0.00017628647847334357, "loss": 2.5816168785095215, "step": 8960, "token_acc": 0.36249897605329984 }, { "epoch": 5.252711814717092, "grad_norm": 0.2836656923185974, "learning_rate": 0.00017628021161896743, "loss": 2.6121528148651123, "step": 8961, "token_acc": 0.3566316127229255 }, { "epoch": 5.253298153034301, "grad_norm": 0.28745988300989717, "learning_rate": 0.00017627394404804144, "loss": 2.6017661094665527, "step": 8962, "token_acc": 0.3586734639633414 }, { "epoch": 5.25388449135151, "grad_norm": 0.2831945492417219, "learning_rate": 0.00017626767576062454, "loss": 2.594233274459839, "step": 8963, "token_acc": 0.35934348160625534 }, { "epoch": 5.254470829668719, "grad_norm": 0.27586738894755025, "learning_rate": 0.0001762614067567756, "loss": 2.6412806510925293, "step": 8964, "token_acc": 0.3505909246886417 }, { "epoch": 5.255057167985928, "grad_norm": 0.2944956123430574, "learning_rate": 0.0001762551370365535, "loss": 2.5941548347473145, "step": 8965, "token_acc": 0.35867293424936464 }, { "epoch": 5.255643506303137, "grad_norm": 0.29989593621063293, "learning_rate": 0.00017624886660001717, "loss": 2.596522569656372, "step": 8966, "token_acc": 0.35902004214583105 }, { "epoch": 5.256229844620346, "grad_norm": 0.3287599683252191, "learning_rate": 0.00017624259544722545, "loss": 2.6046249866485596, "step": 8967, "token_acc": 0.3566307727366786 }, { "epoch": 5.256816182937555, "grad_norm": 0.29633392261532654, "learning_rate": 0.00017623632357823728, "loss": 2.6302382946014404, "step": 8968, "token_acc": 0.35277176887591544 }, { "epoch": 5.257402521254764, "grad_norm": 0.2964524953036852, "learning_rate": 0.00017623005099311163, "loss": 2.611330986022949, "step": 8969, "token_acc": 0.3556174189699742 }, { "epoch": 5.257988859571973, "grad_norm": 0.277834700365497, "learning_rate": 0.00017622377769190736, "loss": 2.6310195922851562, "step": 8970, "token_acc": 0.35374237459568236 }, { "epoch": 5.258575197889182, "grad_norm": 0.28835244474603333, "learning_rate": 0.00017621750367468337, "loss": 2.627258539199829, "step": 8971, "token_acc": 0.3537943932833242 }, { "epoch": 5.259161536206391, "grad_norm": 0.29280553245944724, "learning_rate": 0.0001762112289414987, "loss": 2.5902390480041504, "step": 8972, "token_acc": 0.3596100316008875 }, { "epoch": 5.2597478745236, "grad_norm": 0.3354383506640203, "learning_rate": 0.00017620495349241218, "loss": 2.6468722820281982, "step": 8973, "token_acc": 0.34950138412614484 }, { "epoch": 5.260334212840809, "grad_norm": 0.3140280351005481, "learning_rate": 0.0001761986773274828, "loss": 2.637523651123047, "step": 8974, "token_acc": 0.3513550086135558 }, { "epoch": 5.260920551158018, "grad_norm": 0.3033298037475534, "learning_rate": 0.00017619240044676953, "loss": 2.5959901809692383, "step": 8975, "token_acc": 0.3579761280376357 }, { "epoch": 5.2615068894752275, "grad_norm": 0.2963409896929546, "learning_rate": 0.00017618612285033136, "loss": 2.604379177093506, "step": 8976, "token_acc": 0.35653769317558404 }, { "epoch": 5.262093227792437, "grad_norm": 0.35757397159455806, "learning_rate": 0.00017617984453822722, "loss": 2.6063408851623535, "step": 8977, "token_acc": 0.35741926350065284 }, { "epoch": 5.262679566109645, "grad_norm": 0.3774046014096985, "learning_rate": 0.00017617356551051608, "loss": 2.6097888946533203, "step": 8978, "token_acc": 0.3566665036536368 }, { "epoch": 5.263265904426854, "grad_norm": 0.2951899144061072, "learning_rate": 0.00017616728576725694, "loss": 2.6205573081970215, "step": 8979, "token_acc": 0.3563465972327535 }, { "epoch": 5.263852242744063, "grad_norm": 0.3615111545493343, "learning_rate": 0.0001761610053085088, "loss": 2.6179280281066895, "step": 8980, "token_acc": 0.35593678770626974 }, { "epoch": 5.264438581061272, "grad_norm": 0.3531519598512695, "learning_rate": 0.00017615472413433063, "loss": 2.628978967666626, "step": 8981, "token_acc": 0.35328794595735696 }, { "epoch": 5.265024919378481, "grad_norm": 0.30846183604250915, "learning_rate": 0.00017614844224478145, "loss": 2.5970637798309326, "step": 8982, "token_acc": 0.3594998813009417 }, { "epoch": 5.26561125769569, "grad_norm": 0.37615389290053786, "learning_rate": 0.00017614215963992027, "loss": 2.6035749912261963, "step": 8983, "token_acc": 0.357612877233912 }, { "epoch": 5.2661975960128995, "grad_norm": 0.3173799132123656, "learning_rate": 0.0001761358763198061, "loss": 2.6065468788146973, "step": 8984, "token_acc": 0.35802802353374735 }, { "epoch": 5.266783934330109, "grad_norm": 0.3552028029603691, "learning_rate": 0.00017612959228449797, "loss": 2.6264853477478027, "step": 8985, "token_acc": 0.35360447363096226 }, { "epoch": 5.267370272647318, "grad_norm": 0.31518225251382354, "learning_rate": 0.00017612330753405487, "loss": 2.6327805519104004, "step": 8986, "token_acc": 0.3528630884923337 }, { "epoch": 5.267956610964527, "grad_norm": 0.3565223622397148, "learning_rate": 0.00017611702206853593, "loss": 2.595665216445923, "step": 8987, "token_acc": 0.35884508795627884 }, { "epoch": 5.268542949281736, "grad_norm": 0.305227863163778, "learning_rate": 0.0001761107358880001, "loss": 2.6159262657165527, "step": 8988, "token_acc": 0.355600270017655 }, { "epoch": 5.269129287598945, "grad_norm": 0.32285729463743773, "learning_rate": 0.0001761044489925065, "loss": 2.6152122020721436, "step": 8989, "token_acc": 0.3556207182211469 }, { "epoch": 5.269715625916153, "grad_norm": 0.3217858554922761, "learning_rate": 0.00017609816138211413, "loss": 2.6016697883605957, "step": 8990, "token_acc": 0.3580617573739469 }, { "epoch": 5.270301964233362, "grad_norm": 0.2884168319953974, "learning_rate": 0.00017609187305688207, "loss": 2.605201482772827, "step": 8991, "token_acc": 0.35689749502227175 }, { "epoch": 5.2708883025505715, "grad_norm": 0.31755308185400694, "learning_rate": 0.0001760855840168694, "loss": 2.611504077911377, "step": 8992, "token_acc": 0.3567775736828922 }, { "epoch": 5.271474640867781, "grad_norm": 0.28539060509850916, "learning_rate": 0.0001760792942621352, "loss": 2.585408926010132, "step": 8993, "token_acc": 0.3617779878176854 }, { "epoch": 5.27206097918499, "grad_norm": 0.3050278777883416, "learning_rate": 0.0001760730037927386, "loss": 2.6180243492126465, "step": 8994, "token_acc": 0.3544958361310706 }, { "epoch": 5.272647317502199, "grad_norm": 0.29544386523781624, "learning_rate": 0.00017606671260873857, "loss": 2.6446807384490967, "step": 8995, "token_acc": 0.35140982071504107 }, { "epoch": 5.273233655819408, "grad_norm": 0.2896486183124645, "learning_rate": 0.0001760604207101943, "loss": 2.667173385620117, "step": 8996, "token_acc": 0.34786844783164095 }, { "epoch": 5.273819994136617, "grad_norm": 0.28967403018239507, "learning_rate": 0.00017605412809716485, "loss": 2.61602783203125, "step": 8997, "token_acc": 0.3556349381653477 }, { "epoch": 5.274406332453826, "grad_norm": 0.31953608384234616, "learning_rate": 0.0001760478347697094, "loss": 2.628969192504883, "step": 8998, "token_acc": 0.3541925571688506 }, { "epoch": 5.274992670771035, "grad_norm": 0.29758483212025405, "learning_rate": 0.00017604154072788694, "loss": 2.5840134620666504, "step": 8999, "token_acc": 0.36045895383963117 }, { "epoch": 5.2755790090882435, "grad_norm": 0.27759254190097504, "learning_rate": 0.00017603524597175675, "loss": 2.6010642051696777, "step": 9000, "token_acc": 0.359265724787752 }, { "epoch": 5.276165347405453, "grad_norm": 0.28777283060260406, "learning_rate": 0.00017602895050137783, "loss": 2.607245445251465, "step": 9001, "token_acc": 0.3548003349707944 }, { "epoch": 5.276751685722662, "grad_norm": 0.31652789205743814, "learning_rate": 0.0001760226543168094, "loss": 2.6195006370544434, "step": 9002, "token_acc": 0.3547273552486546 }, { "epoch": 5.277338024039871, "grad_norm": 0.3276983808823185, "learning_rate": 0.00017601635741811057, "loss": 2.6375725269317627, "step": 9003, "token_acc": 0.3525123331747703 }, { "epoch": 5.27792436235708, "grad_norm": 0.27027298646974746, "learning_rate": 0.0001760100598053405, "loss": 2.6157901287078857, "step": 9004, "token_acc": 0.35428544064870215 }, { "epoch": 5.278510700674289, "grad_norm": 0.31325341227957815, "learning_rate": 0.00017600376147855834, "loss": 2.646467685699463, "step": 9005, "token_acc": 0.35055022757034515 }, { "epoch": 5.279097038991498, "grad_norm": 0.26973140277126073, "learning_rate": 0.00017599746243782325, "loss": 2.626941204071045, "step": 9006, "token_acc": 0.35555930591141316 }, { "epoch": 5.279683377308707, "grad_norm": 0.3393729213971131, "learning_rate": 0.0001759911626831944, "loss": 2.6132426261901855, "step": 9007, "token_acc": 0.35756513952053565 }, { "epoch": 5.280269715625916, "grad_norm": 0.3879644988459256, "learning_rate": 0.000175984862214731, "loss": 2.6410703659057617, "step": 9008, "token_acc": 0.3516117852759617 }, { "epoch": 5.2808560539431255, "grad_norm": 0.2739956475897817, "learning_rate": 0.00017597856103249217, "loss": 2.638608932495117, "step": 9009, "token_acc": 0.351092523798769 }, { "epoch": 5.281442392260335, "grad_norm": 0.3440551800518427, "learning_rate": 0.0001759722591365372, "loss": 2.6260015964508057, "step": 9010, "token_acc": 0.35322916558734235 }, { "epoch": 5.282028730577543, "grad_norm": 0.28232847572962244, "learning_rate": 0.00017596595652692514, "loss": 2.6265738010406494, "step": 9011, "token_acc": 0.353564243010994 }, { "epoch": 5.282615068894752, "grad_norm": 0.301102947064147, "learning_rate": 0.00017595965320371537, "loss": 2.6153030395507812, "step": 9012, "token_acc": 0.35561550384270907 }, { "epoch": 5.283201407211961, "grad_norm": 0.28628984389884327, "learning_rate": 0.00017595334916696697, "loss": 2.6205005645751953, "step": 9013, "token_acc": 0.35565197716050206 }, { "epoch": 5.28378774552917, "grad_norm": 0.29263521241375834, "learning_rate": 0.00017594704441673925, "loss": 2.651933431625366, "step": 9014, "token_acc": 0.35072530916417 }, { "epoch": 5.284374083846379, "grad_norm": 0.2801368062592007, "learning_rate": 0.00017594073895309134, "loss": 2.6311397552490234, "step": 9015, "token_acc": 0.3532900146805838 }, { "epoch": 5.284960422163588, "grad_norm": 0.27754873020141085, "learning_rate": 0.00017593443277608254, "loss": 2.642279863357544, "step": 9016, "token_acc": 0.3517929435227335 }, { "epoch": 5.2855467604807975, "grad_norm": 0.28289431222166866, "learning_rate": 0.00017592812588577205, "loss": 2.6196844577789307, "step": 9017, "token_acc": 0.35567085050235026 }, { "epoch": 5.286133098798007, "grad_norm": 0.29657072140753443, "learning_rate": 0.00017592181828221914, "loss": 2.58072566986084, "step": 9018, "token_acc": 0.36213364015712574 }, { "epoch": 5.286719437115216, "grad_norm": 0.293469074036933, "learning_rate": 0.00017591550996548305, "loss": 2.6210427284240723, "step": 9019, "token_acc": 0.35408826461717086 }, { "epoch": 5.287305775432425, "grad_norm": 0.28211628913550935, "learning_rate": 0.00017590920093562307, "loss": 2.6029324531555176, "step": 9020, "token_acc": 0.3568860765875917 }, { "epoch": 5.287892113749633, "grad_norm": 0.295151984323409, "learning_rate": 0.0001759028911926984, "loss": 2.6245193481445312, "step": 9021, "token_acc": 0.3544909120238114 }, { "epoch": 5.288478452066842, "grad_norm": 0.28160898763138664, "learning_rate": 0.0001758965807367684, "loss": 2.5861737728118896, "step": 9022, "token_acc": 0.3603082851637765 }, { "epoch": 5.289064790384051, "grad_norm": 0.30717280860225477, "learning_rate": 0.00017589026956789223, "loss": 2.6251678466796875, "step": 9023, "token_acc": 0.35370535962373006 }, { "epoch": 5.28965112870126, "grad_norm": 0.35208221781005705, "learning_rate": 0.00017588395768612926, "loss": 2.6634230613708496, "step": 9024, "token_acc": 0.3467819529319984 }, { "epoch": 5.2902374670184695, "grad_norm": 0.30862952141206784, "learning_rate": 0.0001758776450915388, "loss": 2.6463637351989746, "step": 9025, "token_acc": 0.3503440806244964 }, { "epoch": 5.290823805335679, "grad_norm": 0.27931257112576735, "learning_rate": 0.00017587133178418007, "loss": 2.624399185180664, "step": 9026, "token_acc": 0.35419862397925567 }, { "epoch": 5.291410143652888, "grad_norm": 0.31866322950055115, "learning_rate": 0.00017586501776411243, "loss": 2.618483066558838, "step": 9027, "token_acc": 0.35496736765772297 }, { "epoch": 5.291996481970097, "grad_norm": 0.2848243918570421, "learning_rate": 0.00017585870303139518, "loss": 2.662550449371338, "step": 9028, "token_acc": 0.3484362061444181 }, { "epoch": 5.292582820287306, "grad_norm": 0.29604395182398446, "learning_rate": 0.00017585238758608762, "loss": 2.621995687484741, "step": 9029, "token_acc": 0.35358926316757106 }, { "epoch": 5.293169158604515, "grad_norm": 0.34917699169332056, "learning_rate": 0.0001758460714282491, "loss": 2.635432243347168, "step": 9030, "token_acc": 0.35027430118507613 }, { "epoch": 5.293755496921724, "grad_norm": 0.2881902595411632, "learning_rate": 0.00017583975455793895, "loss": 2.6020326614379883, "step": 9031, "token_acc": 0.358450350723655 }, { "epoch": 5.294341835238933, "grad_norm": 0.31148134282562767, "learning_rate": 0.0001758334369752165, "loss": 2.5915191173553467, "step": 9032, "token_acc": 0.3604079549656045 }, { "epoch": 5.2949281735561415, "grad_norm": 0.27771926793373564, "learning_rate": 0.0001758271186801411, "loss": 2.643895149230957, "step": 9033, "token_acc": 0.3527766679984019 }, { "epoch": 5.295514511873351, "grad_norm": 0.32030550269509334, "learning_rate": 0.00017582079967277207, "loss": 2.5994625091552734, "step": 9034, "token_acc": 0.3572646364162483 }, { "epoch": 5.29610085019056, "grad_norm": 0.3265707128987528, "learning_rate": 0.0001758144799531688, "loss": 2.588143825531006, "step": 9035, "token_acc": 0.36146950164053016 }, { "epoch": 5.296687188507769, "grad_norm": 0.28077270424230305, "learning_rate": 0.00017580815952139068, "loss": 2.620697259902954, "step": 9036, "token_acc": 0.3550741338249981 }, { "epoch": 5.297273526824978, "grad_norm": 0.34808878941063703, "learning_rate": 0.00017580183837749707, "loss": 2.6298627853393555, "step": 9037, "token_acc": 0.3528846986235608 }, { "epoch": 5.297859865142187, "grad_norm": 0.26652860326729394, "learning_rate": 0.0001757955165215473, "loss": 2.6268820762634277, "step": 9038, "token_acc": 0.3527528842594765 }, { "epoch": 5.298446203459396, "grad_norm": 0.3660975438354635, "learning_rate": 0.00017578919395360077, "loss": 2.6217548847198486, "step": 9039, "token_acc": 0.35419827724012504 }, { "epoch": 5.299032541776605, "grad_norm": 0.31493022441359575, "learning_rate": 0.0001757828706737169, "loss": 2.593721866607666, "step": 9040, "token_acc": 0.3602060906290686 }, { "epoch": 5.299618880093814, "grad_norm": 0.31305821828760433, "learning_rate": 0.00017577654668195512, "loss": 2.630213499069214, "step": 9041, "token_acc": 0.3531029871647731 }, { "epoch": 5.3002052184110235, "grad_norm": 0.3340149637039047, "learning_rate": 0.0001757702219783748, "loss": 2.638047695159912, "step": 9042, "token_acc": 0.3519916828020987 }, { "epoch": 5.300791556728232, "grad_norm": 0.2750714503117414, "learning_rate": 0.0001757638965630353, "loss": 2.5797386169433594, "step": 9043, "token_acc": 0.3608497268199049 }, { "epoch": 5.301377895045441, "grad_norm": 0.33370012640943, "learning_rate": 0.00017575757043599606, "loss": 2.6306817531585693, "step": 9044, "token_acc": 0.35539415636360755 }, { "epoch": 5.30196423336265, "grad_norm": 0.27329696910101586, "learning_rate": 0.0001757512435973166, "loss": 2.581338405609131, "step": 9045, "token_acc": 0.360382205678035 }, { "epoch": 5.302550571679859, "grad_norm": 0.30484529874474836, "learning_rate": 0.00017574491604705625, "loss": 2.6212024688720703, "step": 9046, "token_acc": 0.3539755196492368 }, { "epoch": 5.303136909997068, "grad_norm": 0.2870395136087814, "learning_rate": 0.0001757385877852745, "loss": 2.637681484222412, "step": 9047, "token_acc": 0.3514013819663498 }, { "epoch": 5.303723248314277, "grad_norm": 0.31714505008839566, "learning_rate": 0.00017573225881203076, "loss": 2.640303611755371, "step": 9048, "token_acc": 0.35329424902289225 }, { "epoch": 5.304309586631486, "grad_norm": 0.31009667999003915, "learning_rate": 0.0001757259291273845, "loss": 2.61525297164917, "step": 9049, "token_acc": 0.3559364376223344 }, { "epoch": 5.3048959249486956, "grad_norm": 0.2943865623688959, "learning_rate": 0.00017571959873139515, "loss": 2.6052048206329346, "step": 9050, "token_acc": 0.3557980157210806 }, { "epoch": 5.305482263265905, "grad_norm": 0.3208321714668972, "learning_rate": 0.00017571326762412225, "loss": 2.6432807445526123, "step": 9051, "token_acc": 0.35159205527596776 }, { "epoch": 5.306068601583114, "grad_norm": 0.275562669732982, "learning_rate": 0.00017570693580562518, "loss": 2.6014018058776855, "step": 9052, "token_acc": 0.3576564444551962 }, { "epoch": 5.306654939900323, "grad_norm": 0.297559007425737, "learning_rate": 0.00017570060327596351, "loss": 2.6268725395202637, "step": 9053, "token_acc": 0.35392623963296277 }, { "epoch": 5.307241278217531, "grad_norm": 0.27938043566252896, "learning_rate": 0.00017569427003519668, "loss": 2.6344408988952637, "step": 9054, "token_acc": 0.35130987699983474 }, { "epoch": 5.30782761653474, "grad_norm": 0.2897252668071001, "learning_rate": 0.00017568793608338415, "loss": 2.617241144180298, "step": 9055, "token_acc": 0.3550784081044273 }, { "epoch": 5.308413954851949, "grad_norm": 0.2716098922223578, "learning_rate": 0.00017568160142058544, "loss": 2.6318421363830566, "step": 9056, "token_acc": 0.3539247415010161 }, { "epoch": 5.3090002931691584, "grad_norm": 0.26897198022148067, "learning_rate": 0.0001756752660468601, "loss": 2.6266794204711914, "step": 9057, "token_acc": 0.3541892040499518 }, { "epoch": 5.309586631486368, "grad_norm": 0.2718371279179874, "learning_rate": 0.0001756689299622676, "loss": 2.622577667236328, "step": 9058, "token_acc": 0.3538454749116736 }, { "epoch": 5.310172969803577, "grad_norm": 0.2758080083095387, "learning_rate": 0.00017566259316686747, "loss": 2.6642274856567383, "step": 9059, "token_acc": 0.3476271654505093 }, { "epoch": 5.310759308120786, "grad_norm": 0.2939317034099069, "learning_rate": 0.00017565625566071924, "loss": 2.626450777053833, "step": 9060, "token_acc": 0.35529531939732223 }, { "epoch": 5.311345646437995, "grad_norm": 0.3036499410210893, "learning_rate": 0.00017564991744388242, "loss": 2.6162467002868652, "step": 9061, "token_acc": 0.3570325117618145 }, { "epoch": 5.311931984755204, "grad_norm": 0.28791743717506507, "learning_rate": 0.00017564357851641656, "loss": 2.6299383640289307, "step": 9062, "token_acc": 0.35365745980526725 }, { "epoch": 5.312518323072413, "grad_norm": 0.2750684280734211, "learning_rate": 0.00017563723887838124, "loss": 2.641218900680542, "step": 9063, "token_acc": 0.35090036451373036 }, { "epoch": 5.313104661389621, "grad_norm": 0.2983133291420359, "learning_rate": 0.00017563089852983595, "loss": 2.6711513996124268, "step": 9064, "token_acc": 0.34700824696755733 }, { "epoch": 5.3136909997068305, "grad_norm": 0.2695695138576655, "learning_rate": 0.0001756245574708403, "loss": 2.6378631591796875, "step": 9065, "token_acc": 0.3514974975505124 }, { "epoch": 5.31427733802404, "grad_norm": 0.28383433350963405, "learning_rate": 0.00017561821570145385, "loss": 2.6156296730041504, "step": 9066, "token_acc": 0.35636258719120756 }, { "epoch": 5.314863676341249, "grad_norm": 0.30452957685698323, "learning_rate": 0.00017561187322173615, "loss": 2.6259407997131348, "step": 9067, "token_acc": 0.3533663272445812 }, { "epoch": 5.315450014658458, "grad_norm": 0.2800858644776289, "learning_rate": 0.00017560553003174677, "loss": 2.6577627658843994, "step": 9068, "token_acc": 0.3499114688957289 }, { "epoch": 5.316036352975667, "grad_norm": 0.2766345611127507, "learning_rate": 0.00017559918613154537, "loss": 2.6276133060455322, "step": 9069, "token_acc": 0.35325496468883927 }, { "epoch": 5.316622691292876, "grad_norm": 0.3333545721389573, "learning_rate": 0.00017559284152119143, "loss": 2.6134591102600098, "step": 9070, "token_acc": 0.35598660377915003 }, { "epoch": 5.317209029610085, "grad_norm": 0.343137750382585, "learning_rate": 0.00017558649620074462, "loss": 2.605302572250366, "step": 9071, "token_acc": 0.356458748979191 }, { "epoch": 5.317795367927294, "grad_norm": 0.2941133336109503, "learning_rate": 0.00017558015017026454, "loss": 2.6478724479675293, "step": 9072, "token_acc": 0.35041021154476454 }, { "epoch": 5.318381706244503, "grad_norm": 0.35598740922745375, "learning_rate": 0.0001755738034298108, "loss": 2.6021063327789307, "step": 9073, "token_acc": 0.3586416105267305 }, { "epoch": 5.3189680445617125, "grad_norm": 0.40048671090041144, "learning_rate": 0.000175567455979443, "loss": 2.6390910148620605, "step": 9074, "token_acc": 0.3510536877755363 }, { "epoch": 5.319554382878922, "grad_norm": 0.27526154393841834, "learning_rate": 0.0001755611078192208, "loss": 2.616978645324707, "step": 9075, "token_acc": 0.35403253757365066 }, { "epoch": 5.32014072119613, "grad_norm": 0.39565548713407417, "learning_rate": 0.0001755547589492038, "loss": 2.6247684955596924, "step": 9076, "token_acc": 0.35327722375704795 }, { "epoch": 5.320727059513339, "grad_norm": 0.3201350921509172, "learning_rate": 0.00017554840936945162, "loss": 2.6282029151916504, "step": 9077, "token_acc": 0.3526081396656372 }, { "epoch": 5.321313397830548, "grad_norm": 0.33885618428675307, "learning_rate": 0.000175542059080024, "loss": 2.6283724308013916, "step": 9078, "token_acc": 0.35313902759659566 }, { "epoch": 5.321899736147757, "grad_norm": 0.3092494431919273, "learning_rate": 0.00017553570808098048, "loss": 2.665473699569702, "step": 9079, "token_acc": 0.3471552432746068 }, { "epoch": 5.322486074464966, "grad_norm": 0.359909528450709, "learning_rate": 0.00017552935637238078, "loss": 2.6382510662078857, "step": 9080, "token_acc": 0.3515944399018806 }, { "epoch": 5.323072412782175, "grad_norm": 0.3237204262063517, "learning_rate": 0.00017552300395428457, "loss": 2.65928316116333, "step": 9081, "token_acc": 0.3492521419483987 }, { "epoch": 5.3236587510993845, "grad_norm": 0.34288327082963693, "learning_rate": 0.00017551665082675148, "loss": 2.5832948684692383, "step": 9082, "token_acc": 0.3611499473424258 }, { "epoch": 5.324245089416594, "grad_norm": 0.31979352849713677, "learning_rate": 0.00017551029698984123, "loss": 2.6473593711853027, "step": 9083, "token_acc": 0.35192766571586476 }, { "epoch": 5.324831427733803, "grad_norm": 0.32135892375115627, "learning_rate": 0.00017550394244361347, "loss": 2.578754425048828, "step": 9084, "token_acc": 0.36233885080353845 }, { "epoch": 5.325417766051012, "grad_norm": 0.3133324635558715, "learning_rate": 0.00017549758718812797, "loss": 2.6548657417297363, "step": 9085, "token_acc": 0.34870031911054106 }, { "epoch": 5.32600410436822, "grad_norm": 0.31439023892076773, "learning_rate": 0.00017549123122344434, "loss": 2.635324478149414, "step": 9086, "token_acc": 0.3519035612327825 }, { "epoch": 5.326590442685429, "grad_norm": 0.305983733806818, "learning_rate": 0.0001754848745496223, "loss": 2.6060590744018555, "step": 9087, "token_acc": 0.35775526319311535 }, { "epoch": 5.327176781002638, "grad_norm": 0.32409872895944036, "learning_rate": 0.0001754785171667216, "loss": 2.6450953483581543, "step": 9088, "token_acc": 0.35085297126432247 }, { "epoch": 5.327763119319847, "grad_norm": 0.31079307818170143, "learning_rate": 0.00017547215907480195, "loss": 2.630849838256836, "step": 9089, "token_acc": 0.35314364621177763 }, { "epoch": 5.3283494576370565, "grad_norm": 0.31678481605695397, "learning_rate": 0.00017546580027392303, "loss": 2.6259191036224365, "step": 9090, "token_acc": 0.35333945077790047 }, { "epoch": 5.328935795954266, "grad_norm": 0.3264392061279435, "learning_rate": 0.00017545944076414465, "loss": 2.6482725143432617, "step": 9091, "token_acc": 0.35105781643131884 }, { "epoch": 5.329522134271475, "grad_norm": 0.28540610736057953, "learning_rate": 0.00017545308054552647, "loss": 2.6152689456939697, "step": 9092, "token_acc": 0.3557318939821155 }, { "epoch": 5.330108472588684, "grad_norm": 0.2869917194884141, "learning_rate": 0.0001754467196181283, "loss": 2.6385960578918457, "step": 9093, "token_acc": 0.3507612409876581 }, { "epoch": 5.330694810905893, "grad_norm": 0.30336076629311287, "learning_rate": 0.00017544035798200983, "loss": 2.5872960090637207, "step": 9094, "token_acc": 0.3585177957486373 }, { "epoch": 5.331281149223102, "grad_norm": 0.2950112938142556, "learning_rate": 0.00017543399563723088, "loss": 2.660673141479492, "step": 9095, "token_acc": 0.3475361438845908 }, { "epoch": 5.331867487540311, "grad_norm": 0.3417830081148275, "learning_rate": 0.00017542763258385119, "loss": 2.6628928184509277, "step": 9096, "token_acc": 0.34686441574936494 }, { "epoch": 5.33245382585752, "grad_norm": 0.31772020093596137, "learning_rate": 0.00017542126882193052, "loss": 2.6203958988189697, "step": 9097, "token_acc": 0.354867817649194 }, { "epoch": 5.3330401641747285, "grad_norm": 0.29091211494358055, "learning_rate": 0.00017541490435152868, "loss": 2.5979137420654297, "step": 9098, "token_acc": 0.35750370854469443 }, { "epoch": 5.333626502491938, "grad_norm": 0.335148388390116, "learning_rate": 0.00017540853917270542, "loss": 2.6149709224700928, "step": 9099, "token_acc": 0.3542441280493335 }, { "epoch": 5.334212840809147, "grad_norm": 0.298773400512402, "learning_rate": 0.00017540217328552055, "loss": 2.6258180141448975, "step": 9100, "token_acc": 0.35318468684259857 }, { "epoch": 5.334799179126356, "grad_norm": 0.28273270044005383, "learning_rate": 0.00017539580669003387, "loss": 2.6134450435638428, "step": 9101, "token_acc": 0.35420800836832955 }, { "epoch": 5.335385517443565, "grad_norm": 0.27791619021303676, "learning_rate": 0.0001753894393863052, "loss": 2.590485095977783, "step": 9102, "token_acc": 0.3588154178479099 }, { "epoch": 5.335971855760774, "grad_norm": 0.2779319489382343, "learning_rate": 0.00017538307137439428, "loss": 2.603686809539795, "step": 9103, "token_acc": 0.35787717952545933 }, { "epoch": 5.336558194077983, "grad_norm": 0.2966877083747229, "learning_rate": 0.00017537670265436106, "loss": 2.6256093978881836, "step": 9104, "token_acc": 0.35351301693758097 }, { "epoch": 5.337144532395192, "grad_norm": 0.29528362625209525, "learning_rate": 0.00017537033322626524, "loss": 2.64054012298584, "step": 9105, "token_acc": 0.3512576453794296 }, { "epoch": 5.337730870712401, "grad_norm": 0.28817935184415744, "learning_rate": 0.00017536396309016675, "loss": 2.6372482776641846, "step": 9106, "token_acc": 0.3522232134109554 }, { "epoch": 5.3383172090296105, "grad_norm": 0.281638366014676, "learning_rate": 0.00017535759224612533, "loss": 2.6294965744018555, "step": 9107, "token_acc": 0.35383972285364573 }, { "epoch": 5.338903547346819, "grad_norm": 0.300906129084363, "learning_rate": 0.00017535122069420092, "loss": 2.6844325065612793, "step": 9108, "token_acc": 0.34291447293858246 }, { "epoch": 5.339489885664028, "grad_norm": 0.2845672837736264, "learning_rate": 0.00017534484843445327, "loss": 2.6173362731933594, "step": 9109, "token_acc": 0.3567033563555202 }, { "epoch": 5.340076223981237, "grad_norm": 0.29336173147617856, "learning_rate": 0.00017533847546694233, "loss": 2.589667558670044, "step": 9110, "token_acc": 0.35926793215724323 }, { "epoch": 5.340662562298446, "grad_norm": 0.29478278718906475, "learning_rate": 0.00017533210179172795, "loss": 2.658782720565796, "step": 9111, "token_acc": 0.3493949479106895 }, { "epoch": 5.341248900615655, "grad_norm": 0.27874069023201703, "learning_rate": 0.00017532572740886995, "loss": 2.6278762817382812, "step": 9112, "token_acc": 0.35338075550191317 }, { "epoch": 5.341835238932864, "grad_norm": 0.31119150716155614, "learning_rate": 0.00017531935231842826, "loss": 2.6399271488189697, "step": 9113, "token_acc": 0.35126385740458965 }, { "epoch": 5.342421577250073, "grad_norm": 0.286739443796918, "learning_rate": 0.00017531297652046278, "loss": 2.6200504302978516, "step": 9114, "token_acc": 0.35347253466943895 }, { "epoch": 5.3430079155672825, "grad_norm": 0.391878898213163, "learning_rate": 0.0001753066000150333, "loss": 2.65126371383667, "step": 9115, "token_acc": 0.34753103826317494 }, { "epoch": 5.343594253884492, "grad_norm": 0.2940297680977322, "learning_rate": 0.00017530022280219987, "loss": 2.6300222873687744, "step": 9116, "token_acc": 0.3532876537891534 }, { "epoch": 5.344180592201701, "grad_norm": 0.29281241244573347, "learning_rate": 0.00017529384488202228, "loss": 2.587777614593506, "step": 9117, "token_acc": 0.3595239475362151 }, { "epoch": 5.34476693051891, "grad_norm": 0.29254328966189197, "learning_rate": 0.00017528746625456045, "loss": 2.636072874069214, "step": 9118, "token_acc": 0.35251633582751013 }, { "epoch": 5.345353268836118, "grad_norm": 0.31486354698924723, "learning_rate": 0.00017528108691987438, "loss": 2.6129727363586426, "step": 9119, "token_acc": 0.35666382885233294 }, { "epoch": 5.345939607153327, "grad_norm": 0.31731883285600876, "learning_rate": 0.00017527470687802386, "loss": 2.657259464263916, "step": 9120, "token_acc": 0.3482393867164852 }, { "epoch": 5.346525945470536, "grad_norm": 0.3041777261364634, "learning_rate": 0.00017526832612906897, "loss": 2.661468267440796, "step": 9121, "token_acc": 0.3480633190897753 }, { "epoch": 5.347112283787745, "grad_norm": 0.3327609475708628, "learning_rate": 0.00017526194467306957, "loss": 2.6475026607513428, "step": 9122, "token_acc": 0.35100722158874953 }, { "epoch": 5.3476986221049545, "grad_norm": 0.30706185795630264, "learning_rate": 0.0001752555625100856, "loss": 2.616863250732422, "step": 9123, "token_acc": 0.35492202244958776 }, { "epoch": 5.348284960422164, "grad_norm": 0.3296483534679435, "learning_rate": 0.00017524917964017707, "loss": 2.6088900566101074, "step": 9124, "token_acc": 0.35612530290252675 }, { "epoch": 5.348871298739373, "grad_norm": 0.3066028286091517, "learning_rate": 0.00017524279606340385, "loss": 2.660628080368042, "step": 9125, "token_acc": 0.34801751994731767 }, { "epoch": 5.349457637056582, "grad_norm": 0.3264146615310536, "learning_rate": 0.00017523641177982598, "loss": 2.61086106300354, "step": 9126, "token_acc": 0.3552697631056821 }, { "epoch": 5.350043975373791, "grad_norm": 0.30092092145481386, "learning_rate": 0.00017523002678950338, "loss": 2.6382458209991455, "step": 9127, "token_acc": 0.3517462627507323 }, { "epoch": 5.350630313691, "grad_norm": 0.34773964828063153, "learning_rate": 0.00017522364109249608, "loss": 2.6637630462646484, "step": 9128, "token_acc": 0.34718433305943575 }, { "epoch": 5.351216652008208, "grad_norm": 0.2811897977512063, "learning_rate": 0.00017521725468886402, "loss": 2.6443536281585693, "step": 9129, "token_acc": 0.3521953438217866 }, { "epoch": 5.351802990325417, "grad_norm": 0.33117984361285163, "learning_rate": 0.00017521086757866722, "loss": 2.631499767303467, "step": 9130, "token_acc": 0.35433478876273955 }, { "epoch": 5.3523893286426265, "grad_norm": 0.29510787992485477, "learning_rate": 0.00017520447976196565, "loss": 2.671140670776367, "step": 9131, "token_acc": 0.3471141362080497 }, { "epoch": 5.352975666959836, "grad_norm": 0.3217863253605579, "learning_rate": 0.00017519809123881931, "loss": 2.6426329612731934, "step": 9132, "token_acc": 0.3510991423156407 }, { "epoch": 5.353562005277045, "grad_norm": 0.30524592704227443, "learning_rate": 0.0001751917020092883, "loss": 2.6281991004943848, "step": 9133, "token_acc": 0.35266112432837016 }, { "epoch": 5.354148343594254, "grad_norm": 0.27425010820647794, "learning_rate": 0.0001751853120734325, "loss": 2.6257667541503906, "step": 9134, "token_acc": 0.3532135294149024 }, { "epoch": 5.354734681911463, "grad_norm": 0.2958922525868725, "learning_rate": 0.00017517892143131207, "loss": 2.6101741790771484, "step": 9135, "token_acc": 0.3577594849734921 }, { "epoch": 5.355321020228672, "grad_norm": 0.2779829006150963, "learning_rate": 0.00017517253008298694, "loss": 2.611966609954834, "step": 9136, "token_acc": 0.3557986630252915 }, { "epoch": 5.355907358545881, "grad_norm": 0.283422800707739, "learning_rate": 0.0001751661380285172, "loss": 2.6100351810455322, "step": 9137, "token_acc": 0.3557722399679284 }, { "epoch": 5.35649369686309, "grad_norm": 0.28855634578453615, "learning_rate": 0.00017515974526796288, "loss": 2.6458120346069336, "step": 9138, "token_acc": 0.3503066378537702 }, { "epoch": 5.357080035180299, "grad_norm": 0.2935697920836456, "learning_rate": 0.00017515335180138403, "loss": 2.67828369140625, "step": 9139, "token_acc": 0.34568130648827416 }, { "epoch": 5.3576663734975085, "grad_norm": 0.3043977162626109, "learning_rate": 0.0001751469576288407, "loss": 2.6350231170654297, "step": 9140, "token_acc": 0.35185208991641365 }, { "epoch": 5.358252711814717, "grad_norm": 0.27760535331149716, "learning_rate": 0.00017514056275039298, "loss": 2.6588568687438965, "step": 9141, "token_acc": 0.34780973623466555 }, { "epoch": 5.358839050131926, "grad_norm": 0.2835811473968491, "learning_rate": 0.00017513416716610092, "loss": 2.6256415843963623, "step": 9142, "token_acc": 0.35355226656496686 }, { "epoch": 5.359425388449135, "grad_norm": 0.2802774411176032, "learning_rate": 0.0001751277708760246, "loss": 2.661647319793701, "step": 9143, "token_acc": 0.34815232362122234 }, { "epoch": 5.360011726766344, "grad_norm": 0.2875446633216819, "learning_rate": 0.00017512137388022412, "loss": 2.667041778564453, "step": 9144, "token_acc": 0.3486363529586795 }, { "epoch": 5.360598065083553, "grad_norm": 0.2787932426554914, "learning_rate": 0.00017511497617875955, "loss": 2.6451539993286133, "step": 9145, "token_acc": 0.3512874160209435 }, { "epoch": 5.361184403400762, "grad_norm": 0.3123570563453856, "learning_rate": 0.000175108577771691, "loss": 2.6678757667541504, "step": 9146, "token_acc": 0.34696859021183346 }, { "epoch": 5.361770741717971, "grad_norm": 0.3009270211128707, "learning_rate": 0.00017510217865907856, "loss": 2.6367340087890625, "step": 9147, "token_acc": 0.3511767143747084 }, { "epoch": 5.3623570800351805, "grad_norm": 0.32113995381240895, "learning_rate": 0.00017509577884098238, "loss": 2.6606087684631348, "step": 9148, "token_acc": 0.3486898332283091 }, { "epoch": 5.36294341835239, "grad_norm": 0.27959369168445586, "learning_rate": 0.00017508937831746253, "loss": 2.662008762359619, "step": 9149, "token_acc": 0.3475869246064931 }, { "epoch": 5.363529756669599, "grad_norm": 0.3230672578386095, "learning_rate": 0.00017508297708857917, "loss": 2.6738786697387695, "step": 9150, "token_acc": 0.3471830675996469 }, { "epoch": 5.364116094986807, "grad_norm": 0.2909542830772192, "learning_rate": 0.0001750765751543924, "loss": 2.656966209411621, "step": 9151, "token_acc": 0.3502919952625911 }, { "epoch": 5.364702433304016, "grad_norm": 0.3172404502030238, "learning_rate": 0.00017507017251496237, "loss": 2.6545541286468506, "step": 9152, "token_acc": 0.3506357541172131 }, { "epoch": 5.365288771621225, "grad_norm": 0.2720645153299547, "learning_rate": 0.00017506376917034925, "loss": 2.5917651653289795, "step": 9153, "token_acc": 0.35840305808762596 }, { "epoch": 5.365875109938434, "grad_norm": 0.3459479560443342, "learning_rate": 0.00017505736512061316, "loss": 2.611440658569336, "step": 9154, "token_acc": 0.35605213363085614 }, { "epoch": 5.366461448255643, "grad_norm": 0.3249418903877057, "learning_rate": 0.00017505096036581424, "loss": 2.658864974975586, "step": 9155, "token_acc": 0.34792251231310123 }, { "epoch": 5.3670477865728525, "grad_norm": 0.2922989062209861, "learning_rate": 0.0001750445549060127, "loss": 2.6486916542053223, "step": 9156, "token_acc": 0.3512991499518446 }, { "epoch": 5.367634124890062, "grad_norm": 0.35274622803534633, "learning_rate": 0.0001750381487412687, "loss": 2.6241095066070557, "step": 9157, "token_acc": 0.35361047579670174 }, { "epoch": 5.368220463207271, "grad_norm": 0.3763943354075912, "learning_rate": 0.0001750317418716424, "loss": 2.6429853439331055, "step": 9158, "token_acc": 0.3505435793838548 }, { "epoch": 5.36880680152448, "grad_norm": 0.30612400513216764, "learning_rate": 0.00017502533429719397, "loss": 2.6591031551361084, "step": 9159, "token_acc": 0.3490009937758251 }, { "epoch": 5.369393139841689, "grad_norm": 0.3203736641308976, "learning_rate": 0.00017501892601798366, "loss": 2.6291561126708984, "step": 9160, "token_acc": 0.3533349034743168 }, { "epoch": 5.369979478158898, "grad_norm": 0.34192120109428303, "learning_rate": 0.0001750125170340716, "loss": 2.627112865447998, "step": 9161, "token_acc": 0.35359891740969945 }, { "epoch": 5.370565816476106, "grad_norm": 0.2843544638963652, "learning_rate": 0.00017500610734551804, "loss": 2.5898313522338867, "step": 9162, "token_acc": 0.3585044281002691 }, { "epoch": 5.371152154793315, "grad_norm": 0.31517302840536654, "learning_rate": 0.00017499969695238319, "loss": 2.665757894515991, "step": 9163, "token_acc": 0.34857948876356887 }, { "epoch": 5.3717384931105245, "grad_norm": 0.27688638128701154, "learning_rate": 0.0001749932858547272, "loss": 2.628725528717041, "step": 9164, "token_acc": 0.3535559827006247 }, { "epoch": 5.372324831427734, "grad_norm": 0.30913899690604996, "learning_rate": 0.0001749868740526104, "loss": 2.634352922439575, "step": 9165, "token_acc": 0.3510590597119113 }, { "epoch": 5.372911169744943, "grad_norm": 0.2748201231281132, "learning_rate": 0.0001749804615460929, "loss": 2.678978204727173, "step": 9166, "token_acc": 0.3464749152822547 }, { "epoch": 5.373497508062152, "grad_norm": 0.31381779824559786, "learning_rate": 0.00017497404833523506, "loss": 2.647855758666992, "step": 9167, "token_acc": 0.34964007561834853 }, { "epoch": 5.374083846379361, "grad_norm": 0.29518899338823756, "learning_rate": 0.00017496763442009704, "loss": 2.631199598312378, "step": 9168, "token_acc": 0.35215448535368055 }, { "epoch": 5.37467018469657, "grad_norm": 0.27892144909398875, "learning_rate": 0.0001749612198007391, "loss": 2.715101718902588, "step": 9169, "token_acc": 0.3407833961346785 }, { "epoch": 5.375256523013779, "grad_norm": 0.2946675782950637, "learning_rate": 0.00017495480447722158, "loss": 2.627732276916504, "step": 9170, "token_acc": 0.3533859124286103 }, { "epoch": 5.375842861330988, "grad_norm": 0.2692378569610981, "learning_rate": 0.0001749483884496046, "loss": 2.6475040912628174, "step": 9171, "token_acc": 0.3514483510656923 }, { "epoch": 5.3764291996481965, "grad_norm": 0.30928815350488864, "learning_rate": 0.00017494197171794853, "loss": 2.6394259929656982, "step": 9172, "token_acc": 0.3512110400030205 }, { "epoch": 5.377015537965406, "grad_norm": 0.27381692864354157, "learning_rate": 0.0001749355542823136, "loss": 2.655602216720581, "step": 9173, "token_acc": 0.348442514002392 }, { "epoch": 5.377601876282615, "grad_norm": 0.2770754440448715, "learning_rate": 0.00017492913614276014, "loss": 2.6748037338256836, "step": 9174, "token_acc": 0.34641980703082526 }, { "epoch": 5.378188214599824, "grad_norm": 0.2821182147745218, "learning_rate": 0.00017492271729934843, "loss": 2.640951633453369, "step": 9175, "token_acc": 0.34960844868471846 }, { "epoch": 5.378774552917033, "grad_norm": 0.29134355449895866, "learning_rate": 0.0001749162977521387, "loss": 2.694399356842041, "step": 9176, "token_acc": 0.34526181158829117 }, { "epoch": 5.379360891234242, "grad_norm": 0.28339730088405507, "learning_rate": 0.00017490987750119134, "loss": 2.653134822845459, "step": 9177, "token_acc": 0.3489772441312866 }, { "epoch": 5.379947229551451, "grad_norm": 0.2903999014350154, "learning_rate": 0.00017490345654656663, "loss": 2.6606860160827637, "step": 9178, "token_acc": 0.34796152273915043 }, { "epoch": 5.38053356786866, "grad_norm": 0.29677740599003216, "learning_rate": 0.00017489703488832484, "loss": 2.679461717605591, "step": 9179, "token_acc": 0.3452262418409116 }, { "epoch": 5.381119906185869, "grad_norm": 0.2697579475166165, "learning_rate": 0.00017489061252652638, "loss": 2.6446759700775146, "step": 9180, "token_acc": 0.34955150217394143 }, { "epoch": 5.3817062445030786, "grad_norm": 0.3030978500479128, "learning_rate": 0.0001748841894612315, "loss": 2.638965129852295, "step": 9181, "token_acc": 0.3517012569203565 }, { "epoch": 5.382292582820288, "grad_norm": 0.3051325901575654, "learning_rate": 0.0001748777656925006, "loss": 2.6479573249816895, "step": 9182, "token_acc": 0.35169829974893 }, { "epoch": 5.382878921137497, "grad_norm": 0.2875584734640436, "learning_rate": 0.00017487134122039395, "loss": 2.6258015632629395, "step": 9183, "token_acc": 0.3537485405759508 }, { "epoch": 5.383465259454705, "grad_norm": 0.2757945525569109, "learning_rate": 0.00017486491604497197, "loss": 2.664182186126709, "step": 9184, "token_acc": 0.348831485738523 }, { "epoch": 5.384051597771914, "grad_norm": 0.2729210743588234, "learning_rate": 0.000174858490166295, "loss": 2.6397876739501953, "step": 9185, "token_acc": 0.34964197619560794 }, { "epoch": 5.384637936089123, "grad_norm": 0.28739859326512823, "learning_rate": 0.00017485206358442332, "loss": 2.645949125289917, "step": 9186, "token_acc": 0.3506532427034973 }, { "epoch": 5.385224274406332, "grad_norm": 0.308235423230451, "learning_rate": 0.00017484563629941745, "loss": 2.6778860092163086, "step": 9187, "token_acc": 0.3448710712451502 }, { "epoch": 5.3858106127235414, "grad_norm": 0.2967589316678, "learning_rate": 0.0001748392083113376, "loss": 2.657205104827881, "step": 9188, "token_acc": 0.35011086180113915 }, { "epoch": 5.386396951040751, "grad_norm": 0.2979831071707743, "learning_rate": 0.0001748327796202443, "loss": 2.6488137245178223, "step": 9189, "token_acc": 0.35024664526135063 }, { "epoch": 5.38698328935796, "grad_norm": 0.2877212544030338, "learning_rate": 0.00017482635022619784, "loss": 2.6232125759124756, "step": 9190, "token_acc": 0.35373985534400715 }, { "epoch": 5.387569627675169, "grad_norm": 0.2828034670223659, "learning_rate": 0.00017481992012925864, "loss": 2.6502652168273926, "step": 9191, "token_acc": 0.35097181208755857 }, { "epoch": 5.388155965992378, "grad_norm": 0.3247823988867913, "learning_rate": 0.00017481348932948712, "loss": 2.652045726776123, "step": 9192, "token_acc": 0.3506194135173279 }, { "epoch": 5.388742304309587, "grad_norm": 0.2877578392348327, "learning_rate": 0.00017480705782694372, "loss": 2.6619410514831543, "step": 9193, "token_acc": 0.34804089737192284 }, { "epoch": 5.389328642626795, "grad_norm": 0.2945741136560786, "learning_rate": 0.00017480062562168878, "loss": 2.6558499336242676, "step": 9194, "token_acc": 0.34909756667618874 }, { "epoch": 5.389914980944004, "grad_norm": 0.28568789621593405, "learning_rate": 0.00017479419271378274, "loss": 2.672414541244507, "step": 9195, "token_acc": 0.3464891982450619 }, { "epoch": 5.3905013192612135, "grad_norm": 0.3033980329969103, "learning_rate": 0.0001747877591032861, "loss": 2.6636180877685547, "step": 9196, "token_acc": 0.3466803847208385 }, { "epoch": 5.391087657578423, "grad_norm": 0.27552698443000784, "learning_rate": 0.00017478132479025921, "loss": 2.668168067932129, "step": 9197, "token_acc": 0.3466289108869471 }, { "epoch": 5.391673995895632, "grad_norm": 0.29275583402266914, "learning_rate": 0.00017477488977476254, "loss": 2.6359057426452637, "step": 9198, "token_acc": 0.3519196673561569 }, { "epoch": 5.392260334212841, "grad_norm": 0.3150720530420571, "learning_rate": 0.00017476845405685654, "loss": 2.6451005935668945, "step": 9199, "token_acc": 0.35023056274437914 }, { "epoch": 5.39284667253005, "grad_norm": 0.30543353534881174, "learning_rate": 0.00017476201763660167, "loss": 2.648590564727783, "step": 9200, "token_acc": 0.3488169901422295 }, { "epoch": 5.393433010847259, "grad_norm": 0.29511651739550887, "learning_rate": 0.0001747555805140584, "loss": 2.66976261138916, "step": 9201, "token_acc": 0.34743473079534476 }, { "epoch": 5.394019349164468, "grad_norm": 0.2944459553185582, "learning_rate": 0.00017474914268928715, "loss": 2.6645655632019043, "step": 9202, "token_acc": 0.3474706713315747 }, { "epoch": 5.394605687481677, "grad_norm": 0.26957972039691674, "learning_rate": 0.00017474270416234847, "loss": 2.6345341205596924, "step": 9203, "token_acc": 0.35123961613551313 }, { "epoch": 5.395192025798886, "grad_norm": 0.2929112440455568, "learning_rate": 0.00017473626493330277, "loss": 2.6242728233337402, "step": 9204, "token_acc": 0.35428383916238676 }, { "epoch": 5.395778364116095, "grad_norm": 0.3372180127903838, "learning_rate": 0.0001747298250022106, "loss": 2.6255886554718018, "step": 9205, "token_acc": 0.35243933887549284 }, { "epoch": 5.396364702433304, "grad_norm": 0.34854658561618224, "learning_rate": 0.00017472338436913242, "loss": 2.641961097717285, "step": 9206, "token_acc": 0.35080886527646826 }, { "epoch": 5.396951040750513, "grad_norm": 0.27998655527515354, "learning_rate": 0.0001747169430341287, "loss": 2.6474790573120117, "step": 9207, "token_acc": 0.3503120698385074 }, { "epoch": 5.397537379067722, "grad_norm": 0.3138349883677517, "learning_rate": 0.00017471050099726, "loss": 2.633378028869629, "step": 9208, "token_acc": 0.3529273344590489 }, { "epoch": 5.398123717384931, "grad_norm": 0.3128681099397522, "learning_rate": 0.0001747040582585868, "loss": 2.6790719032287598, "step": 9209, "token_acc": 0.3445141809016383 }, { "epoch": 5.39871005570214, "grad_norm": 0.29269220154742487, "learning_rate": 0.00017469761481816968, "loss": 2.6462650299072266, "step": 9210, "token_acc": 0.3506086582511453 }, { "epoch": 5.399296394019349, "grad_norm": 0.28290830017447266, "learning_rate": 0.00017469117067606913, "loss": 2.626572608947754, "step": 9211, "token_acc": 0.3522772618793235 }, { "epoch": 5.399882732336558, "grad_norm": 0.303152004723051, "learning_rate": 0.00017468472583234563, "loss": 2.6611132621765137, "step": 9212, "token_acc": 0.34688854276790976 }, { "epoch": 5.4004690706537675, "grad_norm": 0.2744499388078628, "learning_rate": 0.0001746782802870598, "loss": 2.6674013137817383, "step": 9213, "token_acc": 0.34850612153206784 }, { "epoch": 5.401055408970977, "grad_norm": 0.27660877961609887, "learning_rate": 0.00017467183404027217, "loss": 2.639521837234497, "step": 9214, "token_acc": 0.352064184693467 }, { "epoch": 5.401641747288186, "grad_norm": 0.28652071307511084, "learning_rate": 0.00017466538709204324, "loss": 2.6406121253967285, "step": 9215, "token_acc": 0.3526194316151607 }, { "epoch": 5.402228085605394, "grad_norm": 0.2633453024280151, "learning_rate": 0.00017465893944243363, "loss": 2.647779941558838, "step": 9216, "token_acc": 0.349663027328002 }, { "epoch": 5.402814423922603, "grad_norm": 0.29036724444051026, "learning_rate": 0.0001746524910915039, "loss": 2.668825149536133, "step": 9217, "token_acc": 0.345658357035302 }, { "epoch": 5.403400762239812, "grad_norm": 0.2838923432302892, "learning_rate": 0.0001746460420393146, "loss": 2.64522647857666, "step": 9218, "token_acc": 0.35120988785784296 }, { "epoch": 5.403987100557021, "grad_norm": 0.2779513825513963, "learning_rate": 0.0001746395922859263, "loss": 2.645008087158203, "step": 9219, "token_acc": 0.34980507598058713 }, { "epoch": 5.40457343887423, "grad_norm": 0.3255499891434415, "learning_rate": 0.00017463314183139965, "loss": 2.6132619380950928, "step": 9220, "token_acc": 0.35555567416959794 }, { "epoch": 5.4051597771914395, "grad_norm": 0.3152259872423518, "learning_rate": 0.00017462669067579517, "loss": 2.639146089553833, "step": 9221, "token_acc": 0.35209544046491387 }, { "epoch": 5.405746115508649, "grad_norm": 0.2865576663914704, "learning_rate": 0.0001746202388191735, "loss": 2.634516954421997, "step": 9222, "token_acc": 0.3531703777937845 }, { "epoch": 5.406332453825858, "grad_norm": 0.30907224675001005, "learning_rate": 0.00017461378626159525, "loss": 2.6406731605529785, "step": 9223, "token_acc": 0.350268175622392 }, { "epoch": 5.406918792143067, "grad_norm": 0.3200922583515522, "learning_rate": 0.00017460733300312105, "loss": 2.6428892612457275, "step": 9224, "token_acc": 0.3525032308816093 }, { "epoch": 5.407505130460276, "grad_norm": 0.284691396576186, "learning_rate": 0.00017460087904381144, "loss": 2.6719746589660645, "step": 9225, "token_acc": 0.34598874391790385 }, { "epoch": 5.408091468777485, "grad_norm": 0.33064610000445716, "learning_rate": 0.0001745944243837271, "loss": 2.6362950801849365, "step": 9226, "token_acc": 0.35206593739645914 }, { "epoch": 5.408677807094693, "grad_norm": 0.27921331806323674, "learning_rate": 0.00017458796902292869, "loss": 2.6359457969665527, "step": 9227, "token_acc": 0.35187015678024336 }, { "epoch": 5.409264145411902, "grad_norm": 0.3324757094536556, "learning_rate": 0.0001745815129614768, "loss": 2.678595542907715, "step": 9228, "token_acc": 0.3445675759454433 }, { "epoch": 5.4098504837291115, "grad_norm": 0.35578066812042636, "learning_rate": 0.0001745750561994321, "loss": 2.646449565887451, "step": 9229, "token_acc": 0.35135618511706623 }, { "epoch": 5.410436822046321, "grad_norm": 0.2974071114269231, "learning_rate": 0.00017456859873685523, "loss": 2.6521496772766113, "step": 9230, "token_acc": 0.35014047486011535 }, { "epoch": 5.41102316036353, "grad_norm": 0.35828511272411984, "learning_rate": 0.0001745621405738069, "loss": 2.663882255554199, "step": 9231, "token_acc": 0.3487180554724316 }, { "epoch": 5.411609498680739, "grad_norm": 0.35791225373617613, "learning_rate": 0.00017455568171034766, "loss": 2.641292095184326, "step": 9232, "token_acc": 0.35025245010264455 }, { "epoch": 5.412195836997948, "grad_norm": 0.2985744761471861, "learning_rate": 0.0001745492221465383, "loss": 2.6272144317626953, "step": 9233, "token_acc": 0.3530285236031804 }, { "epoch": 5.412782175315157, "grad_norm": 0.31776533100372834, "learning_rate": 0.00017454276188243946, "loss": 2.648611545562744, "step": 9234, "token_acc": 0.35067204660235685 }, { "epoch": 5.413368513632366, "grad_norm": 0.2949566010130329, "learning_rate": 0.00017453630091811178, "loss": 2.645766258239746, "step": 9235, "token_acc": 0.351200272016321 }, { "epoch": 5.413954851949575, "grad_norm": 0.3484661470153019, "learning_rate": 0.00017452983925361605, "loss": 2.6655569076538086, "step": 9236, "token_acc": 0.3486717620545965 }, { "epoch": 5.4145411902667835, "grad_norm": 0.2723768563723372, "learning_rate": 0.0001745233768890129, "loss": 2.6693434715270996, "step": 9237, "token_acc": 0.34740341964780486 }, { "epoch": 5.415127528583993, "grad_norm": 0.3039809968566393, "learning_rate": 0.000174516913824363, "loss": 2.644955635070801, "step": 9238, "token_acc": 0.35083827424407427 }, { "epoch": 5.415713866901202, "grad_norm": 0.2890490171994882, "learning_rate": 0.00017451045005972712, "loss": 2.6863882541656494, "step": 9239, "token_acc": 0.3436282148731337 }, { "epoch": 5.416300205218411, "grad_norm": 0.31801458186699505, "learning_rate": 0.00017450398559516598, "loss": 2.676280975341797, "step": 9240, "token_acc": 0.3459335303070087 }, { "epoch": 5.41688654353562, "grad_norm": 0.283411162699875, "learning_rate": 0.0001744975204307403, "loss": 2.6563234329223633, "step": 9241, "token_acc": 0.34891630663707474 }, { "epoch": 5.417472881852829, "grad_norm": 0.34295498007173897, "learning_rate": 0.00017449105456651077, "loss": 2.679187297821045, "step": 9242, "token_acc": 0.344854150723208 }, { "epoch": 5.418059220170038, "grad_norm": 0.2777695500015634, "learning_rate": 0.0001744845880025382, "loss": 2.655790328979492, "step": 9243, "token_acc": 0.3501764448756357 }, { "epoch": 5.418645558487247, "grad_norm": 0.38541906609184623, "learning_rate": 0.00017447812073888327, "loss": 2.63789701461792, "step": 9244, "token_acc": 0.350186677297434 }, { "epoch": 5.419231896804456, "grad_norm": 0.29375542977778046, "learning_rate": 0.00017447165277560678, "loss": 2.6485838890075684, "step": 9245, "token_acc": 0.3507300258268766 }, { "epoch": 5.4198182351216655, "grad_norm": 0.34715775652158476, "learning_rate": 0.00017446518411276944, "loss": 2.653679370880127, "step": 9246, "token_acc": 0.34857971879820143 }, { "epoch": 5.420404573438875, "grad_norm": 0.27434429599254373, "learning_rate": 0.00017445871475043205, "loss": 2.607123851776123, "step": 9247, "token_acc": 0.3574174956896203 }, { "epoch": 5.420990911756084, "grad_norm": 0.37457703237735224, "learning_rate": 0.00017445224468865534, "loss": 2.6612343788146973, "step": 9248, "token_acc": 0.3488917267871721 }, { "epoch": 5.421577250073292, "grad_norm": 0.2763339075962531, "learning_rate": 0.00017444577392750017, "loss": 2.676593065261841, "step": 9249, "token_acc": 0.34444216544993683 }, { "epoch": 5.422163588390501, "grad_norm": 0.36263552114490627, "learning_rate": 0.00017443930246702723, "loss": 2.6488003730773926, "step": 9250, "token_acc": 0.34999337684701554 }, { "epoch": 5.42274992670771, "grad_norm": 0.2829850224343466, "learning_rate": 0.0001744328303072974, "loss": 2.6467456817626953, "step": 9251, "token_acc": 0.34968082362549974 }, { "epoch": 5.423336265024919, "grad_norm": 0.41973991451105946, "learning_rate": 0.00017442635744837137, "loss": 2.6746253967285156, "step": 9252, "token_acc": 0.34592618972728617 }, { "epoch": 5.423922603342128, "grad_norm": 0.28772496248680346, "learning_rate": 0.00017441988389031002, "loss": 2.6486639976501465, "step": 9253, "token_acc": 0.3498523973146647 }, { "epoch": 5.4245089416593375, "grad_norm": 0.3431228478030812, "learning_rate": 0.00017441340963317414, "loss": 2.6319870948791504, "step": 9254, "token_acc": 0.35211869069264184 }, { "epoch": 5.425095279976547, "grad_norm": 0.2812869360074288, "learning_rate": 0.00017440693467702455, "loss": 2.66316556930542, "step": 9255, "token_acc": 0.3480773353848482 }, { "epoch": 5.425681618293756, "grad_norm": 0.3348143176497172, "learning_rate": 0.0001744004590219221, "loss": 2.653688430786133, "step": 9256, "token_acc": 0.3481771252429468 }, { "epoch": 5.426267956610965, "grad_norm": 0.284678250997968, "learning_rate": 0.00017439398266792756, "loss": 2.6870598793029785, "step": 9257, "token_acc": 0.3440134776129074 }, { "epoch": 5.426854294928174, "grad_norm": 0.3019696756347643, "learning_rate": 0.0001743875056151018, "loss": 2.6434805393218994, "step": 9258, "token_acc": 0.3494060712714474 }, { "epoch": 5.427440633245382, "grad_norm": 0.31617571155889557, "learning_rate": 0.0001743810278635057, "loss": 2.6683263778686523, "step": 9259, "token_acc": 0.3476538800857325 }, { "epoch": 5.428026971562591, "grad_norm": 0.2925789315656586, "learning_rate": 0.00017437454941320003, "loss": 2.684692144393921, "step": 9260, "token_acc": 0.3438412805818197 }, { "epoch": 5.4286133098798, "grad_norm": 0.3456299784149186, "learning_rate": 0.00017436807026424572, "loss": 2.6515817642211914, "step": 9261, "token_acc": 0.34929095764686147 }, { "epoch": 5.4291996481970095, "grad_norm": 0.2778919608164676, "learning_rate": 0.0001743615904167036, "loss": 2.665006637573242, "step": 9262, "token_acc": 0.34846154450817374 }, { "epoch": 5.429785986514219, "grad_norm": 0.3215575733331176, "learning_rate": 0.00017435510987063454, "loss": 2.6541085243225098, "step": 9263, "token_acc": 0.34982349894839193 }, { "epoch": 5.430372324831428, "grad_norm": 0.28342284044312094, "learning_rate": 0.0001743486286260994, "loss": 2.625, "step": 9264, "token_acc": 0.35392923128023085 }, { "epoch": 5.430958663148637, "grad_norm": 0.32584422131979335, "learning_rate": 0.0001743421466831591, "loss": 2.653529644012451, "step": 9265, "token_acc": 0.3503565585074619 }, { "epoch": 5.431545001465846, "grad_norm": 0.2827144264662771, "learning_rate": 0.0001743356640418745, "loss": 2.666945219039917, "step": 9266, "token_acc": 0.34683041339782933 }, { "epoch": 5.432131339783055, "grad_norm": 0.32468473548878996, "learning_rate": 0.0001743291807023065, "loss": 2.6752285957336426, "step": 9267, "token_acc": 0.345463508355032 }, { "epoch": 5.432717678100264, "grad_norm": 0.27251853685135174, "learning_rate": 0.00017432269666451604, "loss": 2.630253791809082, "step": 9268, "token_acc": 0.3528215984594391 }, { "epoch": 5.433304016417473, "grad_norm": 0.3045888525804336, "learning_rate": 0.00017431621192856396, "loss": 2.66154408454895, "step": 9269, "token_acc": 0.34941003709810387 }, { "epoch": 5.4338903547346815, "grad_norm": 0.260799069106681, "learning_rate": 0.0001743097264945112, "loss": 2.64109468460083, "step": 9270, "token_acc": 0.35025388340151153 }, { "epoch": 5.434476693051891, "grad_norm": 0.2869325668708683, "learning_rate": 0.00017430324036241872, "loss": 2.6270158290863037, "step": 9271, "token_acc": 0.3544345655571141 }, { "epoch": 5.4350630313691, "grad_norm": 0.2834136362404422, "learning_rate": 0.0001742967535323474, "loss": 2.6951050758361816, "step": 9272, "token_acc": 0.3438157365738519 }, { "epoch": 5.435649369686309, "grad_norm": 0.32005213712083863, "learning_rate": 0.00017429026600435823, "loss": 2.6767215728759766, "step": 9273, "token_acc": 0.34603496805630474 }, { "epoch": 5.436235708003518, "grad_norm": 0.26341614561565535, "learning_rate": 0.0001742837777785121, "loss": 2.6697981357574463, "step": 9274, "token_acc": 0.34766043121463885 }, { "epoch": 5.436822046320727, "grad_norm": 0.28202438830085746, "learning_rate": 0.00017427728885486995, "loss": 2.678168535232544, "step": 9275, "token_acc": 0.34597784507898244 }, { "epoch": 5.437408384637936, "grad_norm": 0.26470526030329666, "learning_rate": 0.0001742707992334928, "loss": 2.657444953918457, "step": 9276, "token_acc": 0.3496569787264819 }, { "epoch": 5.437994722955145, "grad_norm": 0.27381762834454226, "learning_rate": 0.00017426430891444156, "loss": 2.6749205589294434, "step": 9277, "token_acc": 0.3448349142175919 }, { "epoch": 5.438581061272354, "grad_norm": 0.2975967795276183, "learning_rate": 0.0001742578178977772, "loss": 2.658967971801758, "step": 9278, "token_acc": 0.34780798407653013 }, { "epoch": 5.4391673995895635, "grad_norm": 0.34075639558908916, "learning_rate": 0.00017425132618356073, "loss": 2.638697385787964, "step": 9279, "token_acc": 0.352623083570198 }, { "epoch": 5.439753737906772, "grad_norm": 0.26368360885623476, "learning_rate": 0.0001742448337718531, "loss": 2.617581367492676, "step": 9280, "token_acc": 0.3542658314487145 }, { "epoch": 5.440340076223981, "grad_norm": 0.3120488755638987, "learning_rate": 0.00017423834066271528, "loss": 2.6785593032836914, "step": 9281, "token_acc": 0.34476466457655447 }, { "epoch": 5.44092641454119, "grad_norm": 0.2744581679399676, "learning_rate": 0.0001742318468562083, "loss": 2.6564488410949707, "step": 9282, "token_acc": 0.3481688195862942 }, { "epoch": 5.441512752858399, "grad_norm": 0.28282940281946123, "learning_rate": 0.00017422535235239313, "loss": 2.651916742324829, "step": 9283, "token_acc": 0.3484321390255043 }, { "epoch": 5.442099091175608, "grad_norm": 0.2756694635126806, "learning_rate": 0.00017421885715133083, "loss": 2.637572765350342, "step": 9284, "token_acc": 0.3511623204907871 }, { "epoch": 5.442685429492817, "grad_norm": 0.2802514613693682, "learning_rate": 0.00017421236125308236, "loss": 2.6657588481903076, "step": 9285, "token_acc": 0.3459594587250693 }, { "epoch": 5.443271767810026, "grad_norm": 0.2806904346102108, "learning_rate": 0.00017420586465770877, "loss": 2.6574153900146484, "step": 9286, "token_acc": 0.34741721103131107 }, { "epoch": 5.4438581061272355, "grad_norm": 0.2821907706139861, "learning_rate": 0.00017419936736527106, "loss": 2.6239233016967773, "step": 9287, "token_acc": 0.3539010996388244 }, { "epoch": 5.444444444444445, "grad_norm": 0.2848098389449877, "learning_rate": 0.00017419286937583027, "loss": 2.653468608856201, "step": 9288, "token_acc": 0.3501569375918989 }, { "epoch": 5.445030782761654, "grad_norm": 0.2893317588870503, "learning_rate": 0.00017418637068944746, "loss": 2.6941704750061035, "step": 9289, "token_acc": 0.34371686517536365 }, { "epoch": 5.445617121078863, "grad_norm": 0.28883291122830984, "learning_rate": 0.00017417987130618364, "loss": 2.672786235809326, "step": 9290, "token_acc": 0.34521196470841403 }, { "epoch": 5.446203459396072, "grad_norm": 0.28333587453210113, "learning_rate": 0.00017417337122609992, "loss": 2.6625771522521973, "step": 9291, "token_acc": 0.3490096465458008 }, { "epoch": 5.44678979771328, "grad_norm": 0.2837438943154363, "learning_rate": 0.00017416687044925734, "loss": 2.691338539123535, "step": 9292, "token_acc": 0.34333685539777853 }, { "epoch": 5.447376136030489, "grad_norm": 0.27935800101214703, "learning_rate": 0.00017416036897571692, "loss": 2.6151061058044434, "step": 9293, "token_acc": 0.35436849670623627 }, { "epoch": 5.447962474347698, "grad_norm": 0.3639009126483607, "learning_rate": 0.0001741538668055398, "loss": 2.640374183654785, "step": 9294, "token_acc": 0.3509067727164434 }, { "epoch": 5.4485488126649075, "grad_norm": 0.3250992497972495, "learning_rate": 0.000174147363938787, "loss": 2.6559290885925293, "step": 9295, "token_acc": 0.34717715452097203 }, { "epoch": 5.449135150982117, "grad_norm": 0.2695904108673803, "learning_rate": 0.00017414086037551962, "loss": 2.6337718963623047, "step": 9296, "token_acc": 0.3511304002050992 }, { "epoch": 5.449721489299326, "grad_norm": 0.29978425485416504, "learning_rate": 0.0001741343561157988, "loss": 2.6178317070007324, "step": 9297, "token_acc": 0.35389719985660417 }, { "epoch": 5.450307827616535, "grad_norm": 0.3177734866705486, "learning_rate": 0.00017412785115968556, "loss": 2.631922721862793, "step": 9298, "token_acc": 0.35220932871149224 }, { "epoch": 5.450894165933744, "grad_norm": 0.267781707337024, "learning_rate": 0.00017412134550724107, "loss": 2.683915376663208, "step": 9299, "token_acc": 0.34389442076152665 }, { "epoch": 5.451480504250953, "grad_norm": 0.27595763555867653, "learning_rate": 0.00017411483915852642, "loss": 2.6174488067626953, "step": 9300, "token_acc": 0.3539585595853808 }, { "epoch": 5.452066842568162, "grad_norm": 0.2999665626197228, "learning_rate": 0.0001741083321136027, "loss": 2.655979633331299, "step": 9301, "token_acc": 0.3489852183662747 }, { "epoch": 5.45265318088537, "grad_norm": 0.26052848551400154, "learning_rate": 0.00017410182437253111, "loss": 2.6884846687316895, "step": 9302, "token_acc": 0.34258274094976693 }, { "epoch": 5.4532395192025795, "grad_norm": 0.2922634219412777, "learning_rate": 0.0001740953159353727, "loss": 2.6818859577178955, "step": 9303, "token_acc": 0.3448977944641597 }, { "epoch": 5.453825857519789, "grad_norm": 0.27134106517901835, "learning_rate": 0.00017408880680218865, "loss": 2.6419026851654053, "step": 9304, "token_acc": 0.3523951655172592 }, { "epoch": 5.454412195836998, "grad_norm": 0.2876124856859591, "learning_rate": 0.0001740822969730401, "loss": 2.6196064949035645, "step": 9305, "token_acc": 0.35319417848611223 }, { "epoch": 5.454998534154207, "grad_norm": 0.29044534669936084, "learning_rate": 0.00017407578644798818, "loss": 2.68595552444458, "step": 9306, "token_acc": 0.3448662334102014 }, { "epoch": 5.455584872471416, "grad_norm": 0.26973562649463134, "learning_rate": 0.00017406927522709408, "loss": 2.630375385284424, "step": 9307, "token_acc": 0.35358303421617143 }, { "epoch": 5.456171210788625, "grad_norm": 0.3151115208262226, "learning_rate": 0.00017406276331041894, "loss": 2.6562623977661133, "step": 9308, "token_acc": 0.34878196178131576 }, { "epoch": 5.456757549105834, "grad_norm": 0.26480537698557466, "learning_rate": 0.00017405625069802393, "loss": 2.6395699977874756, "step": 9309, "token_acc": 0.3518182029057225 }, { "epoch": 5.457343887423043, "grad_norm": 0.28618944384003336, "learning_rate": 0.00017404973738997028, "loss": 2.6706836223602295, "step": 9310, "token_acc": 0.3468155455858324 }, { "epoch": 5.457930225740252, "grad_norm": 0.29773744590181683, "learning_rate": 0.0001740432233863191, "loss": 2.6483659744262695, "step": 9311, "token_acc": 0.3489965044577982 }, { "epoch": 5.4585165640574616, "grad_norm": 0.2662053080072157, "learning_rate": 0.0001740367086871316, "loss": 2.6510047912597656, "step": 9312, "token_acc": 0.3508390813577518 }, { "epoch": 5.45910290237467, "grad_norm": 0.27678042396670544, "learning_rate": 0.000174030193292469, "loss": 2.663435459136963, "step": 9313, "token_acc": 0.3465382490344035 }, { "epoch": 5.459689240691879, "grad_norm": 0.3203594893846758, "learning_rate": 0.00017402367720239248, "loss": 2.660707950592041, "step": 9314, "token_acc": 0.3481814761146114 }, { "epoch": 5.460275579009088, "grad_norm": 0.30332311298228415, "learning_rate": 0.00017401716041696327, "loss": 2.6377854347229004, "step": 9315, "token_acc": 0.35267191155917776 }, { "epoch": 5.460861917326297, "grad_norm": 0.26408385550656044, "learning_rate": 0.0001740106429362426, "loss": 2.6434619426727295, "step": 9316, "token_acc": 0.35168904275983115 }, { "epoch": 5.461448255643506, "grad_norm": 0.30589332025149496, "learning_rate": 0.00017400412476029165, "loss": 2.6600189208984375, "step": 9317, "token_acc": 0.3477878397227271 }, { "epoch": 5.462034593960715, "grad_norm": 0.30668118452255516, "learning_rate": 0.00017399760588917163, "loss": 2.668876886367798, "step": 9318, "token_acc": 0.3469260871155323 }, { "epoch": 5.4626209322779244, "grad_norm": 0.276483070883018, "learning_rate": 0.00017399108632294388, "loss": 2.6713247299194336, "step": 9319, "token_acc": 0.3473461495792193 }, { "epoch": 5.463207270595134, "grad_norm": 0.2838596765542573, "learning_rate": 0.00017398456606166956, "loss": 2.6146204471588135, "step": 9320, "token_acc": 0.3545785389349746 }, { "epoch": 5.463793608912343, "grad_norm": 0.2645734110421507, "learning_rate": 0.00017397804510540992, "loss": 2.661116123199463, "step": 9321, "token_acc": 0.34743536493928917 }, { "epoch": 5.464379947229552, "grad_norm": 0.278720188378057, "learning_rate": 0.00017397152345422626, "loss": 2.6144638061523438, "step": 9322, "token_acc": 0.35589518521964786 }, { "epoch": 5.464966285546761, "grad_norm": 0.28977271362060236, "learning_rate": 0.00017396500110817978, "loss": 2.6785597801208496, "step": 9323, "token_acc": 0.34519601328903654 }, { "epoch": 5.465552623863969, "grad_norm": 0.25313250889276745, "learning_rate": 0.0001739584780673318, "loss": 2.640286445617676, "step": 9324, "token_acc": 0.35178047487947056 }, { "epoch": 5.466138962181178, "grad_norm": 0.28833440127614024, "learning_rate": 0.0001739519543317436, "loss": 2.63649582862854, "step": 9325, "token_acc": 0.3517091972793604 }, { "epoch": 5.466725300498387, "grad_norm": 0.26294784887766226, "learning_rate": 0.0001739454299014764, "loss": 2.6604275703430176, "step": 9326, "token_acc": 0.34682292904145534 }, { "epoch": 5.4673116388155965, "grad_norm": 0.28017460519453047, "learning_rate": 0.00017393890477659157, "loss": 2.646897554397583, "step": 9327, "token_acc": 0.350433971141374 }, { "epoch": 5.467897977132806, "grad_norm": 0.3146127454848998, "learning_rate": 0.00017393237895715034, "loss": 2.6719017028808594, "step": 9328, "token_acc": 0.3449838354435271 }, { "epoch": 5.468484315450015, "grad_norm": 0.26860868264912446, "learning_rate": 0.00017392585244321404, "loss": 2.6594743728637695, "step": 9329, "token_acc": 0.3485607175614855 }, { "epoch": 5.469070653767224, "grad_norm": 0.34455698944111873, "learning_rate": 0.00017391932523484397, "loss": 2.6771316528320312, "step": 9330, "token_acc": 0.3464481451644854 }, { "epoch": 5.469656992084433, "grad_norm": 0.32099053548764633, "learning_rate": 0.00017391279733210144, "loss": 2.6617016792297363, "step": 9331, "token_acc": 0.3493789360613537 }, { "epoch": 5.470243330401642, "grad_norm": 0.30945263570789366, "learning_rate": 0.00017390626873504782, "loss": 2.6670072078704834, "step": 9332, "token_acc": 0.3460895678958384 }, { "epoch": 5.470829668718851, "grad_norm": 0.3973434159286546, "learning_rate": 0.00017389973944374434, "loss": 2.6650776863098145, "step": 9333, "token_acc": 0.3475084473917773 }, { "epoch": 5.47141600703606, "grad_norm": 0.303254980588608, "learning_rate": 0.00017389320945825242, "loss": 2.680159330368042, "step": 9334, "token_acc": 0.344187926458935 }, { "epoch": 5.4720023453532685, "grad_norm": 0.29341717875090767, "learning_rate": 0.00017388667877863334, "loss": 2.6701245307922363, "step": 9335, "token_acc": 0.3480163284128007 }, { "epoch": 5.472588683670478, "grad_norm": 0.2710885264019792, "learning_rate": 0.0001738801474049485, "loss": 2.6102588176727295, "step": 9336, "token_acc": 0.3562948487618327 }, { "epoch": 5.473175021987687, "grad_norm": 0.28160601701267257, "learning_rate": 0.00017387361533725924, "loss": 2.6840109825134277, "step": 9337, "token_acc": 0.3446664531006346 }, { "epoch": 5.473761360304896, "grad_norm": 0.2762566658130932, "learning_rate": 0.00017386708257562686, "loss": 2.6841378211975098, "step": 9338, "token_acc": 0.34368588927656896 }, { "epoch": 5.474347698622105, "grad_norm": 0.2701273096117695, "learning_rate": 0.00017386054912011284, "loss": 2.6066830158233643, "step": 9339, "token_acc": 0.35550053738676496 }, { "epoch": 5.474934036939314, "grad_norm": 0.2763901194200535, "learning_rate": 0.00017385401497077845, "loss": 2.6395537853240967, "step": 9340, "token_acc": 0.35032762445374577 }, { "epoch": 5.475520375256523, "grad_norm": 0.26932033810523065, "learning_rate": 0.00017384748012768512, "loss": 2.65915584564209, "step": 9341, "token_acc": 0.34760120814591083 }, { "epoch": 5.476106713573732, "grad_norm": 0.28250410614893195, "learning_rate": 0.00017384094459089422, "loss": 2.6433002948760986, "step": 9342, "token_acc": 0.351272283036762 }, { "epoch": 5.476693051890941, "grad_norm": 0.27790802438107404, "learning_rate": 0.00017383440836046715, "loss": 2.687185049057007, "step": 9343, "token_acc": 0.34443859083534495 }, { "epoch": 5.4772793902081505, "grad_norm": 0.2715486113615593, "learning_rate": 0.0001738278714364653, "loss": 2.680638074874878, "step": 9344, "token_acc": 0.3454077365394668 }, { "epoch": 5.477865728525359, "grad_norm": 0.28518037759277326, "learning_rate": 0.0001738213338189501, "loss": 2.663626194000244, "step": 9345, "token_acc": 0.34835818368448346 }, { "epoch": 5.478452066842568, "grad_norm": 0.2646345518178395, "learning_rate": 0.00017381479550798297, "loss": 2.64632511138916, "step": 9346, "token_acc": 0.3509986747072985 }, { "epoch": 5.479038405159777, "grad_norm": 0.2592955110522507, "learning_rate": 0.00017380825650362524, "loss": 2.6906065940856934, "step": 9347, "token_acc": 0.3428210290938414 }, { "epoch": 5.479624743476986, "grad_norm": 0.2659796909760562, "learning_rate": 0.00017380171680593843, "loss": 2.6582560539245605, "step": 9348, "token_acc": 0.34921183177041404 }, { "epoch": 5.480211081794195, "grad_norm": 0.2705188844914245, "learning_rate": 0.00017379517641498394, "loss": 2.6619350910186768, "step": 9349, "token_acc": 0.3481304266154617 }, { "epoch": 5.480797420111404, "grad_norm": 0.2909725148900551, "learning_rate": 0.0001737886353308232, "loss": 2.670841693878174, "step": 9350, "token_acc": 0.3489178012369487 }, { "epoch": 5.481383758428613, "grad_norm": 0.2621433360463305, "learning_rate": 0.00017378209355351767, "loss": 2.6771442890167236, "step": 9351, "token_acc": 0.3460719034308029 }, { "epoch": 5.4819700967458225, "grad_norm": 0.28425192352323025, "learning_rate": 0.0001737755510831288, "loss": 2.6732242107391357, "step": 9352, "token_acc": 0.34619621031123127 }, { "epoch": 5.482556435063032, "grad_norm": 0.25994564849115154, "learning_rate": 0.00017376900791971802, "loss": 2.684951066970825, "step": 9353, "token_acc": 0.34395369532313635 }, { "epoch": 5.483142773380241, "grad_norm": 0.278889134498189, "learning_rate": 0.00017376246406334685, "loss": 2.6220078468322754, "step": 9354, "token_acc": 0.3528867340123999 }, { "epoch": 5.48372911169745, "grad_norm": 0.2749130315885006, "learning_rate": 0.0001737559195140767, "loss": 2.666330099105835, "step": 9355, "token_acc": 0.3477057448640669 }, { "epoch": 5.484315450014659, "grad_norm": 0.2858989730591188, "learning_rate": 0.00017374937427196905, "loss": 2.68009614944458, "step": 9356, "token_acc": 0.34429530370898104 }, { "epoch": 5.484901788331867, "grad_norm": 0.4196010261050356, "learning_rate": 0.00017374282833708545, "loss": 2.6617136001586914, "step": 9357, "token_acc": 0.3476075065479039 }, { "epoch": 5.485488126649076, "grad_norm": 0.34987514280049253, "learning_rate": 0.00017373628170948733, "loss": 2.66796875, "step": 9358, "token_acc": 0.3478785191987481 }, { "epoch": 5.486074464966285, "grad_norm": 0.32453463211446626, "learning_rate": 0.0001737297343892362, "loss": 2.635098457336426, "step": 9359, "token_acc": 0.3532302638091136 }, { "epoch": 5.4866608032834945, "grad_norm": 0.3479434236447583, "learning_rate": 0.00017372318637639357, "loss": 2.6387076377868652, "step": 9360, "token_acc": 0.351332601177082 }, { "epoch": 5.487247141600704, "grad_norm": 0.2791611738606666, "learning_rate": 0.00017371663767102094, "loss": 2.654923439025879, "step": 9361, "token_acc": 0.3498700764105071 }, { "epoch": 5.487833479917913, "grad_norm": 0.32531752834081396, "learning_rate": 0.00017371008827317988, "loss": 2.6721725463867188, "step": 9362, "token_acc": 0.34630661957042574 }, { "epoch": 5.488419818235122, "grad_norm": 0.2856942808112665, "learning_rate": 0.0001737035381829318, "loss": 2.610436201095581, "step": 9363, "token_acc": 0.35533196938050543 }, { "epoch": 5.489006156552331, "grad_norm": 0.38209206980466504, "learning_rate": 0.00017369698740033832, "loss": 2.6744191646575928, "step": 9364, "token_acc": 0.345857606546631 }, { "epoch": 5.48959249486954, "grad_norm": 0.2956029594238154, "learning_rate": 0.00017369043592546098, "loss": 2.654921531677246, "step": 9365, "token_acc": 0.3492039670978242 }, { "epoch": 5.490178833186749, "grad_norm": 0.29935619187503226, "learning_rate": 0.00017368388375836128, "loss": 2.6563568115234375, "step": 9366, "token_acc": 0.348972087893895 }, { "epoch": 5.490765171503957, "grad_norm": 0.2758632275787339, "learning_rate": 0.00017367733089910075, "loss": 2.6672253608703613, "step": 9367, "token_acc": 0.3474619321412776 }, { "epoch": 5.4913515098211665, "grad_norm": 0.37015481638140496, "learning_rate": 0.000173670777347741, "loss": 2.683981418609619, "step": 9368, "token_acc": 0.34426463994630624 }, { "epoch": 5.491937848138376, "grad_norm": 0.2819303900260238, "learning_rate": 0.0001736642231043436, "loss": 2.675234317779541, "step": 9369, "token_acc": 0.345387927134943 }, { "epoch": 5.492524186455585, "grad_norm": 0.3457482070183949, "learning_rate": 0.00017365766816897002, "loss": 2.7000961303710938, "step": 9370, "token_acc": 0.3416620720111003 }, { "epoch": 5.493110524772794, "grad_norm": 0.2744326752659587, "learning_rate": 0.00017365111254168196, "loss": 2.659615993499756, "step": 9371, "token_acc": 0.347717599489253 }, { "epoch": 5.493696863090003, "grad_norm": 0.33447248091314147, "learning_rate": 0.00017364455622254093, "loss": 2.68723726272583, "step": 9372, "token_acc": 0.3446416068615438 }, { "epoch": 5.494283201407212, "grad_norm": 0.26341586037490133, "learning_rate": 0.00017363799921160853, "loss": 2.6986615657806396, "step": 9373, "token_acc": 0.34228354284166385 }, { "epoch": 5.494869539724421, "grad_norm": 0.337235905292035, "learning_rate": 0.00017363144150894634, "loss": 2.6360843181610107, "step": 9374, "token_acc": 0.35132004580450765 }, { "epoch": 5.49545587804163, "grad_norm": 0.26558554176260174, "learning_rate": 0.00017362488311461598, "loss": 2.6861143112182617, "step": 9375, "token_acc": 0.3443222490427834 }, { "epoch": 5.496042216358839, "grad_norm": 0.3533698885257246, "learning_rate": 0.00017361832402867905, "loss": 2.659644603729248, "step": 9376, "token_acc": 0.34875447667888704 }, { "epoch": 5.4966285546760485, "grad_norm": 0.2612639177592154, "learning_rate": 0.00017361176425119721, "loss": 2.692948341369629, "step": 9377, "token_acc": 0.3423559764572804 }, { "epoch": 5.497214892993257, "grad_norm": 0.33401237573003445, "learning_rate": 0.000173605203782232, "loss": 2.6597068309783936, "step": 9378, "token_acc": 0.3485873744926298 }, { "epoch": 5.497801231310466, "grad_norm": 0.26197886707808343, "learning_rate": 0.00017359864262184507, "loss": 2.6893529891967773, "step": 9379, "token_acc": 0.34534118191773555 }, { "epoch": 5.498387569627675, "grad_norm": 0.34224790625120116, "learning_rate": 0.0001735920807700981, "loss": 2.710928440093994, "step": 9380, "token_acc": 0.34055076729030903 }, { "epoch": 5.498973907944884, "grad_norm": 0.2582218248274261, "learning_rate": 0.00017358551822705271, "loss": 2.6384365558624268, "step": 9381, "token_acc": 0.3501637432736574 }, { "epoch": 5.499560246262093, "grad_norm": 0.2876180452219583, "learning_rate": 0.00017357895499277052, "loss": 2.653571128845215, "step": 9382, "token_acc": 0.3489325296519541 }, { "epoch": 5.500146584579302, "grad_norm": 0.25048842783702785, "learning_rate": 0.00017357239106731317, "loss": 2.6648755073547363, "step": 9383, "token_acc": 0.34534254943964404 }, { "epoch": 5.500732922896511, "grad_norm": 0.2854732012402806, "learning_rate": 0.00017356582645074235, "loss": 2.671459674835205, "step": 9384, "token_acc": 0.3473649194265138 }, { "epoch": 5.5013192612137205, "grad_norm": 0.26130353309652177, "learning_rate": 0.00017355926114311977, "loss": 2.663386821746826, "step": 9385, "token_acc": 0.3481534262782162 }, { "epoch": 5.50190559953093, "grad_norm": 0.29852216946881543, "learning_rate": 0.000173552695144507, "loss": 2.7103805541992188, "step": 9386, "token_acc": 0.3390252826167655 }, { "epoch": 5.502491937848139, "grad_norm": 0.2595333761191849, "learning_rate": 0.0001735461284549658, "loss": 2.664442539215088, "step": 9387, "token_acc": 0.34683414442352706 }, { "epoch": 5.503078276165347, "grad_norm": 0.2693743804237651, "learning_rate": 0.00017353956107455783, "loss": 2.653409481048584, "step": 9388, "token_acc": 0.34893336592935875 }, { "epoch": 5.503664614482556, "grad_norm": 0.25750135133069596, "learning_rate": 0.00017353299300334476, "loss": 2.6595613956451416, "step": 9389, "token_acc": 0.3484983539231286 }, { "epoch": 5.504250952799765, "grad_norm": 0.2683014170752076, "learning_rate": 0.00017352642424138832, "loss": 2.653118371963501, "step": 9390, "token_acc": 0.3494822208036884 }, { "epoch": 5.504837291116974, "grad_norm": 0.26262004819717516, "learning_rate": 0.00017351985478875022, "loss": 2.6844658851623535, "step": 9391, "token_acc": 0.3438432073544433 }, { "epoch": 5.505423629434183, "grad_norm": 0.2739852468252056, "learning_rate": 0.0001735132846454921, "loss": 2.6895031929016113, "step": 9392, "token_acc": 0.34403324921151945 }, { "epoch": 5.5060099677513925, "grad_norm": 0.2644354186008587, "learning_rate": 0.00017350671381167579, "loss": 2.665036916732788, "step": 9393, "token_acc": 0.3457236859674516 }, { "epoch": 5.506596306068602, "grad_norm": 0.27149964756133255, "learning_rate": 0.0001735001422873629, "loss": 2.706864833831787, "step": 9394, "token_acc": 0.3411302337528898 }, { "epoch": 5.507182644385811, "grad_norm": 0.2874202777434705, "learning_rate": 0.00017349357007261527, "loss": 2.661686420440674, "step": 9395, "token_acc": 0.3484246552446779 }, { "epoch": 5.50776898270302, "grad_norm": 0.26934167645066354, "learning_rate": 0.00017348699716749456, "loss": 2.66831111907959, "step": 9396, "token_acc": 0.34639575020708074 }, { "epoch": 5.508355321020229, "grad_norm": 0.27264006958204745, "learning_rate": 0.00017348042357206254, "loss": 2.6596550941467285, "step": 9397, "token_acc": 0.34834885473754135 }, { "epoch": 5.508941659337438, "grad_norm": 0.2837479217510576, "learning_rate": 0.00017347384928638097, "loss": 2.705252170562744, "step": 9398, "token_acc": 0.3407398966879368 }, { "epoch": 5.509527997654647, "grad_norm": 0.26123979435716504, "learning_rate": 0.00017346727431051155, "loss": 2.6580193042755127, "step": 9399, "token_acc": 0.34781223001875383 }, { "epoch": 5.510114335971855, "grad_norm": 0.2854790579676716, "learning_rate": 0.0001734606986445161, "loss": 2.656141757965088, "step": 9400, "token_acc": 0.3483431670081494 }, { "epoch": 5.5107006742890645, "grad_norm": 0.3105751990942194, "learning_rate": 0.00017345412228845638, "loss": 2.7000811100006104, "step": 9401, "token_acc": 0.34148358749030755 }, { "epoch": 5.511287012606274, "grad_norm": 0.261410517279633, "learning_rate": 0.0001734475452423942, "loss": 2.7129154205322266, "step": 9402, "token_acc": 0.3402588420999839 }, { "epoch": 5.511873350923483, "grad_norm": 0.26749809848006917, "learning_rate": 0.00017344096750639127, "loss": 2.649630069732666, "step": 9403, "token_acc": 0.35005843936283787 }, { "epoch": 5.512459689240692, "grad_norm": 0.26545801573768124, "learning_rate": 0.0001734343890805094, "loss": 2.6510887145996094, "step": 9404, "token_acc": 0.3502283363915634 }, { "epoch": 5.513046027557901, "grad_norm": 0.2811816875724293, "learning_rate": 0.00017342780996481042, "loss": 2.6524534225463867, "step": 9405, "token_acc": 0.3476690624729005 }, { "epoch": 5.51363236587511, "grad_norm": 0.27071182188601506, "learning_rate": 0.0001734212301593561, "loss": 2.6667261123657227, "step": 9406, "token_acc": 0.34701281033896386 }, { "epoch": 5.514218704192319, "grad_norm": 0.2694667105565443, "learning_rate": 0.00017341464966420827, "loss": 2.653271436691284, "step": 9407, "token_acc": 0.3485127423231119 }, { "epoch": 5.514805042509528, "grad_norm": 0.2660609369878042, "learning_rate": 0.00017340806847942876, "loss": 2.6883373260498047, "step": 9408, "token_acc": 0.34254216648962355 }, { "epoch": 5.515391380826737, "grad_norm": 0.27916671670689874, "learning_rate": 0.0001734014866050793, "loss": 2.6706161499023438, "step": 9409, "token_acc": 0.34544597578311675 }, { "epoch": 5.515977719143946, "grad_norm": 0.26366535787870504, "learning_rate": 0.00017339490404122182, "loss": 2.641026020050049, "step": 9410, "token_acc": 0.35069148165440406 }, { "epoch": 5.516564057461155, "grad_norm": 0.28878699951034664, "learning_rate": 0.0001733883207879181, "loss": 2.6849069595336914, "step": 9411, "token_acc": 0.34376355591544 }, { "epoch": 5.517150395778364, "grad_norm": 0.34832723643607066, "learning_rate": 0.00017338173684523005, "loss": 2.67995023727417, "step": 9412, "token_acc": 0.344861109276553 }, { "epoch": 5.517736734095573, "grad_norm": 0.3315500763755568, "learning_rate": 0.00017337515221321943, "loss": 2.6788172721862793, "step": 9413, "token_acc": 0.34414141122567066 }, { "epoch": 5.518323072412782, "grad_norm": 0.2765005512790981, "learning_rate": 0.00017336856689194812, "loss": 2.6608054637908936, "step": 9414, "token_acc": 0.34776972755260294 }, { "epoch": 5.518909410729991, "grad_norm": 0.2777264683142074, "learning_rate": 0.00017336198088147798, "loss": 2.640443801879883, "step": 9415, "token_acc": 0.3515483213073279 }, { "epoch": 5.5194957490472, "grad_norm": 0.2965145179763992, "learning_rate": 0.0001733553941818709, "loss": 2.632626533508301, "step": 9416, "token_acc": 0.35146126692696145 }, { "epoch": 5.520082087364409, "grad_norm": 0.27930860725616957, "learning_rate": 0.00017334880679318877, "loss": 2.6832375526428223, "step": 9417, "token_acc": 0.3442544793034473 }, { "epoch": 5.5206684256816185, "grad_norm": 0.26379131911943793, "learning_rate": 0.00017334221871549338, "loss": 2.672139883041382, "step": 9418, "token_acc": 0.34649010565458316 }, { "epoch": 5.521254763998828, "grad_norm": 0.29427344276677414, "learning_rate": 0.00017333562994884674, "loss": 2.7112064361572266, "step": 9419, "token_acc": 0.340765790110303 }, { "epoch": 5.521841102316037, "grad_norm": 0.26301003316732136, "learning_rate": 0.00017332904049331064, "loss": 2.666936159133911, "step": 9420, "token_acc": 0.3495162704266164 }, { "epoch": 5.522427440633246, "grad_norm": 0.26199707296416314, "learning_rate": 0.000173322450348947, "loss": 2.6565511226654053, "step": 9421, "token_acc": 0.3478055325195307 }, { "epoch": 5.523013778950454, "grad_norm": 0.26870919858202064, "learning_rate": 0.0001733158595158178, "loss": 2.697734832763672, "step": 9422, "token_acc": 0.34291472490967856 }, { "epoch": 5.523600117267663, "grad_norm": 0.24970956702514127, "learning_rate": 0.00017330926799398482, "loss": 2.693329334259033, "step": 9423, "token_acc": 0.34346936983924525 }, { "epoch": 5.524186455584872, "grad_norm": 0.2627069105990618, "learning_rate": 0.0001733026757835101, "loss": 2.671926736831665, "step": 9424, "token_acc": 0.34480162424929917 }, { "epoch": 5.524772793902081, "grad_norm": 0.24951018807561823, "learning_rate": 0.0001732960828844555, "loss": 2.6325125694274902, "step": 9425, "token_acc": 0.352132381699868 }, { "epoch": 5.5253591322192905, "grad_norm": 0.2663978983076645, "learning_rate": 0.000173289489296883, "loss": 2.6883883476257324, "step": 9426, "token_acc": 0.34452838186846974 }, { "epoch": 5.5259454705365, "grad_norm": 0.2831414413177394, "learning_rate": 0.0001732828950208545, "loss": 2.667560577392578, "step": 9427, "token_acc": 0.34693367807668346 }, { "epoch": 5.526531808853709, "grad_norm": 0.32866410144456365, "learning_rate": 0.00017327630005643192, "loss": 2.6780049800872803, "step": 9428, "token_acc": 0.3454617177321824 }, { "epoch": 5.527118147170918, "grad_norm": 0.3054373271427708, "learning_rate": 0.00017326970440367724, "loss": 2.675140857696533, "step": 9429, "token_acc": 0.3470856344823208 }, { "epoch": 5.527704485488127, "grad_norm": 0.2834955353259913, "learning_rate": 0.00017326310806265244, "loss": 2.661965847015381, "step": 9430, "token_acc": 0.34824867668218695 }, { "epoch": 5.528290823805335, "grad_norm": 0.265505641370291, "learning_rate": 0.00017325651103341943, "loss": 2.654348850250244, "step": 9431, "token_acc": 0.3488534911279428 }, { "epoch": 5.528877162122544, "grad_norm": 0.2773785376453433, "learning_rate": 0.00017324991331604025, "loss": 2.7341766357421875, "step": 9432, "token_acc": 0.33629781744464626 }, { "epoch": 5.529463500439753, "grad_norm": 0.29842901379287545, "learning_rate": 0.00017324331491057687, "loss": 2.6686980724334717, "step": 9433, "token_acc": 0.34697090161645866 }, { "epoch": 5.5300498387569625, "grad_norm": 0.31064705131243536, "learning_rate": 0.0001732367158170912, "loss": 2.6444926261901855, "step": 9434, "token_acc": 0.3505938126191291 }, { "epoch": 5.530636177074172, "grad_norm": 0.30745288018032235, "learning_rate": 0.00017323011603564528, "loss": 2.6743435859680176, "step": 9435, "token_acc": 0.34515766357398786 }, { "epoch": 5.531222515391381, "grad_norm": 0.26186769836133367, "learning_rate": 0.00017322351556630107, "loss": 2.6852824687957764, "step": 9436, "token_acc": 0.34505779709939904 }, { "epoch": 5.53180885370859, "grad_norm": 0.3359407176988424, "learning_rate": 0.00017321691440912065, "loss": 2.6544008255004883, "step": 9437, "token_acc": 0.3501308380555491 }, { "epoch": 5.532395192025799, "grad_norm": 0.29643486899031, "learning_rate": 0.00017321031256416596, "loss": 2.672698497772217, "step": 9438, "token_acc": 0.34539805625810427 }, { "epoch": 5.532981530343008, "grad_norm": 0.27589776096650775, "learning_rate": 0.000173203710031499, "loss": 2.6893701553344727, "step": 9439, "token_acc": 0.3441357619545185 }, { "epoch": 5.533567868660217, "grad_norm": 0.3194298934422611, "learning_rate": 0.00017319710681118188, "loss": 2.6448814868927, "step": 9440, "token_acc": 0.3502327235925108 }, { "epoch": 5.534154206977426, "grad_norm": 0.2936641899372622, "learning_rate": 0.00017319050290327657, "loss": 2.67972469329834, "step": 9441, "token_acc": 0.34374282315479276 }, { "epoch": 5.534740545294635, "grad_norm": 0.2661527357511759, "learning_rate": 0.0001731838983078451, "loss": 2.6797404289245605, "step": 9442, "token_acc": 0.3439920602305945 }, { "epoch": 5.535326883611844, "grad_norm": 0.32119209545532196, "learning_rate": 0.00017317729302494952, "loss": 2.6530847549438477, "step": 9443, "token_acc": 0.34917912788873784 }, { "epoch": 5.535913221929053, "grad_norm": 0.3233764906251096, "learning_rate": 0.00017317068705465188, "loss": 2.6515626907348633, "step": 9444, "token_acc": 0.3489255751301931 }, { "epoch": 5.536499560246262, "grad_norm": 0.26871531157327827, "learning_rate": 0.00017316408039701423, "loss": 2.690389633178711, "step": 9445, "token_acc": 0.342866602347062 }, { "epoch": 5.537085898563471, "grad_norm": 0.3626831467148151, "learning_rate": 0.00017315747305209861, "loss": 2.6333746910095215, "step": 9446, "token_acc": 0.3533416608167816 }, { "epoch": 5.53767223688068, "grad_norm": 0.3319070200460502, "learning_rate": 0.00017315086501996715, "loss": 2.6874165534973145, "step": 9447, "token_acc": 0.3426787954511979 }, { "epoch": 5.538258575197889, "grad_norm": 0.30166759164789586, "learning_rate": 0.0001731442563006819, "loss": 2.6765732765197754, "step": 9448, "token_acc": 0.3440254903762479 }, { "epoch": 5.538844913515098, "grad_norm": 0.34795391035649303, "learning_rate": 0.0001731376468943049, "loss": 2.6530773639678955, "step": 9449, "token_acc": 0.34677114127599507 }, { "epoch": 5.5394312518323074, "grad_norm": 0.29991835282214735, "learning_rate": 0.00017313103680089825, "loss": 2.6525368690490723, "step": 9450, "token_acc": 0.3490909836443688 }, { "epoch": 5.540017590149517, "grad_norm": 0.43125454174346906, "learning_rate": 0.00017312442602052407, "loss": 2.6716771125793457, "step": 9451, "token_acc": 0.34605374952090723 }, { "epoch": 5.540603928466726, "grad_norm": 0.2830518989538011, "learning_rate": 0.00017311781455324444, "loss": 2.661417007446289, "step": 9452, "token_acc": 0.3483629029885279 }, { "epoch": 5.541190266783934, "grad_norm": 0.3569299427087462, "learning_rate": 0.00017311120239912146, "loss": 2.7349114418029785, "step": 9453, "token_acc": 0.3362143474503025 }, { "epoch": 5.541776605101143, "grad_norm": 0.26659600000792616, "learning_rate": 0.00017310458955821726, "loss": 2.652721405029297, "step": 9454, "token_acc": 0.34900500376152216 }, { "epoch": 5.542362943418352, "grad_norm": 0.3379262724026496, "learning_rate": 0.00017309797603059398, "loss": 2.699692487716675, "step": 9455, "token_acc": 0.34169497933656456 }, { "epoch": 5.542949281735561, "grad_norm": 0.27987420423582204, "learning_rate": 0.00017309136181631364, "loss": 2.7045164108276367, "step": 9456, "token_acc": 0.3400708845642041 }, { "epoch": 5.54353562005277, "grad_norm": 0.2956594661251451, "learning_rate": 0.00017308474691543851, "loss": 2.6820507049560547, "step": 9457, "token_acc": 0.34294393477125673 }, { "epoch": 5.5441219583699795, "grad_norm": 0.2931623918506195, "learning_rate": 0.00017307813132803066, "loss": 2.6876039505004883, "step": 9458, "token_acc": 0.34183480377815295 }, { "epoch": 5.544708296687189, "grad_norm": 0.25690523799219306, "learning_rate": 0.00017307151505415222, "loss": 2.661341428756714, "step": 9459, "token_acc": 0.34912238896642744 }, { "epoch": 5.545294635004398, "grad_norm": 0.3406428122190264, "learning_rate": 0.0001730648980938654, "loss": 2.6577606201171875, "step": 9460, "token_acc": 0.3475338844099026 }, { "epoch": 5.545880973321607, "grad_norm": 0.2861391411221247, "learning_rate": 0.00017305828044723227, "loss": 2.697904109954834, "step": 9461, "token_acc": 0.3418006114320227 }, { "epoch": 5.546467311638816, "grad_norm": 0.2989303338782182, "learning_rate": 0.00017305166211431508, "loss": 2.689767360687256, "step": 9462, "token_acc": 0.3434398505195293 }, { "epoch": 5.547053649956025, "grad_norm": 0.25128811966664044, "learning_rate": 0.00017304504309517593, "loss": 2.6647088527679443, "step": 9463, "token_acc": 0.348135078940041 }, { "epoch": 5.547639988273234, "grad_norm": 0.3064542146997696, "learning_rate": 0.00017303842338987706, "loss": 2.6758477687835693, "step": 9464, "token_acc": 0.34454232773689114 }, { "epoch": 5.548226326590442, "grad_norm": 0.2546947441397591, "learning_rate": 0.0001730318029984806, "loss": 2.6483469009399414, "step": 9465, "token_acc": 0.3496044153812058 }, { "epoch": 5.5488126649076515, "grad_norm": 0.29504332566230695, "learning_rate": 0.00017302518192104877, "loss": 2.665048599243164, "step": 9466, "token_acc": 0.3482107701215981 }, { "epoch": 5.549399003224861, "grad_norm": 0.256925386255931, "learning_rate": 0.00017301856015764378, "loss": 2.6136746406555176, "step": 9467, "token_acc": 0.3534764349043397 }, { "epoch": 5.54998534154207, "grad_norm": 0.2963430006465843, "learning_rate": 0.00017301193770832778, "loss": 2.71746563911438, "step": 9468, "token_acc": 0.33966461106819645 }, { "epoch": 5.550571679859279, "grad_norm": 0.25643853564385827, "learning_rate": 0.000173005314573163, "loss": 2.6924004554748535, "step": 9469, "token_acc": 0.34381214123466364 }, { "epoch": 5.551158018176488, "grad_norm": 0.2866427943281811, "learning_rate": 0.0001729986907522117, "loss": 2.665832996368408, "step": 9470, "token_acc": 0.34674378997432154 }, { "epoch": 5.551744356493697, "grad_norm": 0.2522961575135783, "learning_rate": 0.00017299206624553606, "loss": 2.6874403953552246, "step": 9471, "token_acc": 0.34224498332933173 }, { "epoch": 5.552330694810906, "grad_norm": 0.2701326536425539, "learning_rate": 0.00017298544105319832, "loss": 2.682722806930542, "step": 9472, "token_acc": 0.3458279798149079 }, { "epoch": 5.552917033128115, "grad_norm": 0.29517384853147194, "learning_rate": 0.00017297881517526066, "loss": 2.6634249687194824, "step": 9473, "token_acc": 0.3478224212883678 }, { "epoch": 5.5535033714453235, "grad_norm": 0.2893749885504846, "learning_rate": 0.00017297218861178545, "loss": 2.666928291320801, "step": 9474, "token_acc": 0.34810414181628535 }, { "epoch": 5.554089709762533, "grad_norm": 0.2927214316628923, "learning_rate": 0.0001729655613628348, "loss": 2.6892919540405273, "step": 9475, "token_acc": 0.3437058444871216 }, { "epoch": 5.554676048079742, "grad_norm": 0.28272498642890176, "learning_rate": 0.00017295893342847104, "loss": 2.6703104972839355, "step": 9476, "token_acc": 0.34774146610871554 }, { "epoch": 5.555262386396951, "grad_norm": 0.2874734528475493, "learning_rate": 0.00017295230480875642, "loss": 2.698310136795044, "step": 9477, "token_acc": 0.34064780552647056 }, { "epoch": 5.55584872471416, "grad_norm": 0.2736366698793052, "learning_rate": 0.0001729456755037532, "loss": 2.6916580200195312, "step": 9478, "token_acc": 0.34195693542381994 }, { "epoch": 5.556435063031369, "grad_norm": 0.3152978451387112, "learning_rate": 0.00017293904551352366, "loss": 2.65950870513916, "step": 9479, "token_acc": 0.34799432140532144 }, { "epoch": 5.557021401348578, "grad_norm": 0.3052567184440223, "learning_rate": 0.00017293241483813006, "loss": 2.6542787551879883, "step": 9480, "token_acc": 0.3490028223271492 }, { "epoch": 5.557607739665787, "grad_norm": 0.2816850458264859, "learning_rate": 0.0001729257834776347, "loss": 2.6838974952697754, "step": 9481, "token_acc": 0.3435410684719049 }, { "epoch": 5.558194077982996, "grad_norm": 0.30439044435788276, "learning_rate": 0.00017291915143209988, "loss": 2.681971788406372, "step": 9482, "token_acc": 0.34243155837733924 }, { "epoch": 5.5587804163002055, "grad_norm": 0.27808374529967367, "learning_rate": 0.00017291251870158792, "loss": 2.695809841156006, "step": 9483, "token_acc": 0.3422477135165316 }, { "epoch": 5.559366754617415, "grad_norm": 0.31562240032589156, "learning_rate": 0.00017290588528616105, "loss": 2.686605453491211, "step": 9484, "token_acc": 0.34430643485201473 }, { "epoch": 5.559953092934624, "grad_norm": 0.28587776119346625, "learning_rate": 0.00017289925118588165, "loss": 2.684319019317627, "step": 9485, "token_acc": 0.3443590536610468 }, { "epoch": 5.560539431251832, "grad_norm": 0.3000043704006392, "learning_rate": 0.000172892616400812, "loss": 2.6316676139831543, "step": 9486, "token_acc": 0.35381603846581977 }, { "epoch": 5.561125769569041, "grad_norm": 0.27938466482531554, "learning_rate": 0.00017288598093101446, "loss": 2.6674113273620605, "step": 9487, "token_acc": 0.34602405284611176 }, { "epoch": 5.56171210788625, "grad_norm": 0.28520319300142155, "learning_rate": 0.00017287934477655135, "loss": 2.689854621887207, "step": 9488, "token_acc": 0.3427489862222329 }, { "epoch": 5.562298446203459, "grad_norm": 0.2856135209529232, "learning_rate": 0.000172872707937485, "loss": 2.6866321563720703, "step": 9489, "token_acc": 0.34463522945645353 }, { "epoch": 5.562884784520668, "grad_norm": 0.27251541740450586, "learning_rate": 0.00017286607041387778, "loss": 2.6591038703918457, "step": 9490, "token_acc": 0.34761383855024713 }, { "epoch": 5.5634711228378775, "grad_norm": 0.2911640657192143, "learning_rate": 0.00017285943220579197, "loss": 2.6778178215026855, "step": 9491, "token_acc": 0.34360578115127133 }, { "epoch": 5.564057461155087, "grad_norm": 0.2924233365135463, "learning_rate": 0.00017285279331329, "loss": 2.6685585975646973, "step": 9492, "token_acc": 0.3461153794864956 }, { "epoch": 5.564643799472296, "grad_norm": 0.2883716303319102, "learning_rate": 0.0001728461537364342, "loss": 2.697382688522339, "step": 9493, "token_acc": 0.34341927768728997 }, { "epoch": 5.565230137789505, "grad_norm": 0.3257203366526236, "learning_rate": 0.00017283951347528694, "loss": 2.674328327178955, "step": 9494, "token_acc": 0.34434078489043124 }, { "epoch": 5.565816476106714, "grad_norm": 0.25778236660646237, "learning_rate": 0.00017283287252991062, "loss": 2.68906307220459, "step": 9495, "token_acc": 0.34408718699111457 }, { "epoch": 5.566402814423922, "grad_norm": 0.29116442122375863, "learning_rate": 0.0001728262309003676, "loss": 2.6713616847991943, "step": 9496, "token_acc": 0.3460609594712976 }, { "epoch": 5.566989152741131, "grad_norm": 0.26458123442862924, "learning_rate": 0.00017281958858672027, "loss": 2.676656723022461, "step": 9497, "token_acc": 0.34633244894995974 }, { "epoch": 5.56757549105834, "grad_norm": 0.2831581877661786, "learning_rate": 0.000172812945589031, "loss": 2.6890816688537598, "step": 9498, "token_acc": 0.3423424142740038 }, { "epoch": 5.5681618293755495, "grad_norm": 0.293515002596814, "learning_rate": 0.0001728063019073623, "loss": 2.668801784515381, "step": 9499, "token_acc": 0.3469486218754812 }, { "epoch": 5.568748167692759, "grad_norm": 0.2607978546093702, "learning_rate": 0.00017279965754177644, "loss": 2.6777501106262207, "step": 9500, "token_acc": 0.34447816734726394 }, { "epoch": 5.569334506009968, "grad_norm": 0.288181359846888, "learning_rate": 0.0001727930124923359, "loss": 2.6453592777252197, "step": 9501, "token_acc": 0.35060994708072435 }, { "epoch": 5.569920844327177, "grad_norm": 0.26250521713133224, "learning_rate": 0.00017278636675910312, "loss": 2.6453347206115723, "step": 9502, "token_acc": 0.3506943804061157 }, { "epoch": 5.570507182644386, "grad_norm": 0.2565523396765342, "learning_rate": 0.0001727797203421405, "loss": 2.6938037872314453, "step": 9503, "token_acc": 0.3417875008878652 }, { "epoch": 5.571093520961595, "grad_norm": 0.27540527025358624, "learning_rate": 0.0001727730732415105, "loss": 2.683742046356201, "step": 9504, "token_acc": 0.34517230336962584 }, { "epoch": 5.571679859278804, "grad_norm": 0.26661861473737386, "learning_rate": 0.00017276642545727548, "loss": 2.6859798431396484, "step": 9505, "token_acc": 0.34400222800044766 }, { "epoch": 5.572266197596013, "grad_norm": 0.2615321347479978, "learning_rate": 0.000172759776989498, "loss": 2.6967639923095703, "step": 9506, "token_acc": 0.3403088967675528 }, { "epoch": 5.572852535913222, "grad_norm": 0.2695278979954013, "learning_rate": 0.00017275312783824042, "loss": 2.6598520278930664, "step": 9507, "token_acc": 0.34723685348885736 }, { "epoch": 5.573438874230431, "grad_norm": 0.2592194044143957, "learning_rate": 0.00017274647800356527, "loss": 2.6663970947265625, "step": 9508, "token_acc": 0.3466866813802804 }, { "epoch": 5.57402521254764, "grad_norm": 0.28867783525241614, "learning_rate": 0.000172739827485535, "loss": 2.6888844966888428, "step": 9509, "token_acc": 0.34336087483579075 }, { "epoch": 5.574611550864849, "grad_norm": 0.27420306398915206, "learning_rate": 0.00017273317628421203, "loss": 2.656083106994629, "step": 9510, "token_acc": 0.3491038651447048 }, { "epoch": 5.575197889182058, "grad_norm": 0.2745292375796734, "learning_rate": 0.0001727265243996589, "loss": 2.6714773178100586, "step": 9511, "token_acc": 0.3463119540964415 }, { "epoch": 5.575784227499267, "grad_norm": 0.2918980168788795, "learning_rate": 0.00017271987183193807, "loss": 2.6668505668640137, "step": 9512, "token_acc": 0.3480056643851782 }, { "epoch": 5.576370565816476, "grad_norm": 0.30481901122545135, "learning_rate": 0.00017271321858111202, "loss": 2.6504790782928467, "step": 9513, "token_acc": 0.3496241589571068 }, { "epoch": 5.576956904133685, "grad_norm": 0.2692296966426547, "learning_rate": 0.0001727065646472433, "loss": 2.682041645050049, "step": 9514, "token_acc": 0.34455677801772205 }, { "epoch": 5.577543242450894, "grad_norm": 0.28949548646926637, "learning_rate": 0.00017269991003039436, "loss": 2.6522960662841797, "step": 9515, "token_acc": 0.34984124949597717 }, { "epoch": 5.5781295807681035, "grad_norm": 0.3150092720675681, "learning_rate": 0.00017269325473062773, "loss": 2.689664125442505, "step": 9516, "token_acc": 0.3421989177125709 }, { "epoch": 5.578715919085313, "grad_norm": 0.2962501861272474, "learning_rate": 0.00017268659874800592, "loss": 2.6928863525390625, "step": 9517, "token_acc": 0.3433965796194526 }, { "epoch": 5.579302257402521, "grad_norm": 0.29964019541642506, "learning_rate": 0.00017267994208259143, "loss": 2.6890602111816406, "step": 9518, "token_acc": 0.3422720331872358 }, { "epoch": 5.57988859571973, "grad_norm": 0.3385485587414542, "learning_rate": 0.00017267328473444688, "loss": 2.7093987464904785, "step": 9519, "token_acc": 0.34061969784851226 }, { "epoch": 5.580474934036939, "grad_norm": 0.26641709907040595, "learning_rate": 0.0001726666267036347, "loss": 2.69830322265625, "step": 9520, "token_acc": 0.3423059869303126 }, { "epoch": 5.581061272354148, "grad_norm": 0.27748577719155926, "learning_rate": 0.00017265996799021752, "loss": 2.6623306274414062, "step": 9521, "token_acc": 0.34782296383400996 }, { "epoch": 5.581647610671357, "grad_norm": 0.2739024857547672, "learning_rate": 0.00017265330859425783, "loss": 2.6953396797180176, "step": 9522, "token_acc": 0.34351493483775036 }, { "epoch": 5.582233948988566, "grad_norm": 0.31848067354307735, "learning_rate": 0.0001726466485158182, "loss": 2.6923258304595947, "step": 9523, "token_acc": 0.34257672200361045 }, { "epoch": 5.5828202873057755, "grad_norm": 0.34115839876893417, "learning_rate": 0.00017263998775496125, "loss": 2.674044609069824, "step": 9524, "token_acc": 0.3451939927682513 }, { "epoch": 5.583406625622985, "grad_norm": 0.25283423836727303, "learning_rate": 0.00017263332631174946, "loss": 2.7193233966827393, "step": 9525, "token_acc": 0.33926552006903166 }, { "epoch": 5.583992963940194, "grad_norm": 0.3231957621995199, "learning_rate": 0.00017262666418624544, "loss": 2.6371898651123047, "step": 9526, "token_acc": 0.3502690381305339 }, { "epoch": 5.584579302257403, "grad_norm": 0.27162916626609257, "learning_rate": 0.0001726200013785118, "loss": 2.6481292247772217, "step": 9527, "token_acc": 0.3483629885063339 }, { "epoch": 5.585165640574612, "grad_norm": 0.3040474833045279, "learning_rate": 0.00017261333788861106, "loss": 2.6644997596740723, "step": 9528, "token_acc": 0.3463392718741215 }, { "epoch": 5.585751978891821, "grad_norm": 0.31590883761611954, "learning_rate": 0.0001726066737166059, "loss": 2.6718826293945312, "step": 9529, "token_acc": 0.3455741056040383 }, { "epoch": 5.586338317209029, "grad_norm": 0.26545033238765453, "learning_rate": 0.00017260000886255887, "loss": 2.6872425079345703, "step": 9530, "token_acc": 0.3450030769070982 }, { "epoch": 5.586924655526238, "grad_norm": 0.28795215125051715, "learning_rate": 0.0001725933433265326, "loss": 2.645616054534912, "step": 9531, "token_acc": 0.3495080317160347 }, { "epoch": 5.5875109938434475, "grad_norm": 0.25399968073061185, "learning_rate": 0.00017258667710858965, "loss": 2.6257543563842773, "step": 9532, "token_acc": 0.353845174595496 }, { "epoch": 5.588097332160657, "grad_norm": 0.3300004459222821, "learning_rate": 0.0001725800102087927, "loss": 2.729137659072876, "step": 9533, "token_acc": 0.3357266200127238 }, { "epoch": 5.588683670477866, "grad_norm": 0.3026773629599954, "learning_rate": 0.00017257334262720436, "loss": 2.6408743858337402, "step": 9534, "token_acc": 0.3509868395681918 }, { "epoch": 5.589270008795075, "grad_norm": 0.29936761228932973, "learning_rate": 0.00017256667436388722, "loss": 2.680730104446411, "step": 9535, "token_acc": 0.3439705690135936 }, { "epoch": 5.589856347112284, "grad_norm": 0.30545073651369886, "learning_rate": 0.00017256000541890403, "loss": 2.688145399093628, "step": 9536, "token_acc": 0.3436648889349143 }, { "epoch": 5.590442685429493, "grad_norm": 0.28566245336509705, "learning_rate": 0.00017255333579231733, "loss": 2.6684179306030273, "step": 9537, "token_acc": 0.3462927970189093 }, { "epoch": 5.591029023746702, "grad_norm": 0.3153970582488031, "learning_rate": 0.00017254666548418982, "loss": 2.6944031715393066, "step": 9538, "token_acc": 0.3428997630973876 }, { "epoch": 5.59161536206391, "grad_norm": 0.27455213831192754, "learning_rate": 0.0001725399944945841, "loss": 2.686351776123047, "step": 9539, "token_acc": 0.345387926656823 }, { "epoch": 5.5922017003811195, "grad_norm": 0.31689319639101826, "learning_rate": 0.0001725333228235629, "loss": 2.670151710510254, "step": 9540, "token_acc": 0.3468669018045342 }, { "epoch": 5.592788038698329, "grad_norm": 0.2651920529823943, "learning_rate": 0.00017252665047118892, "loss": 2.702652931213379, "step": 9541, "token_acc": 0.34046762317626456 }, { "epoch": 5.593374377015538, "grad_norm": 0.3082296243265281, "learning_rate": 0.00017251997743752474, "loss": 2.6784915924072266, "step": 9542, "token_acc": 0.34402279156063387 }, { "epoch": 5.593960715332747, "grad_norm": 0.26460348720071364, "learning_rate": 0.00017251330372263312, "loss": 2.665478229522705, "step": 9543, "token_acc": 0.3483158284425782 }, { "epoch": 5.594547053649956, "grad_norm": 0.2979342262535822, "learning_rate": 0.00017250662932657672, "loss": 2.69720458984375, "step": 9544, "token_acc": 0.34080049971412524 }, { "epoch": 5.595133391967165, "grad_norm": 0.26510563711093627, "learning_rate": 0.00017249995424941823, "loss": 2.6349191665649414, "step": 9545, "token_acc": 0.35066568989680214 }, { "epoch": 5.595719730284374, "grad_norm": 0.2970555271125922, "learning_rate": 0.00017249327849122036, "loss": 2.66591215133667, "step": 9546, "token_acc": 0.3466287077822815 }, { "epoch": 5.596306068601583, "grad_norm": 0.3138910758786903, "learning_rate": 0.00017248660205204585, "loss": 2.671900749206543, "step": 9547, "token_acc": 0.34628694164909907 }, { "epoch": 5.596892406918792, "grad_norm": 0.24128026291554897, "learning_rate": 0.0001724799249319574, "loss": 2.6586809158325195, "step": 9548, "token_acc": 0.3483696264267911 }, { "epoch": 5.5974787452360015, "grad_norm": 0.29391646535329724, "learning_rate": 0.00017247324713101766, "loss": 2.6854958534240723, "step": 9549, "token_acc": 0.3436215880275134 }, { "epoch": 5.598065083553211, "grad_norm": 0.24681472867351084, "learning_rate": 0.00017246656864928947, "loss": 2.678588390350342, "step": 9550, "token_acc": 0.3446805159703911 }, { "epoch": 5.598651421870419, "grad_norm": 0.28132837032821467, "learning_rate": 0.00017245988948683553, "loss": 2.668708324432373, "step": 9551, "token_acc": 0.3481025574925617 }, { "epoch": 5.599237760187628, "grad_norm": 0.272817763183024, "learning_rate": 0.00017245320964371855, "loss": 2.706871509552002, "step": 9552, "token_acc": 0.34029273212360717 }, { "epoch": 5.599824098504837, "grad_norm": 0.2625142910426835, "learning_rate": 0.0001724465291200013, "loss": 2.6884288787841797, "step": 9553, "token_acc": 0.34264260343362046 }, { "epoch": 5.600410436822046, "grad_norm": 0.2537382693024419, "learning_rate": 0.00017243984791574652, "loss": 2.6718735694885254, "step": 9554, "token_acc": 0.34656953389019957 }, { "epoch": 5.600996775139255, "grad_norm": 0.2642069352103558, "learning_rate": 0.00017243316603101697, "loss": 2.7170276641845703, "step": 9555, "token_acc": 0.3410430213847748 }, { "epoch": 5.601583113456464, "grad_norm": 0.24868884253586906, "learning_rate": 0.00017242648346587545, "loss": 2.703139305114746, "step": 9556, "token_acc": 0.3410795339858084 }, { "epoch": 5.6021694517736735, "grad_norm": 0.27178682092858986, "learning_rate": 0.00017241980022038474, "loss": 2.6944518089294434, "step": 9557, "token_acc": 0.34216920066626305 }, { "epoch": 5.602755790090883, "grad_norm": 0.27424919814975207, "learning_rate": 0.00017241311629460757, "loss": 2.6754965782165527, "step": 9558, "token_acc": 0.3455730184137227 }, { "epoch": 5.603342128408092, "grad_norm": 0.2685413706781158, "learning_rate": 0.00017240643168860674, "loss": 2.713334798812866, "step": 9559, "token_acc": 0.3409522537562604 }, { "epoch": 5.603928466725301, "grad_norm": 0.25971886386869275, "learning_rate": 0.00017239974640244506, "loss": 2.654721736907959, "step": 9560, "token_acc": 0.34911859544904333 }, { "epoch": 5.604514805042509, "grad_norm": 0.27804199803844065, "learning_rate": 0.00017239306043618534, "loss": 2.6440765857696533, "step": 9561, "token_acc": 0.3507363307768283 }, { "epoch": 5.605101143359718, "grad_norm": 0.2846340218616572, "learning_rate": 0.0001723863737898903, "loss": 2.6840686798095703, "step": 9562, "token_acc": 0.34385098189511926 }, { "epoch": 5.605687481676927, "grad_norm": 0.2585214699981028, "learning_rate": 0.0001723796864636229, "loss": 2.7015018463134766, "step": 9563, "token_acc": 0.34110689844290243 }, { "epoch": 5.606273819994136, "grad_norm": 0.2839966679831815, "learning_rate": 0.00017237299845744585, "loss": 2.708827257156372, "step": 9564, "token_acc": 0.3394794960404281 }, { "epoch": 5.6068601583113455, "grad_norm": 0.27633458431248276, "learning_rate": 0.000172366309771422, "loss": 2.705092668533325, "step": 9565, "token_acc": 0.34158589248930854 }, { "epoch": 5.607446496628555, "grad_norm": 0.26731375037233446, "learning_rate": 0.0001723596204056142, "loss": 2.703127384185791, "step": 9566, "token_acc": 0.3413327552297048 }, { "epoch": 5.608032834945764, "grad_norm": 0.2628737009413341, "learning_rate": 0.00017235293036008524, "loss": 2.7016396522521973, "step": 9567, "token_acc": 0.34079449921887656 }, { "epoch": 5.608619173262973, "grad_norm": 0.271931918458244, "learning_rate": 0.00017234623963489803, "loss": 2.6606388092041016, "step": 9568, "token_acc": 0.3474572043033341 }, { "epoch": 5.609205511580182, "grad_norm": 0.2742698171333254, "learning_rate": 0.0001723395482301154, "loss": 2.711544990539551, "step": 9569, "token_acc": 0.3401948679407344 }, { "epoch": 5.609791849897391, "grad_norm": 0.3018361748449417, "learning_rate": 0.00017233285614580014, "loss": 2.6753997802734375, "step": 9570, "token_acc": 0.3458021778493862 }, { "epoch": 5.6103781882146, "grad_norm": 0.29775006466249143, "learning_rate": 0.00017232616338201518, "loss": 2.672884702682495, "step": 9571, "token_acc": 0.34581213096024704 }, { "epoch": 5.610964526531809, "grad_norm": 0.2674109045338595, "learning_rate": 0.00017231946993882338, "loss": 2.681978464126587, "step": 9572, "token_acc": 0.3447729336243011 }, { "epoch": 5.6115508648490176, "grad_norm": 0.2657367020654729, "learning_rate": 0.00017231277581628764, "loss": 2.6371989250183105, "step": 9573, "token_acc": 0.3520868223596587 }, { "epoch": 5.612137203166227, "grad_norm": 0.29888779535527715, "learning_rate": 0.00017230608101447077, "loss": 2.6924996376037598, "step": 9574, "token_acc": 0.34226447030217916 }, { "epoch": 5.612723541483436, "grad_norm": 0.26593816707227347, "learning_rate": 0.00017229938553343574, "loss": 2.7103960514068604, "step": 9575, "token_acc": 0.34020578019529457 }, { "epoch": 5.613309879800645, "grad_norm": 0.28000804429137094, "learning_rate": 0.00017229268937324541, "loss": 2.6699142456054688, "step": 9576, "token_acc": 0.34684881022913233 }, { "epoch": 5.613896218117854, "grad_norm": 0.3280636946451515, "learning_rate": 0.00017228599253396265, "loss": 2.684858798980713, "step": 9577, "token_acc": 0.3441450143963026 }, { "epoch": 5.614482556435063, "grad_norm": 0.29497026079725164, "learning_rate": 0.0001722792950156504, "loss": 2.7349681854248047, "step": 9578, "token_acc": 0.3373895929970979 }, { "epoch": 5.615068894752272, "grad_norm": 0.27888967947162163, "learning_rate": 0.00017227259681837159, "loss": 2.6370911598205566, "step": 9579, "token_acc": 0.3508704183839928 }, { "epoch": 5.615655233069481, "grad_norm": 0.4086271425555681, "learning_rate": 0.00017226589794218911, "loss": 2.653137683868408, "step": 9580, "token_acc": 0.34714875678837415 }, { "epoch": 5.6162415713866904, "grad_norm": 0.35028685706137985, "learning_rate": 0.0001722591983871659, "loss": 2.6893744468688965, "step": 9581, "token_acc": 0.3429903224312769 }, { "epoch": 5.616827909703899, "grad_norm": 0.27977161103939074, "learning_rate": 0.00017225249815336488, "loss": 2.7082693576812744, "step": 9582, "token_acc": 0.3391929049796378 }, { "epoch": 5.617414248021108, "grad_norm": 0.3407775971940978, "learning_rate": 0.000172245797240849, "loss": 2.7055768966674805, "step": 9583, "token_acc": 0.34166554092670914 }, { "epoch": 5.618000586338317, "grad_norm": 0.2600046419542586, "learning_rate": 0.00017223909564968124, "loss": 2.654989242553711, "step": 9584, "token_acc": 0.35002241458442296 }, { "epoch": 5.618586924655526, "grad_norm": 0.3054417166482799, "learning_rate": 0.0001722323933799245, "loss": 2.710479259490967, "step": 9585, "token_acc": 0.33896057703875343 }, { "epoch": 5.619173262972735, "grad_norm": 0.25609859261555895, "learning_rate": 0.00017222569043164176, "loss": 2.6899924278259277, "step": 9586, "token_acc": 0.342729484898677 }, { "epoch": 5.619759601289944, "grad_norm": 0.30319158262514767, "learning_rate": 0.00017221898680489596, "loss": 2.671921968460083, "step": 9587, "token_acc": 0.348451389778404 }, { "epoch": 5.620345939607153, "grad_norm": 0.27243401289824437, "learning_rate": 0.00017221228249975014, "loss": 2.683389902114868, "step": 9588, "token_acc": 0.34456922859978345 }, { "epoch": 5.6209322779243625, "grad_norm": 0.30901186004536363, "learning_rate": 0.0001722055775162672, "loss": 2.6792807579040527, "step": 9589, "token_acc": 0.3444609148509137 }, { "epoch": 5.621518616241572, "grad_norm": 0.26434804750524393, "learning_rate": 0.00017219887185451017, "loss": 2.6610584259033203, "step": 9590, "token_acc": 0.3453238309890159 }, { "epoch": 5.622104954558781, "grad_norm": 0.3343600033096775, "learning_rate": 0.00017219216551454203, "loss": 2.6997694969177246, "step": 9591, "token_acc": 0.341328741910586 }, { "epoch": 5.62269129287599, "grad_norm": 0.29944898718648144, "learning_rate": 0.00017218545849642577, "loss": 2.6932477951049805, "step": 9592, "token_acc": 0.3417195902788799 }, { "epoch": 5.623277631193199, "grad_norm": 0.29199629429564067, "learning_rate": 0.00017217875080022442, "loss": 2.698925256729126, "step": 9593, "token_acc": 0.34316761928397954 }, { "epoch": 5.623863969510407, "grad_norm": 0.27236689540699016, "learning_rate": 0.00017217204242600092, "loss": 2.6657328605651855, "step": 9594, "token_acc": 0.3469391548652474 }, { "epoch": 5.624450307827616, "grad_norm": 0.30446357084770387, "learning_rate": 0.00017216533337381838, "loss": 2.675774574279785, "step": 9595, "token_acc": 0.3456307751726765 }, { "epoch": 5.625036646144825, "grad_norm": 0.2667273900288917, "learning_rate": 0.00017215862364373976, "loss": 2.720653533935547, "step": 9596, "token_acc": 0.3394340658158104 }, { "epoch": 5.6256229844620345, "grad_norm": 0.3135891993616265, "learning_rate": 0.0001721519132358281, "loss": 2.73087477684021, "step": 9597, "token_acc": 0.3373571856917002 }, { "epoch": 5.626209322779244, "grad_norm": 0.2750544075417309, "learning_rate": 0.00017214520215014643, "loss": 2.6480722427368164, "step": 9598, "token_acc": 0.3507457487231431 }, { "epoch": 5.626795661096453, "grad_norm": 0.2902383142752462, "learning_rate": 0.00017213849038675782, "loss": 2.6900877952575684, "step": 9599, "token_acc": 0.34327257401749023 }, { "epoch": 5.627381999413662, "grad_norm": 0.29442137581977007, "learning_rate": 0.00017213177794572532, "loss": 2.6956427097320557, "step": 9600, "token_acc": 0.3416669934256101 }, { "epoch": 5.627968337730871, "grad_norm": 0.275834745398906, "learning_rate": 0.00017212506482711194, "loss": 2.662900447845459, "step": 9601, "token_acc": 0.34729395425545767 }, { "epoch": 5.62855467604808, "grad_norm": 0.2617486056192483, "learning_rate": 0.00017211835103098078, "loss": 2.7033324241638184, "step": 9602, "token_acc": 0.34047504537908996 }, { "epoch": 5.629141014365289, "grad_norm": 0.2527867342072555, "learning_rate": 0.0001721116365573949, "loss": 2.665907859802246, "step": 9603, "token_acc": 0.34680471605994334 }, { "epoch": 5.629727352682497, "grad_norm": 0.2615780198377606, "learning_rate": 0.00017210492140641733, "loss": 2.701897144317627, "step": 9604, "token_acc": 0.3413744386179302 }, { "epoch": 5.6303136909997065, "grad_norm": 0.2586832242263612, "learning_rate": 0.0001720982055781112, "loss": 2.6905431747436523, "step": 9605, "token_acc": 0.3426048157144366 }, { "epoch": 5.630900029316916, "grad_norm": 0.258771689622604, "learning_rate": 0.00017209148907253958, "loss": 2.6716136932373047, "step": 9606, "token_acc": 0.34494342843462783 }, { "epoch": 5.631486367634125, "grad_norm": 0.2672388769846871, "learning_rate": 0.00017208477188976557, "loss": 2.6875696182250977, "step": 9607, "token_acc": 0.34482784092028285 }, { "epoch": 5.632072705951334, "grad_norm": 0.24877714299573475, "learning_rate": 0.00017207805402985228, "loss": 2.657158851623535, "step": 9608, "token_acc": 0.3474502657250928 }, { "epoch": 5.632659044268543, "grad_norm": 0.26040353690542056, "learning_rate": 0.00017207133549286278, "loss": 2.6807336807250977, "step": 9609, "token_acc": 0.3426333170807719 }, { "epoch": 5.633245382585752, "grad_norm": 0.2750706571856512, "learning_rate": 0.0001720646162788602, "loss": 2.6953413486480713, "step": 9610, "token_acc": 0.3441295224350982 }, { "epoch": 5.633831720902961, "grad_norm": 0.27217154412555883, "learning_rate": 0.00017205789638790768, "loss": 2.677666664123535, "step": 9611, "token_acc": 0.3437205925141484 }, { "epoch": 5.63441805922017, "grad_norm": 0.27063261896767504, "learning_rate": 0.0001720511758200683, "loss": 2.6994733810424805, "step": 9612, "token_acc": 0.34223900958007675 }, { "epoch": 5.635004397537379, "grad_norm": 0.2668747280725409, "learning_rate": 0.0001720444545754052, "loss": 2.700747013092041, "step": 9613, "token_acc": 0.341882276349987 }, { "epoch": 5.6355907358545885, "grad_norm": 0.3074591598718835, "learning_rate": 0.00017203773265398158, "loss": 2.731694221496582, "step": 9614, "token_acc": 0.33585005770072557 }, { "epoch": 5.636177074171798, "grad_norm": 0.2810924054234089, "learning_rate": 0.0001720310100558605, "loss": 2.720261573791504, "step": 9615, "token_acc": 0.3396750025550114 }, { "epoch": 5.636763412489006, "grad_norm": 0.2615150538293546, "learning_rate": 0.00017202428678110513, "loss": 2.728724479675293, "step": 9616, "token_acc": 0.33753262936971207 }, { "epoch": 5.637349750806215, "grad_norm": 0.32455518003444894, "learning_rate": 0.00017201756282977866, "loss": 2.685471534729004, "step": 9617, "token_acc": 0.3442908791848462 }, { "epoch": 5.637936089123424, "grad_norm": 0.28631815065560834, "learning_rate": 0.00017201083820194422, "loss": 2.686674118041992, "step": 9618, "token_acc": 0.34382407614929267 }, { "epoch": 5.638522427440633, "grad_norm": 0.2795183824407709, "learning_rate": 0.000172004112897665, "loss": 2.7081964015960693, "step": 9619, "token_acc": 0.34199075405575263 }, { "epoch": 5.639108765757842, "grad_norm": 0.35445840211209584, "learning_rate": 0.00017199738691700417, "loss": 2.6855039596557617, "step": 9620, "token_acc": 0.34333955859977305 }, { "epoch": 5.639695104075051, "grad_norm": 0.3257199841356022, "learning_rate": 0.00017199066026002492, "loss": 2.724978446960449, "step": 9621, "token_acc": 0.33752105450218656 }, { "epoch": 5.6402814423922605, "grad_norm": 0.26070852774632136, "learning_rate": 0.0001719839329267904, "loss": 2.683920383453369, "step": 9622, "token_acc": 0.3445674770274 }, { "epoch": 5.64086778070947, "grad_norm": 0.3233129408396068, "learning_rate": 0.00017197720491736384, "loss": 2.668210506439209, "step": 9623, "token_acc": 0.34627317362784016 }, { "epoch": 5.641454119026679, "grad_norm": 0.2746717369620902, "learning_rate": 0.00017197047623180843, "loss": 2.656402587890625, "step": 9624, "token_acc": 0.3484665826931706 }, { "epoch": 5.642040457343887, "grad_norm": 0.28546700046892254, "learning_rate": 0.00017196374687018738, "loss": 2.6803884506225586, "step": 9625, "token_acc": 0.3434194138531584 }, { "epoch": 5.642626795661096, "grad_norm": 0.29501524391772743, "learning_rate": 0.0001719570168325639, "loss": 2.6659979820251465, "step": 9626, "token_acc": 0.3477107296734918 }, { "epoch": 5.643213133978305, "grad_norm": 0.258006246091174, "learning_rate": 0.00017195028611900122, "loss": 2.708184242248535, "step": 9627, "token_acc": 0.33988353454610115 }, { "epoch": 5.643799472295514, "grad_norm": 0.2789750490267383, "learning_rate": 0.00017194355472956253, "loss": 2.6745424270629883, "step": 9628, "token_acc": 0.34688089486926454 }, { "epoch": 5.644385810612723, "grad_norm": 0.26013898401880814, "learning_rate": 0.00017193682266431108, "loss": 2.672896146774292, "step": 9629, "token_acc": 0.3469715571925517 }, { "epoch": 5.6449721489299325, "grad_norm": 0.30409176665869486, "learning_rate": 0.00017193008992331013, "loss": 2.6519622802734375, "step": 9630, "token_acc": 0.3495431657260984 }, { "epoch": 5.645558487247142, "grad_norm": 0.2748934565335675, "learning_rate": 0.00017192335650662296, "loss": 2.709531307220459, "step": 9631, "token_acc": 0.33962980185900826 }, { "epoch": 5.646144825564351, "grad_norm": 0.28745747670992294, "learning_rate": 0.00017191662241431273, "loss": 2.6950721740722656, "step": 9632, "token_acc": 0.3426986680393797 }, { "epoch": 5.64673116388156, "grad_norm": 0.29364886080963093, "learning_rate": 0.00017190988764644271, "loss": 2.657607078552246, "step": 9633, "token_acc": 0.3474349142171143 }, { "epoch": 5.647317502198769, "grad_norm": 0.28820697744904045, "learning_rate": 0.00017190315220307626, "loss": 2.705496311187744, "step": 9634, "token_acc": 0.34098763392583553 }, { "epoch": 5.647903840515978, "grad_norm": 0.2809696773094621, "learning_rate": 0.00017189641608427657, "loss": 2.6582584381103516, "step": 9635, "token_acc": 0.3495842122235791 }, { "epoch": 5.648490178833187, "grad_norm": 0.2943127476028719, "learning_rate": 0.00017188967929010688, "loss": 2.687264919281006, "step": 9636, "token_acc": 0.3433402161036356 }, { "epoch": 5.649076517150396, "grad_norm": 0.2732125172876398, "learning_rate": 0.0001718829418206306, "loss": 2.672905445098877, "step": 9637, "token_acc": 0.3473263969736054 }, { "epoch": 5.6496628554676045, "grad_norm": 0.33251542886324703, "learning_rate": 0.0001718762036759109, "loss": 2.7080190181732178, "step": 9638, "token_acc": 0.3407064001350952 }, { "epoch": 5.650249193784814, "grad_norm": 0.29275527323922623, "learning_rate": 0.0001718694648560111, "loss": 2.6641740798950195, "step": 9639, "token_acc": 0.3485184433442355 }, { "epoch": 5.650835532102023, "grad_norm": 0.2720275855721091, "learning_rate": 0.00017186272536099458, "loss": 2.677889823913574, "step": 9640, "token_acc": 0.3453799311622981 }, { "epoch": 5.651421870419232, "grad_norm": 0.28025747483633473, "learning_rate": 0.00017185598519092455, "loss": 2.695666790008545, "step": 9641, "token_acc": 0.34273902661722466 }, { "epoch": 5.652008208736441, "grad_norm": 0.2687714962643324, "learning_rate": 0.0001718492443458644, "loss": 2.664212703704834, "step": 9642, "token_acc": 0.34570813380120763 }, { "epoch": 5.65259454705365, "grad_norm": 0.28614792822812724, "learning_rate": 0.00017184250282587738, "loss": 2.708156108856201, "step": 9643, "token_acc": 0.33888891767131735 }, { "epoch": 5.653180885370859, "grad_norm": 0.28021189524005335, "learning_rate": 0.0001718357606310269, "loss": 2.6956028938293457, "step": 9644, "token_acc": 0.3421257377577404 }, { "epoch": 5.653767223688068, "grad_norm": 0.26468917323063723, "learning_rate": 0.00017182901776137622, "loss": 2.700101375579834, "step": 9645, "token_acc": 0.3430033549588279 }, { "epoch": 5.654353562005277, "grad_norm": 0.27955813134907753, "learning_rate": 0.00017182227421698868, "loss": 2.6796088218688965, "step": 9646, "token_acc": 0.34540723675294754 }, { "epoch": 5.654939900322486, "grad_norm": 0.27865219821203446, "learning_rate": 0.0001718155299979277, "loss": 2.6931333541870117, "step": 9647, "token_acc": 0.34297121344096415 }, { "epoch": 5.655526238639695, "grad_norm": 0.2548991478352482, "learning_rate": 0.00017180878510425658, "loss": 2.6627683639526367, "step": 9648, "token_acc": 0.34634670303471543 }, { "epoch": 5.656112576956904, "grad_norm": 0.28179119724857904, "learning_rate": 0.00017180203953603867, "loss": 2.6768953800201416, "step": 9649, "token_acc": 0.346237124050323 }, { "epoch": 5.656698915274113, "grad_norm": 0.28296715775712505, "learning_rate": 0.00017179529329333737, "loss": 2.6644973754882812, "step": 9650, "token_acc": 0.34607728142736305 }, { "epoch": 5.657285253591322, "grad_norm": 0.2536074287064516, "learning_rate": 0.00017178854637621603, "loss": 2.682227849960327, "step": 9651, "token_acc": 0.3434657927079066 }, { "epoch": 5.657871591908531, "grad_norm": 0.2743872489109879, "learning_rate": 0.00017178179878473806, "loss": 2.6914095878601074, "step": 9652, "token_acc": 0.3436564545097958 }, { "epoch": 5.65845793022574, "grad_norm": 0.2756830047285492, "learning_rate": 0.00017177505051896676, "loss": 2.653563976287842, "step": 9653, "token_acc": 0.34873480608529106 }, { "epoch": 5.659044268542949, "grad_norm": 0.26370904273242785, "learning_rate": 0.00017176830157896564, "loss": 2.6928234100341797, "step": 9654, "token_acc": 0.3407285117868705 }, { "epoch": 5.6596306068601585, "grad_norm": 0.2844213816246217, "learning_rate": 0.00017176155196479796, "loss": 2.6598589420318604, "step": 9655, "token_acc": 0.34840487318673136 }, { "epoch": 5.660216945177368, "grad_norm": 0.25334053750060304, "learning_rate": 0.00017175480167652725, "loss": 2.6874332427978516, "step": 9656, "token_acc": 0.34339116836696704 }, { "epoch": 5.660803283494577, "grad_norm": 0.2604208831574149, "learning_rate": 0.0001717480507142169, "loss": 2.7017476558685303, "step": 9657, "token_acc": 0.3409253267218557 }, { "epoch": 5.661389621811786, "grad_norm": 0.26336914932303757, "learning_rate": 0.00017174129907793025, "loss": 2.699188470840454, "step": 9658, "token_acc": 0.3424822419414038 }, { "epoch": 5.661975960128994, "grad_norm": 0.25451341195512084, "learning_rate": 0.0001717345467677308, "loss": 2.7013988494873047, "step": 9659, "token_acc": 0.34254907455596034 }, { "epoch": 5.662562298446203, "grad_norm": 0.2512169594366114, "learning_rate": 0.00017172779378368192, "loss": 2.6777291297912598, "step": 9660, "token_acc": 0.34638555013071604 }, { "epoch": 5.663148636763412, "grad_norm": 0.2678853539599262, "learning_rate": 0.00017172104012584707, "loss": 2.6957106590270996, "step": 9661, "token_acc": 0.3419926040723635 }, { "epoch": 5.663734975080621, "grad_norm": 0.24524049835171582, "learning_rate": 0.00017171428579428969, "loss": 2.6461286544799805, "step": 9662, "token_acc": 0.35020916211575825 }, { "epoch": 5.6643213133978305, "grad_norm": 0.313378698430541, "learning_rate": 0.00017170753078907326, "loss": 2.7188878059387207, "step": 9663, "token_acc": 0.3385147126021487 }, { "epoch": 5.66490765171504, "grad_norm": 0.37109935330587024, "learning_rate": 0.00017170077511026116, "loss": 2.728109359741211, "step": 9664, "token_acc": 0.33765468473777255 }, { "epoch": 5.665493990032249, "grad_norm": 0.3130998597121793, "learning_rate": 0.00017169401875791692, "loss": 2.698941230773926, "step": 9665, "token_acc": 0.34041382289861183 }, { "epoch": 5.666080328349458, "grad_norm": 0.26219306768097944, "learning_rate": 0.000171687261732104, "loss": 2.699347496032715, "step": 9666, "token_acc": 0.34244095114386447 }, { "epoch": 5.666666666666667, "grad_norm": 0.3112212318369894, "learning_rate": 0.00017168050403288583, "loss": 2.6614480018615723, "step": 9667, "token_acc": 0.34581196269187947 }, { "epoch": 5.667253004983876, "grad_norm": 0.26897563477935615, "learning_rate": 0.0001716737456603259, "loss": 2.654895305633545, "step": 9668, "token_acc": 0.3492307573427708 }, { "epoch": 5.667839343301084, "grad_norm": 0.2663202729538439, "learning_rate": 0.00017166698661448776, "loss": 2.6832804679870605, "step": 9669, "token_acc": 0.34296849714100763 }, { "epoch": 5.668425681618293, "grad_norm": 0.2740789366429989, "learning_rate": 0.0001716602268954348, "loss": 2.6734683513641357, "step": 9670, "token_acc": 0.3465400805150867 }, { "epoch": 5.6690120199355025, "grad_norm": 0.25878683053469054, "learning_rate": 0.00017165346650323065, "loss": 2.738891124725342, "step": 9671, "token_acc": 0.33742612697543817 }, { "epoch": 5.669598358252712, "grad_norm": 0.25625375861972927, "learning_rate": 0.00017164670543793866, "loss": 2.6904873847961426, "step": 9672, "token_acc": 0.34311797057574517 }, { "epoch": 5.670184696569921, "grad_norm": 0.27341181256757546, "learning_rate": 0.00017163994369962245, "loss": 2.7016983032226562, "step": 9673, "token_acc": 0.3420264647248498 }, { "epoch": 5.67077103488713, "grad_norm": 0.26570693964556785, "learning_rate": 0.0001716331812883455, "loss": 2.6520755290985107, "step": 9674, "token_acc": 0.3481397923256737 }, { "epoch": 5.671357373204339, "grad_norm": 0.26346167444512103, "learning_rate": 0.00017162641820417133, "loss": 2.6935617923736572, "step": 9675, "token_acc": 0.3432988108730943 }, { "epoch": 5.671943711521548, "grad_norm": 0.27757711180086914, "learning_rate": 0.0001716196544471635, "loss": 2.7076027393341064, "step": 9676, "token_acc": 0.3399895236432324 }, { "epoch": 5.672530049838757, "grad_norm": 0.25614865138210385, "learning_rate": 0.0001716128900173855, "loss": 2.6771514415740967, "step": 9677, "token_acc": 0.34508505144955154 }, { "epoch": 5.673116388155966, "grad_norm": 0.26100486460293554, "learning_rate": 0.00017160612491490092, "loss": 2.670776844024658, "step": 9678, "token_acc": 0.34649800731390085 }, { "epoch": 5.673702726473175, "grad_norm": 0.29769488433209373, "learning_rate": 0.00017159935913977327, "loss": 2.6809566020965576, "step": 9679, "token_acc": 0.34354621279111547 }, { "epoch": 5.6742890647903845, "grad_norm": 0.27357565229498243, "learning_rate": 0.00017159259269206614, "loss": 2.723518133163452, "step": 9680, "token_acc": 0.3390090773537029 }, { "epoch": 5.674875403107593, "grad_norm": 0.27292475923486215, "learning_rate": 0.00017158582557184307, "loss": 2.72519588470459, "step": 9681, "token_acc": 0.3355027972844923 }, { "epoch": 5.675461741424802, "grad_norm": 0.2992115192055746, "learning_rate": 0.00017157905777916762, "loss": 2.6943047046661377, "step": 9682, "token_acc": 0.3425933866370722 }, { "epoch": 5.676048079742011, "grad_norm": 0.29030520350441075, "learning_rate": 0.00017157228931410337, "loss": 2.7105729579925537, "step": 9683, "token_acc": 0.3399942208679206 }, { "epoch": 5.67663441805922, "grad_norm": 0.29053278857007825, "learning_rate": 0.0001715655201767139, "loss": 2.734339475631714, "step": 9684, "token_acc": 0.3366634537040489 }, { "epoch": 5.677220756376429, "grad_norm": 0.4211042765428095, "learning_rate": 0.00017155875036706285, "loss": 2.7287237644195557, "step": 9685, "token_acc": 0.33740072129021803 }, { "epoch": 5.677807094693638, "grad_norm": 0.33259880997898006, "learning_rate": 0.00017155197988521375, "loss": 2.7345170974731445, "step": 9686, "token_acc": 0.3353360258226222 }, { "epoch": 5.678393433010847, "grad_norm": 0.2961986663595038, "learning_rate": 0.0001715452087312302, "loss": 2.699075222015381, "step": 9687, "token_acc": 0.3412668754440906 }, { "epoch": 5.6789797713280565, "grad_norm": 0.3012429298451997, "learning_rate": 0.00017153843690517583, "loss": 2.6976025104522705, "step": 9688, "token_acc": 0.34229070415421065 }, { "epoch": 5.679566109645266, "grad_norm": 0.2793880479123014, "learning_rate": 0.00017153166440711423, "loss": 2.6744914054870605, "step": 9689, "token_acc": 0.3469209457544942 }, { "epoch": 5.680152447962474, "grad_norm": 0.2802968624648089, "learning_rate": 0.00017152489123710904, "loss": 2.6746459007263184, "step": 9690, "token_acc": 0.3463219663303863 }, { "epoch": 5.680738786279683, "grad_norm": 0.28662892548268065, "learning_rate": 0.00017151811739522387, "loss": 2.6628518104553223, "step": 9691, "token_acc": 0.3467135959379254 }, { "epoch": 5.681325124596892, "grad_norm": 0.27934699048564315, "learning_rate": 0.0001715113428815224, "loss": 2.712088108062744, "step": 9692, "token_acc": 0.3404275393573431 }, { "epoch": 5.681911462914101, "grad_norm": 0.2671031181331191, "learning_rate": 0.00017150456769606816, "loss": 2.7613983154296875, "step": 9693, "token_acc": 0.3318236219625351 }, { "epoch": 5.68249780123131, "grad_norm": 0.2657156241642082, "learning_rate": 0.00017149779183892492, "loss": 2.6949009895324707, "step": 9694, "token_acc": 0.3420149259675197 }, { "epoch": 5.683084139548519, "grad_norm": 0.27283830017725214, "learning_rate": 0.00017149101531015623, "loss": 2.6977996826171875, "step": 9695, "token_acc": 0.34328876314463863 }, { "epoch": 5.6836704778657285, "grad_norm": 0.27890756220096297, "learning_rate": 0.00017148423810982584, "loss": 2.7115440368652344, "step": 9696, "token_acc": 0.3387540434125231 }, { "epoch": 5.684256816182938, "grad_norm": 0.2960220158246693, "learning_rate": 0.0001714774602379973, "loss": 2.6821229457855225, "step": 9697, "token_acc": 0.3448735055867804 }, { "epoch": 5.684843154500147, "grad_norm": 0.2945279182128348, "learning_rate": 0.00017147068169473436, "loss": 2.696906328201294, "step": 9698, "token_acc": 0.341732442825339 }, { "epoch": 5.685429492817356, "grad_norm": 0.26122457169313246, "learning_rate": 0.00017146390248010067, "loss": 2.706333875656128, "step": 9699, "token_acc": 0.3401145108662193 }, { "epoch": 5.686015831134565, "grad_norm": 0.28358980148016516, "learning_rate": 0.00017145712259415994, "loss": 2.7144322395324707, "step": 9700, "token_acc": 0.3390273770729986 }, { "epoch": 5.686602169451774, "grad_norm": 0.27764826409851145, "learning_rate": 0.0001714503420369758, "loss": 2.6760425567626953, "step": 9701, "token_acc": 0.34607765451664024 }, { "epoch": 5.687188507768982, "grad_norm": 0.25429064158741754, "learning_rate": 0.00017144356080861202, "loss": 2.6748547554016113, "step": 9702, "token_acc": 0.3450052164840897 }, { "epoch": 5.687774846086191, "grad_norm": 0.2747682412834148, "learning_rate": 0.00017143677890913222, "loss": 2.711700439453125, "step": 9703, "token_acc": 0.3383512622187198 }, { "epoch": 5.6883611844034006, "grad_norm": 0.25800833645683646, "learning_rate": 0.00017142999633860013, "loss": 2.72167706489563, "step": 9704, "token_acc": 0.3387947603727071 }, { "epoch": 5.68894752272061, "grad_norm": 0.2680073352135934, "learning_rate": 0.0001714232130970795, "loss": 2.716604232788086, "step": 9705, "token_acc": 0.33833822923451146 }, { "epoch": 5.689533861037819, "grad_norm": 0.26353410286587386, "learning_rate": 0.00017141642918463404, "loss": 2.70736026763916, "step": 9706, "token_acc": 0.3398236522994486 }, { "epoch": 5.690120199355028, "grad_norm": 0.27581644818499984, "learning_rate": 0.00017140964460132745, "loss": 2.6875500679016113, "step": 9707, "token_acc": 0.3437701502240838 }, { "epoch": 5.690706537672237, "grad_norm": 0.25279342391570686, "learning_rate": 0.00017140285934722348, "loss": 2.7416634559631348, "step": 9708, "token_acc": 0.33541525475941103 }, { "epoch": 5.691292875989446, "grad_norm": 0.2567513742149731, "learning_rate": 0.00017139607342238583, "loss": 2.6870784759521484, "step": 9709, "token_acc": 0.34341012405472005 }, { "epoch": 5.691879214306655, "grad_norm": 0.2662546011804505, "learning_rate": 0.0001713892868268783, "loss": 2.698129177093506, "step": 9710, "token_acc": 0.33983693030223067 }, { "epoch": 5.692465552623864, "grad_norm": 0.2520978973721482, "learning_rate": 0.00017138249956076464, "loss": 2.720951557159424, "step": 9711, "token_acc": 0.33784967541778554 }, { "epoch": 5.693051890941073, "grad_norm": 0.25435649613726424, "learning_rate": 0.0001713757116241086, "loss": 2.6916401386260986, "step": 9712, "token_acc": 0.34335120521677337 }, { "epoch": 5.693638229258282, "grad_norm": 0.27114417000353586, "learning_rate": 0.0001713689230169739, "loss": 2.6933059692382812, "step": 9713, "token_acc": 0.3416384367953664 }, { "epoch": 5.694224567575491, "grad_norm": 0.26859230660058864, "learning_rate": 0.0001713621337394243, "loss": 2.6944518089294434, "step": 9714, "token_acc": 0.34360502885093047 }, { "epoch": 5.6948109058927, "grad_norm": 0.2745675136113956, "learning_rate": 0.00017135534379152367, "loss": 2.678412437438965, "step": 9715, "token_acc": 0.3448146412634852 }, { "epoch": 5.695397244209909, "grad_norm": 0.2977936494330076, "learning_rate": 0.00017134855317333574, "loss": 2.744518280029297, "step": 9716, "token_acc": 0.3354496491371167 }, { "epoch": 5.695983582527118, "grad_norm": 0.2862108879482743, "learning_rate": 0.00017134176188492425, "loss": 2.6785643100738525, "step": 9717, "token_acc": 0.3455358361598945 }, { "epoch": 5.696569920844327, "grad_norm": 0.28035757868718536, "learning_rate": 0.00017133496992635308, "loss": 2.7197721004486084, "step": 9718, "token_acc": 0.33951212645715595 }, { "epoch": 5.697156259161536, "grad_norm": 0.2587638836193722, "learning_rate": 0.00017132817729768597, "loss": 2.7041447162628174, "step": 9719, "token_acc": 0.34045063888621596 }, { "epoch": 5.6977425974787455, "grad_norm": 0.26726880007916765, "learning_rate": 0.00017132138399898676, "loss": 2.6794629096984863, "step": 9720, "token_acc": 0.3459427585292821 }, { "epoch": 5.698328935795955, "grad_norm": 0.25885507650634726, "learning_rate": 0.00017131459003031927, "loss": 2.676908016204834, "step": 9721, "token_acc": 0.345653594857722 }, { "epoch": 5.698915274113164, "grad_norm": 0.25569052988181756, "learning_rate": 0.00017130779539174728, "loss": 2.683712959289551, "step": 9722, "token_acc": 0.3451104756122889 }, { "epoch": 5.699501612430373, "grad_norm": 0.26017009619750575, "learning_rate": 0.00017130100008333466, "loss": 2.6609983444213867, "step": 9723, "token_acc": 0.3469309854698322 }, { "epoch": 5.700087950747581, "grad_norm": 0.2783159908329817, "learning_rate": 0.0001712942041051452, "loss": 2.670192241668701, "step": 9724, "token_acc": 0.34642788114030326 }, { "epoch": 5.70067428906479, "grad_norm": 0.28967877623041083, "learning_rate": 0.00017128740745724278, "loss": 2.7244067192077637, "step": 9725, "token_acc": 0.33750685862095997 }, { "epoch": 5.701260627381999, "grad_norm": 0.28872852257096737, "learning_rate": 0.00017128061013969124, "loss": 2.7011876106262207, "step": 9726, "token_acc": 0.3427534897712038 }, { "epoch": 5.701846965699208, "grad_norm": 0.26385900821646313, "learning_rate": 0.00017127381215255438, "loss": 2.7199716567993164, "step": 9727, "token_acc": 0.33854522119116254 }, { "epoch": 5.7024333040164175, "grad_norm": 0.26415495525422983, "learning_rate": 0.00017126701349589614, "loss": 2.6752676963806152, "step": 9728, "token_acc": 0.3442394700270629 }, { "epoch": 5.703019642333627, "grad_norm": 0.25377827077903775, "learning_rate": 0.00017126021416978034, "loss": 2.6789393424987793, "step": 9729, "token_acc": 0.3440548303614595 }, { "epoch": 5.703605980650836, "grad_norm": 0.2999311219193406, "learning_rate": 0.00017125341417427082, "loss": 2.721673011779785, "step": 9730, "token_acc": 0.33771657041314973 }, { "epoch": 5.704192318968045, "grad_norm": 0.26782542896093176, "learning_rate": 0.0001712466135094315, "loss": 2.7176051139831543, "step": 9731, "token_acc": 0.33998301874469333 }, { "epoch": 5.704778657285254, "grad_norm": 0.2688848674073152, "learning_rate": 0.00017123981217532626, "loss": 2.6988096237182617, "step": 9732, "token_acc": 0.34087736916369 }, { "epoch": 5.705364995602462, "grad_norm": 0.37106934319896084, "learning_rate": 0.00017123301017201898, "loss": 2.6864659786224365, "step": 9733, "token_acc": 0.3436038096890127 }, { "epoch": 5.705951333919671, "grad_norm": 0.3260039128527722, "learning_rate": 0.00017122620749957353, "loss": 2.680455207824707, "step": 9734, "token_acc": 0.34424102712537213 }, { "epoch": 5.70653767223688, "grad_norm": 0.27620516224952646, "learning_rate": 0.00017121940415805388, "loss": 2.650160312652588, "step": 9735, "token_acc": 0.35079507272810423 }, { "epoch": 5.7071240105540895, "grad_norm": 0.2949262597828128, "learning_rate": 0.00017121260014752387, "loss": 2.671657085418701, "step": 9736, "token_acc": 0.34582348305752564 }, { "epoch": 5.707710348871299, "grad_norm": 0.2990210719743637, "learning_rate": 0.00017120579546804745, "loss": 2.6990580558776855, "step": 9737, "token_acc": 0.3421709303343136 }, { "epoch": 5.708296687188508, "grad_norm": 0.29328931084556414, "learning_rate": 0.00017119899011968856, "loss": 2.6936371326446533, "step": 9738, "token_acc": 0.34231551268949073 }, { "epoch": 5.708883025505717, "grad_norm": 0.3198841559198466, "learning_rate": 0.00017119218410251106, "loss": 2.6864168643951416, "step": 9739, "token_acc": 0.34291262638685704 }, { "epoch": 5.709469363822926, "grad_norm": 0.26424598711522645, "learning_rate": 0.00017118537741657894, "loss": 2.6947760581970215, "step": 9740, "token_acc": 0.34041564058220497 }, { "epoch": 5.710055702140135, "grad_norm": 0.30728248812990044, "learning_rate": 0.00017117857006195613, "loss": 2.7045998573303223, "step": 9741, "token_acc": 0.33979316465325193 }, { "epoch": 5.710642040457344, "grad_norm": 0.260746561086653, "learning_rate": 0.00017117176203870654, "loss": 2.7167842388153076, "step": 9742, "token_acc": 0.3384944081029753 }, { "epoch": 5.711228378774553, "grad_norm": 0.32745000709729316, "learning_rate": 0.0001711649533468942, "loss": 2.714959144592285, "step": 9743, "token_acc": 0.33814542908369494 }, { "epoch": 5.711814717091762, "grad_norm": 0.30974940885090324, "learning_rate": 0.000171158143986583, "loss": 2.6906955242156982, "step": 9744, "token_acc": 0.3432065079817636 }, { "epoch": 5.7124010554089715, "grad_norm": 0.2601905798155636, "learning_rate": 0.0001711513339578369, "loss": 2.7093000411987305, "step": 9745, "token_acc": 0.3405824285362115 }, { "epoch": 5.71298739372618, "grad_norm": 0.3375232613722728, "learning_rate": 0.00017114452326071988, "loss": 2.690463066101074, "step": 9746, "token_acc": 0.34313885053367704 }, { "epoch": 5.713573732043389, "grad_norm": 0.2563378741457845, "learning_rate": 0.00017113771189529597, "loss": 2.7091119289398193, "step": 9747, "token_acc": 0.34089345848877617 }, { "epoch": 5.714160070360598, "grad_norm": 0.30036645834518505, "learning_rate": 0.0001711308998616291, "loss": 2.665311336517334, "step": 9748, "token_acc": 0.34789655522812724 }, { "epoch": 5.714746408677807, "grad_norm": 0.26170582974491674, "learning_rate": 0.0001711240871597833, "loss": 2.680868625640869, "step": 9749, "token_acc": 0.3446827133479212 }, { "epoch": 5.715332746995016, "grad_norm": 0.3111825179256955, "learning_rate": 0.00017111727378982253, "loss": 2.700237989425659, "step": 9750, "token_acc": 0.34122715632489464 }, { "epoch": 5.715919085312225, "grad_norm": 0.2925940683568707, "learning_rate": 0.0001711104597518108, "loss": 2.724099636077881, "step": 9751, "token_acc": 0.3383845768234606 }, { "epoch": 5.716505423629434, "grad_norm": 0.26024852222650807, "learning_rate": 0.0001711036450458121, "loss": 2.6854677200317383, "step": 9752, "token_acc": 0.3426606006027891 }, { "epoch": 5.7170917619466435, "grad_norm": 0.2692403325356353, "learning_rate": 0.0001710968296718905, "loss": 2.6941633224487305, "step": 9753, "token_acc": 0.3423151643422362 }, { "epoch": 5.717678100263853, "grad_norm": 0.2545522749257339, "learning_rate": 0.00017109001363011, "loss": 2.68007230758667, "step": 9754, "token_acc": 0.34384698853459883 }, { "epoch": 5.718264438581061, "grad_norm": 0.25431640576537073, "learning_rate": 0.00017108319692053458, "loss": 2.690493106842041, "step": 9755, "token_acc": 0.3428599166176764 }, { "epoch": 5.71885077689827, "grad_norm": 0.25750593221008694, "learning_rate": 0.00017107637954322832, "loss": 2.720430850982666, "step": 9756, "token_acc": 0.33831440849506944 }, { "epoch": 5.719437115215479, "grad_norm": 0.2599397867838589, "learning_rate": 0.0001710695614982553, "loss": 2.700011730194092, "step": 9757, "token_acc": 0.3420546074908482 }, { "epoch": 5.720023453532688, "grad_norm": 0.2607872752975199, "learning_rate": 0.00017106274278567944, "loss": 2.686519145965576, "step": 9758, "token_acc": 0.34319085167414876 }, { "epoch": 5.720609791849897, "grad_norm": 0.2560634124270843, "learning_rate": 0.0001710559234055649, "loss": 2.705650806427002, "step": 9759, "token_acc": 0.34117915557246103 }, { "epoch": 5.721196130167106, "grad_norm": 0.2555389294052096, "learning_rate": 0.00017104910335797574, "loss": 2.6971700191497803, "step": 9760, "token_acc": 0.3409017863071917 }, { "epoch": 5.7217824684843155, "grad_norm": 0.2737229160664518, "learning_rate": 0.00017104228264297597, "loss": 2.6999640464782715, "step": 9761, "token_acc": 0.34035474600371723 }, { "epoch": 5.722368806801525, "grad_norm": 0.25775683000244204, "learning_rate": 0.00017103546126062967, "loss": 2.729640245437622, "step": 9762, "token_acc": 0.3371763485098539 }, { "epoch": 5.722955145118734, "grad_norm": 0.28364063335595124, "learning_rate": 0.00017102863921100093, "loss": 2.706819772720337, "step": 9763, "token_acc": 0.34029474149803474 }, { "epoch": 5.723541483435943, "grad_norm": 0.28267700268868706, "learning_rate": 0.00017102181649415385, "loss": 2.6976799964904785, "step": 9764, "token_acc": 0.3414142371310949 }, { "epoch": 5.724127821753152, "grad_norm": 0.26026206756766684, "learning_rate": 0.00017101499311015253, "loss": 2.7080278396606445, "step": 9765, "token_acc": 0.3390354585272451 }, { "epoch": 5.724714160070361, "grad_norm": 0.29403574871222393, "learning_rate": 0.00017100816905906102, "loss": 2.7065558433532715, "step": 9766, "token_acc": 0.3404565158493633 }, { "epoch": 5.725300498387569, "grad_norm": 0.2504377363742552, "learning_rate": 0.00017100134434094345, "loss": 2.715836524963379, "step": 9767, "token_acc": 0.33924575827770165 }, { "epoch": 5.725886836704778, "grad_norm": 0.30351948374689247, "learning_rate": 0.00017099451895586393, "loss": 2.6740026473999023, "step": 9768, "token_acc": 0.34603050218850384 }, { "epoch": 5.7264731750219875, "grad_norm": 0.2960985384998379, "learning_rate": 0.00017098769290388655, "loss": 2.71990966796875, "step": 9769, "token_acc": 0.33975473491434266 }, { "epoch": 5.727059513339197, "grad_norm": 0.2660476293423822, "learning_rate": 0.00017098086618507548, "loss": 2.687426805496216, "step": 9770, "token_acc": 0.34411978403136323 }, { "epoch": 5.727645851656406, "grad_norm": 0.30950077527573744, "learning_rate": 0.00017097403879949482, "loss": 2.662525177001953, "step": 9771, "token_acc": 0.34718101685467034 }, { "epoch": 5.728232189973615, "grad_norm": 0.25575448276451124, "learning_rate": 0.00017096721074720867, "loss": 2.699496269226074, "step": 9772, "token_acc": 0.3423390608915987 }, { "epoch": 5.728818528290824, "grad_norm": 0.29161144072794143, "learning_rate": 0.00017096038202828125, "loss": 2.699843406677246, "step": 9773, "token_acc": 0.34085409271370054 }, { "epoch": 5.729404866608033, "grad_norm": 0.31530960157301263, "learning_rate": 0.00017095355264277665, "loss": 2.6798179149627686, "step": 9774, "token_acc": 0.34392669285997673 }, { "epoch": 5.729991204925242, "grad_norm": 0.2700875857523055, "learning_rate": 0.000170946722590759, "loss": 2.711947202682495, "step": 9775, "token_acc": 0.33865903634129824 }, { "epoch": 5.730577543242451, "grad_norm": 0.2672616730425358, "learning_rate": 0.00017093989187229251, "loss": 2.7120652198791504, "step": 9776, "token_acc": 0.34098240276940023 }, { "epoch": 5.7311638815596595, "grad_norm": 0.2626277186236491, "learning_rate": 0.00017093306048744132, "loss": 2.700963020324707, "step": 9777, "token_acc": 0.34114509173752083 }, { "epoch": 5.731750219876869, "grad_norm": 0.26941788536500166, "learning_rate": 0.00017092622843626964, "loss": 2.689690113067627, "step": 9778, "token_acc": 0.34305546134888876 }, { "epoch": 5.732336558194078, "grad_norm": 0.2651415603443734, "learning_rate": 0.0001709193957188416, "loss": 2.729811429977417, "step": 9779, "token_acc": 0.3366144356303663 }, { "epoch": 5.732922896511287, "grad_norm": 0.26929007793912, "learning_rate": 0.00017091256233522142, "loss": 2.665167808532715, "step": 9780, "token_acc": 0.34690240559879965 }, { "epoch": 5.733509234828496, "grad_norm": 0.3010938273143518, "learning_rate": 0.00017090572828547327, "loss": 2.699704647064209, "step": 9781, "token_acc": 0.33939044948943564 }, { "epoch": 5.734095573145705, "grad_norm": 0.263358523281644, "learning_rate": 0.00017089889356966133, "loss": 2.6912002563476562, "step": 9782, "token_acc": 0.34348387295840704 }, { "epoch": 5.734681911462914, "grad_norm": 0.2853994119846367, "learning_rate": 0.00017089205818784985, "loss": 2.7092292308807373, "step": 9783, "token_acc": 0.34015680676902305 }, { "epoch": 5.735268249780123, "grad_norm": 0.26808640613941365, "learning_rate": 0.00017088522214010299, "loss": 2.707169532775879, "step": 9784, "token_acc": 0.3403616927087603 }, { "epoch": 5.735854588097332, "grad_norm": 0.27839189640848083, "learning_rate": 0.00017087838542648496, "loss": 2.6989340782165527, "step": 9785, "token_acc": 0.34183965974694347 }, { "epoch": 5.7364409264145415, "grad_norm": 0.31492998215987655, "learning_rate": 0.00017087154804706006, "loss": 2.708911895751953, "step": 9786, "token_acc": 0.3408630807808951 }, { "epoch": 5.737027264731751, "grad_norm": 0.2736303969955324, "learning_rate": 0.00017086471000189244, "loss": 2.7278456687927246, "step": 9787, "token_acc": 0.3365573596902869 }, { "epoch": 5.73761360304896, "grad_norm": 0.2805829287957194, "learning_rate": 0.00017085787129104634, "loss": 2.737631320953369, "step": 9788, "token_acc": 0.33693952540368693 }, { "epoch": 5.738199941366168, "grad_norm": 0.27965463383293826, "learning_rate": 0.00017085103191458604, "loss": 2.6989285945892334, "step": 9789, "token_acc": 0.34236492451050077 }, { "epoch": 5.738786279683377, "grad_norm": 0.2596622286561764, "learning_rate": 0.0001708441918725758, "loss": 2.730696439743042, "step": 9790, "token_acc": 0.33648783135800236 }, { "epoch": 5.739372618000586, "grad_norm": 0.2997292460840302, "learning_rate": 0.0001708373511650798, "loss": 2.6817541122436523, "step": 9791, "token_acc": 0.34421246869645444 }, { "epoch": 5.739958956317795, "grad_norm": 0.24675660635945773, "learning_rate": 0.00017083050979216236, "loss": 2.673198699951172, "step": 9792, "token_acc": 0.346139544187454 }, { "epoch": 5.740545294635004, "grad_norm": 0.36060202645340544, "learning_rate": 0.00017082366775388773, "loss": 2.6794328689575195, "step": 9793, "token_acc": 0.345360091281294 }, { "epoch": 5.7411316329522135, "grad_norm": 0.3101345877382126, "learning_rate": 0.00017081682505032015, "loss": 2.724865198135376, "step": 9794, "token_acc": 0.33718523399698686 }, { "epoch": 5.741717971269423, "grad_norm": 0.2750404062721753, "learning_rate": 0.00017080998168152395, "loss": 2.691318988800049, "step": 9795, "token_acc": 0.3415126596882801 }, { "epoch": 5.742304309586632, "grad_norm": 0.2820934091914795, "learning_rate": 0.0001708031376475634, "loss": 2.693655014038086, "step": 9796, "token_acc": 0.3416504559037426 }, { "epoch": 5.742890647903841, "grad_norm": 0.29047740790181464, "learning_rate": 0.00017079629294850275, "loss": 2.7151474952697754, "step": 9797, "token_acc": 0.3401318369206965 }, { "epoch": 5.743476986221049, "grad_norm": 0.30857586091775796, "learning_rate": 0.00017078944758440633, "loss": 2.689760208129883, "step": 9798, "token_acc": 0.3425099349190808 }, { "epoch": 5.744063324538258, "grad_norm": 0.26762364101303837, "learning_rate": 0.00017078260155533847, "loss": 2.6821975708007812, "step": 9799, "token_acc": 0.34278142893178654 }, { "epoch": 5.744649662855467, "grad_norm": 0.29230952279561856, "learning_rate": 0.00017077575486136343, "loss": 2.6854281425476074, "step": 9800, "token_acc": 0.34561163212767687 }, { "epoch": 5.745236001172676, "grad_norm": 0.2571663120802006, "learning_rate": 0.00017076890750254554, "loss": 2.706923007965088, "step": 9801, "token_acc": 0.34170859588696834 }, { "epoch": 5.7458223394898855, "grad_norm": 0.2767398307788116, "learning_rate": 0.00017076205947894914, "loss": 2.7301294803619385, "step": 9802, "token_acc": 0.33542818815067293 }, { "epoch": 5.746408677807095, "grad_norm": 0.26326954399113556, "learning_rate": 0.00017075521079063855, "loss": 2.675654888153076, "step": 9803, "token_acc": 0.3440135714897886 }, { "epoch": 5.746995016124304, "grad_norm": 0.2535708109225153, "learning_rate": 0.00017074836143767807, "loss": 2.6660447120666504, "step": 9804, "token_acc": 0.34559922416043964 }, { "epoch": 5.747581354441513, "grad_norm": 0.2703662907696137, "learning_rate": 0.00017074151142013207, "loss": 2.6732985973358154, "step": 9805, "token_acc": 0.3448076226619127 }, { "epoch": 5.748167692758722, "grad_norm": 0.2635740837579622, "learning_rate": 0.00017073466073806492, "loss": 2.7102131843566895, "step": 9806, "token_acc": 0.34080677858981123 }, { "epoch": 5.748754031075931, "grad_norm": 0.284722146492488, "learning_rate": 0.00017072780939154093, "loss": 2.704298973083496, "step": 9807, "token_acc": 0.342929684253423 }, { "epoch": 5.74934036939314, "grad_norm": 0.27315126737049256, "learning_rate": 0.0001707209573806245, "loss": 2.666435718536377, "step": 9808, "token_acc": 0.3470690326557032 }, { "epoch": 5.749926707710349, "grad_norm": 0.28322056175773214, "learning_rate": 0.00017071410470537995, "loss": 2.6925525665283203, "step": 9809, "token_acc": 0.342310037873335 }, { "epoch": 5.7505130460275575, "grad_norm": 0.2637783080790276, "learning_rate": 0.0001707072513658717, "loss": 2.71258544921875, "step": 9810, "token_acc": 0.3395110985040282 }, { "epoch": 5.751099384344767, "grad_norm": 0.2588024379429364, "learning_rate": 0.0001707003973621641, "loss": 2.7056877613067627, "step": 9811, "token_acc": 0.3412268778595766 }, { "epoch": 5.751685722661976, "grad_norm": 0.27747871925939843, "learning_rate": 0.00017069354269432148, "loss": 2.689424514770508, "step": 9812, "token_acc": 0.3426340561264247 }, { "epoch": 5.752272060979185, "grad_norm": 0.2491068919037751, "learning_rate": 0.00017068668736240836, "loss": 2.695323944091797, "step": 9813, "token_acc": 0.339852237058096 }, { "epoch": 5.752858399296394, "grad_norm": 0.27572638773769415, "learning_rate": 0.00017067983136648902, "loss": 2.7250115871429443, "step": 9814, "token_acc": 0.3369874991903621 }, { "epoch": 5.753444737613603, "grad_norm": 0.26063019077469046, "learning_rate": 0.00017067297470662793, "loss": 2.705073356628418, "step": 9815, "token_acc": 0.34066050211772103 }, { "epoch": 5.754031075930812, "grad_norm": 0.26848478906821854, "learning_rate": 0.00017066611738288944, "loss": 2.6968345642089844, "step": 9816, "token_acc": 0.3423200932976607 }, { "epoch": 5.754617414248021, "grad_norm": 0.24301836934732673, "learning_rate": 0.00017065925939533803, "loss": 2.6776082515716553, "step": 9817, "token_acc": 0.3452482763318222 }, { "epoch": 5.75520375256523, "grad_norm": 0.2698772694944979, "learning_rate": 0.0001706524007440381, "loss": 2.678867816925049, "step": 9818, "token_acc": 0.345342908861112 }, { "epoch": 5.7557900908824395, "grad_norm": 0.27588484694928056, "learning_rate": 0.00017064554142905407, "loss": 2.706106185913086, "step": 9819, "token_acc": 0.3416944742199619 }, { "epoch": 5.756376429199648, "grad_norm": 0.26777047948125227, "learning_rate": 0.00017063868145045038, "loss": 2.7054548263549805, "step": 9820, "token_acc": 0.3411221484550492 }, { "epoch": 5.756962767516857, "grad_norm": 0.2669185983870583, "learning_rate": 0.00017063182080829143, "loss": 2.68900728225708, "step": 9821, "token_acc": 0.3419860445740471 }, { "epoch": 5.757549105834066, "grad_norm": 0.2491126689060044, "learning_rate": 0.0001706249595026417, "loss": 2.675116777420044, "step": 9822, "token_acc": 0.34582792805133206 }, { "epoch": 5.758135444151275, "grad_norm": 0.28586494416290215, "learning_rate": 0.00017061809753356565, "loss": 2.6776599884033203, "step": 9823, "token_acc": 0.3445602446732178 }, { "epoch": 5.758721782468484, "grad_norm": 0.2812164791649811, "learning_rate": 0.00017061123490112777, "loss": 2.6965208053588867, "step": 9824, "token_acc": 0.3430281166992415 }, { "epoch": 5.759308120785693, "grad_norm": 0.2676566619814792, "learning_rate": 0.00017060437160539246, "loss": 2.75732159614563, "step": 9825, "token_acc": 0.3325398585811589 }, { "epoch": 5.759894459102902, "grad_norm": 0.2628373444888273, "learning_rate": 0.00017059750764642423, "loss": 2.7129392623901367, "step": 9826, "token_acc": 0.33887337544362645 }, { "epoch": 5.7604807974201115, "grad_norm": 0.26176939565665425, "learning_rate": 0.00017059064302428752, "loss": 2.695472240447998, "step": 9827, "token_acc": 0.3412850065051976 }, { "epoch": 5.761067135737321, "grad_norm": 0.24716713038309338, "learning_rate": 0.0001705837777390469, "loss": 2.687631130218506, "step": 9828, "token_acc": 0.34257091983113447 }, { "epoch": 5.76165347405453, "grad_norm": 0.2634440661237052, "learning_rate": 0.00017057691179076672, "loss": 2.705292224884033, "step": 9829, "token_acc": 0.33907477239109146 }, { "epoch": 5.762239812371739, "grad_norm": 0.2591503987369195, "learning_rate": 0.00017057004517951162, "loss": 2.658693552017212, "step": 9830, "token_acc": 0.3477375141795627 }, { "epoch": 5.762826150688948, "grad_norm": 0.2851792219357145, "learning_rate": 0.00017056317790534604, "loss": 2.7019896507263184, "step": 9831, "token_acc": 0.340577612919157 }, { "epoch": 5.763412489006156, "grad_norm": 0.2708704863624883, "learning_rate": 0.00017055630996833446, "loss": 2.6829514503479004, "step": 9832, "token_acc": 0.34385945637701515 }, { "epoch": 5.763998827323365, "grad_norm": 0.25477104906801956, "learning_rate": 0.00017054944136854144, "loss": 2.7061400413513184, "step": 9833, "token_acc": 0.340193029688949 }, { "epoch": 5.764585165640574, "grad_norm": 0.25157506377940575, "learning_rate": 0.00017054257210603148, "loss": 2.6914963722229004, "step": 9834, "token_acc": 0.3424424841302894 }, { "epoch": 5.7651715039577835, "grad_norm": 0.2795433526164303, "learning_rate": 0.00017053570218086907, "loss": 2.682927131652832, "step": 9835, "token_acc": 0.34317964118362804 }, { "epoch": 5.765757842274993, "grad_norm": 0.2971328736056097, "learning_rate": 0.00017052883159311883, "loss": 2.7140562534332275, "step": 9836, "token_acc": 0.33813859782014055 }, { "epoch": 5.766344180592202, "grad_norm": 0.34355356910324225, "learning_rate": 0.00017052196034284523, "loss": 2.721029758453369, "step": 9837, "token_acc": 0.338979455296095 }, { "epoch": 5.766930518909411, "grad_norm": 0.35846365049813533, "learning_rate": 0.0001705150884301129, "loss": 2.707603931427002, "step": 9838, "token_acc": 0.34089033018867926 }, { "epoch": 5.76751685722662, "grad_norm": 0.28590922033111105, "learning_rate": 0.0001705082158549863, "loss": 2.6835408210754395, "step": 9839, "token_acc": 0.34474857230044903 }, { "epoch": 5.768103195543829, "grad_norm": 0.2912358074407059, "learning_rate": 0.00017050134261753, "loss": 2.6697421073913574, "step": 9840, "token_acc": 0.34671373891300006 }, { "epoch": 5.768689533861037, "grad_norm": 0.3085099938409532, "learning_rate": 0.00017049446871780864, "loss": 2.683354616165161, "step": 9841, "token_acc": 0.34385701552751324 }, { "epoch": 5.7692758721782464, "grad_norm": 0.2823658427206369, "learning_rate": 0.0001704875941558867, "loss": 2.718597888946533, "step": 9842, "token_acc": 0.33956534593432586 }, { "epoch": 5.769862210495456, "grad_norm": 0.3818686591329294, "learning_rate": 0.0001704807189318288, "loss": 2.686283588409424, "step": 9843, "token_acc": 0.34476382320772386 }, { "epoch": 5.770448548812665, "grad_norm": 0.2947384138222875, "learning_rate": 0.0001704738430456995, "loss": 2.6760177612304688, "step": 9844, "token_acc": 0.34464518616242307 }, { "epoch": 5.771034887129874, "grad_norm": 0.3036931938079206, "learning_rate": 0.00017046696649756344, "loss": 2.722147226333618, "step": 9845, "token_acc": 0.3384175743855767 }, { "epoch": 5.771621225447083, "grad_norm": 0.3153643447246259, "learning_rate": 0.0001704600892874852, "loss": 2.6861658096313477, "step": 9846, "token_acc": 0.3426058422646939 }, { "epoch": 5.772207563764292, "grad_norm": 0.28455942714804217, "learning_rate": 0.00017045321141552933, "loss": 2.7050533294677734, "step": 9847, "token_acc": 0.3418295680674236 }, { "epoch": 5.772793902081501, "grad_norm": 0.32415554933072555, "learning_rate": 0.00017044633288176047, "loss": 2.6833689212799072, "step": 9848, "token_acc": 0.34444718023380294 }, { "epoch": 5.77338024039871, "grad_norm": 0.28234730029784844, "learning_rate": 0.00017043945368624326, "loss": 2.7180192470550537, "step": 9849, "token_acc": 0.3387732670048456 }, { "epoch": 5.773966578715919, "grad_norm": 0.32880208754215906, "learning_rate": 0.0001704325738290423, "loss": 2.7026286125183105, "step": 9850, "token_acc": 0.34178133465043953 }, { "epoch": 5.7745529170331285, "grad_norm": 0.27976745585884905, "learning_rate": 0.00017042569331022223, "loss": 2.7076733112335205, "step": 9851, "token_acc": 0.3388434852825453 }, { "epoch": 5.775139255350338, "grad_norm": 0.28837813967273035, "learning_rate": 0.00017041881212984765, "loss": 2.714110851287842, "step": 9852, "token_acc": 0.3383640967191671 }, { "epoch": 5.775725593667546, "grad_norm": 0.27822257280391227, "learning_rate": 0.00017041193028798324, "loss": 2.693833112716675, "step": 9853, "token_acc": 0.34195815364957677 }, { "epoch": 5.776311931984755, "grad_norm": 0.3113184000401301, "learning_rate": 0.00017040504778469362, "loss": 2.73563289642334, "step": 9854, "token_acc": 0.33658639559618725 }, { "epoch": 5.776898270301964, "grad_norm": 0.270158787339092, "learning_rate": 0.00017039816462004344, "loss": 2.7067089080810547, "step": 9855, "token_acc": 0.34094095416124837 }, { "epoch": 5.777484608619173, "grad_norm": 0.2700713912768026, "learning_rate": 0.00017039128079409735, "loss": 2.7130002975463867, "step": 9856, "token_acc": 0.3384421104015833 }, { "epoch": 5.778070946936382, "grad_norm": 0.2715597860272181, "learning_rate": 0.00017038439630692007, "loss": 2.677408218383789, "step": 9857, "token_acc": 0.3453836053673794 }, { "epoch": 5.778657285253591, "grad_norm": 0.2809980535825738, "learning_rate": 0.00017037751115857623, "loss": 2.6988730430603027, "step": 9858, "token_acc": 0.3413678155737596 }, { "epoch": 5.7792436235708005, "grad_norm": 0.271100893965222, "learning_rate": 0.00017037062534913048, "loss": 2.742131471633911, "step": 9859, "token_acc": 0.33461016949152544 }, { "epoch": 5.77982996188801, "grad_norm": 0.26731614449294994, "learning_rate": 0.00017036373887864754, "loss": 2.694150924682617, "step": 9860, "token_acc": 0.3423007208633693 }, { "epoch": 5.780416300205219, "grad_norm": 0.29240572023505085, "learning_rate": 0.00017035685174719207, "loss": 2.702456474304199, "step": 9861, "token_acc": 0.3415720270172978 }, { "epoch": 5.781002638522428, "grad_norm": 0.27560362088919893, "learning_rate": 0.0001703499639548288, "loss": 2.712031126022339, "step": 9862, "token_acc": 0.34105246174944204 }, { "epoch": 5.781588976839636, "grad_norm": 0.2740949900561151, "learning_rate": 0.00017034307550162244, "loss": 2.7155933380126953, "step": 9863, "token_acc": 0.34084302131221633 }, { "epoch": 5.782175315156845, "grad_norm": 0.26057453892939514, "learning_rate": 0.00017033618638763765, "loss": 2.7052063941955566, "step": 9864, "token_acc": 0.3396987698454421 }, { "epoch": 5.782761653474054, "grad_norm": 0.2650309871599445, "learning_rate": 0.0001703292966129392, "loss": 2.7079169750213623, "step": 9865, "token_acc": 0.3395972780573755 }, { "epoch": 5.783347991791263, "grad_norm": 0.25919450461879134, "learning_rate": 0.00017032240617759174, "loss": 2.7104878425598145, "step": 9866, "token_acc": 0.3389209678175163 }, { "epoch": 5.7839343301084725, "grad_norm": 0.2640726158263265, "learning_rate": 0.00017031551508166003, "loss": 2.694451332092285, "step": 9867, "token_acc": 0.341696492756642 }, { "epoch": 5.784520668425682, "grad_norm": 0.26199249057539475, "learning_rate": 0.00017030862332520881, "loss": 2.7151966094970703, "step": 9868, "token_acc": 0.3399060614479747 }, { "epoch": 5.785107006742891, "grad_norm": 0.27173103939122334, "learning_rate": 0.00017030173090830286, "loss": 2.6787304878234863, "step": 9869, "token_acc": 0.34536717800096606 }, { "epoch": 5.7856933450601, "grad_norm": 0.2508454059759158, "learning_rate": 0.00017029483783100684, "loss": 2.6575913429260254, "step": 9870, "token_acc": 0.3473974721348195 }, { "epoch": 5.786279683377309, "grad_norm": 0.2637645934097564, "learning_rate": 0.00017028794409338556, "loss": 2.7062621116638184, "step": 9871, "token_acc": 0.3410778986662043 }, { "epoch": 5.786866021694518, "grad_norm": 0.24851569103866536, "learning_rate": 0.00017028104969550375, "loss": 2.6707816123962402, "step": 9872, "token_acc": 0.3466608989412104 }, { "epoch": 5.787452360011727, "grad_norm": 0.27884252514212116, "learning_rate": 0.0001702741546374262, "loss": 2.7049195766448975, "step": 9873, "token_acc": 0.3398031907676837 }, { "epoch": 5.788038698328936, "grad_norm": 0.2611104713806163, "learning_rate": 0.00017026725891921765, "loss": 2.710439682006836, "step": 9874, "token_acc": 0.3409179999325038 }, { "epoch": 5.7886250366461445, "grad_norm": 0.271528142839158, "learning_rate": 0.00017026036254094286, "loss": 2.711056709289551, "step": 9875, "token_acc": 0.33907462039940583 }, { "epoch": 5.789211374963354, "grad_norm": 0.261896450168623, "learning_rate": 0.00017025346550266667, "loss": 2.7395944595336914, "step": 9876, "token_acc": 0.3364757878156703 }, { "epoch": 5.789797713280563, "grad_norm": 0.2747974194373344, "learning_rate": 0.00017024656780445385, "loss": 2.6984763145446777, "step": 9877, "token_acc": 0.3416516993182384 }, { "epoch": 5.790384051597772, "grad_norm": 0.25256219589867474, "learning_rate": 0.00017023966944636917, "loss": 2.6613640785217285, "step": 9878, "token_acc": 0.34767067763067383 }, { "epoch": 5.790970389914981, "grad_norm": 0.2653499963420842, "learning_rate": 0.00017023277042847745, "loss": 2.7117462158203125, "step": 9879, "token_acc": 0.33886905983590554 }, { "epoch": 5.79155672823219, "grad_norm": 0.2897111629229459, "learning_rate": 0.00017022587075084348, "loss": 2.7103726863861084, "step": 9880, "token_acc": 0.3379442390451047 }, { "epoch": 5.792143066549399, "grad_norm": 0.3922365650863327, "learning_rate": 0.00017021897041353209, "loss": 2.7264010906219482, "step": 9881, "token_acc": 0.3369276610429645 }, { "epoch": 5.792729404866608, "grad_norm": 0.3657571921914574, "learning_rate": 0.0001702120694166081, "loss": 2.7064368724823, "step": 9882, "token_acc": 0.34111021925348983 }, { "epoch": 5.793315743183817, "grad_norm": 0.2606322539847341, "learning_rate": 0.00017020516776013634, "loss": 2.677274465560913, "step": 9883, "token_acc": 0.34396318702535716 }, { "epoch": 5.7939020815010265, "grad_norm": 0.3540055089079396, "learning_rate": 0.0001701982654441816, "loss": 2.7213034629821777, "step": 9884, "token_acc": 0.3380639183438106 }, { "epoch": 5.794488419818235, "grad_norm": 0.2871227717703711, "learning_rate": 0.00017019136246880878, "loss": 2.724184513092041, "step": 9885, "token_acc": 0.3383453071447403 }, { "epoch": 5.795074758135444, "grad_norm": 0.2882191398309349, "learning_rate": 0.00017018445883408266, "loss": 2.739137649536133, "step": 9886, "token_acc": 0.3352867506321038 }, { "epoch": 5.795661096452653, "grad_norm": 0.2855845744317301, "learning_rate": 0.00017017755454006817, "loss": 2.6894781589508057, "step": 9887, "token_acc": 0.3415025231108473 }, { "epoch": 5.796247434769862, "grad_norm": 0.25232864042816594, "learning_rate": 0.00017017064958683008, "loss": 2.704000949859619, "step": 9888, "token_acc": 0.3417113026874007 }, { "epoch": 5.796833773087071, "grad_norm": 0.286912625387369, "learning_rate": 0.0001701637439744333, "loss": 2.708357095718384, "step": 9889, "token_acc": 0.3411382288752959 }, { "epoch": 5.79742011140428, "grad_norm": 0.2673600945824778, "learning_rate": 0.00017015683770294274, "loss": 2.692500591278076, "step": 9890, "token_acc": 0.3421621259237485 }, { "epoch": 5.798006449721489, "grad_norm": 0.35247089279727145, "learning_rate": 0.00017014993077242317, "loss": 2.66996693611145, "step": 9891, "token_acc": 0.3469176194545721 }, { "epoch": 5.7985927880386985, "grad_norm": 0.25670137465137927, "learning_rate": 0.00017014302318293952, "loss": 2.709150791168213, "step": 9892, "token_acc": 0.3407032013812327 }, { "epoch": 5.799179126355908, "grad_norm": 0.3004955298700424, "learning_rate": 0.00017013611493455673, "loss": 2.71744966506958, "step": 9893, "token_acc": 0.3381570329130506 }, { "epoch": 5.799765464673117, "grad_norm": 0.2776637363342749, "learning_rate": 0.00017012920602733962, "loss": 2.6978535652160645, "step": 9894, "token_acc": 0.34159674548690566 }, { "epoch": 5.800351802990326, "grad_norm": 0.3105306906853624, "learning_rate": 0.00017012229646135314, "loss": 2.7267956733703613, "step": 9895, "token_acc": 0.33800561086627434 }, { "epoch": 5.800938141307535, "grad_norm": 0.2742074383091318, "learning_rate": 0.00017011538623666215, "loss": 2.720053195953369, "step": 9896, "token_acc": 0.3393743319840173 }, { "epoch": 5.801524479624743, "grad_norm": 0.2991120024134006, "learning_rate": 0.00017010847535333163, "loss": 2.7433085441589355, "step": 9897, "token_acc": 0.3354463130659767 }, { "epoch": 5.802110817941952, "grad_norm": 0.26908350350116056, "learning_rate": 0.00017010156381142642, "loss": 2.7164466381073, "step": 9898, "token_acc": 0.33720555609397057 }, { "epoch": 5.802697156259161, "grad_norm": 0.2791132245236656, "learning_rate": 0.00017009465161101151, "loss": 2.695408344268799, "step": 9899, "token_acc": 0.3414637453591836 }, { "epoch": 5.8032834945763705, "grad_norm": 0.256986655377693, "learning_rate": 0.0001700877387521518, "loss": 2.6765737533569336, "step": 9900, "token_acc": 0.3441346546684964 }, { "epoch": 5.80386983289358, "grad_norm": 0.2816778481906404, "learning_rate": 0.00017008082523491217, "loss": 2.744688034057617, "step": 9901, "token_acc": 0.3353278808474659 }, { "epoch": 5.804456171210789, "grad_norm": 0.26803354233762583, "learning_rate": 0.00017007391105935767, "loss": 2.674678325653076, "step": 9902, "token_acc": 0.3462757118262842 }, { "epoch": 5.805042509527998, "grad_norm": 0.24871430935658076, "learning_rate": 0.00017006699622555322, "loss": 2.7315614223480225, "step": 9903, "token_acc": 0.3355871997823733 }, { "epoch": 5.805628847845207, "grad_norm": 0.26032667024581124, "learning_rate": 0.00017006008073356372, "loss": 2.664966106414795, "step": 9904, "token_acc": 0.3475562591997599 }, { "epoch": 5.806215186162416, "grad_norm": 0.2634571288378092, "learning_rate": 0.00017005316458345419, "loss": 2.6923460960388184, "step": 9905, "token_acc": 0.3433181736426938 }, { "epoch": 5.806801524479624, "grad_norm": 0.2540756801350001, "learning_rate": 0.00017004624777528955, "loss": 2.7298476696014404, "step": 9906, "token_acc": 0.337559711552304 }, { "epoch": 5.807387862796833, "grad_norm": 0.27032998428841026, "learning_rate": 0.00017003933030913484, "loss": 2.7230639457702637, "step": 9907, "token_acc": 0.3412245006516131 }, { "epoch": 5.8079742011140425, "grad_norm": 0.2619893067194834, "learning_rate": 0.000170032412185055, "loss": 2.7009029388427734, "step": 9908, "token_acc": 0.34153084480622337 }, { "epoch": 5.808560539431252, "grad_norm": 0.2624514054167554, "learning_rate": 0.00017002549340311497, "loss": 2.7039709091186523, "step": 9909, "token_acc": 0.3404080535687573 }, { "epoch": 5.809146877748461, "grad_norm": 0.27098018640290145, "learning_rate": 0.0001700185739633798, "loss": 2.6965699195861816, "step": 9910, "token_acc": 0.34230634845883906 }, { "epoch": 5.80973321606567, "grad_norm": 0.27992902575083634, "learning_rate": 0.00017001165386591453, "loss": 2.725646495819092, "step": 9911, "token_acc": 0.33803956764552534 }, { "epoch": 5.810319554382879, "grad_norm": 0.25422863433920795, "learning_rate": 0.0001700047331107841, "loss": 2.706587076187134, "step": 9912, "token_acc": 0.34110687834483183 }, { "epoch": 5.810905892700088, "grad_norm": 0.2772500181655618, "learning_rate": 0.0001699978116980535, "loss": 2.7238001823425293, "step": 9913, "token_acc": 0.3383483360101595 }, { "epoch": 5.811492231017297, "grad_norm": 0.42270189565024113, "learning_rate": 0.0001699908896277878, "loss": 2.7148947715759277, "step": 9914, "token_acc": 0.33979248875088464 }, { "epoch": 5.812078569334506, "grad_norm": 0.36910019612221867, "learning_rate": 0.00016998396690005198, "loss": 2.703819513320923, "step": 9915, "token_acc": 0.34196862816684703 }, { "epoch": 5.812664907651715, "grad_norm": 0.2897915684067254, "learning_rate": 0.00016997704351491113, "loss": 2.7244815826416016, "step": 9916, "token_acc": 0.3383307858250133 }, { "epoch": 5.8132512459689245, "grad_norm": 0.3777776631188572, "learning_rate": 0.00016997011947243024, "loss": 2.7135095596313477, "step": 9917, "token_acc": 0.33834990455082053 }, { "epoch": 5.813837584286133, "grad_norm": 0.2662816191919359, "learning_rate": 0.00016996319477267436, "loss": 2.7268238067626953, "step": 9918, "token_acc": 0.33744900061817434 }, { "epoch": 5.814423922603342, "grad_norm": 0.32684631391700236, "learning_rate": 0.00016995626941570854, "loss": 2.711203098297119, "step": 9919, "token_acc": 0.33953106430091223 }, { "epoch": 5.815010260920551, "grad_norm": 0.25419712176027265, "learning_rate": 0.00016994934340159784, "loss": 2.6893577575683594, "step": 9920, "token_acc": 0.34266725542046556 }, { "epoch": 5.81559659923776, "grad_norm": 0.3398682523196575, "learning_rate": 0.00016994241673040734, "loss": 2.6690673828125, "step": 9921, "token_acc": 0.3466630618882148 }, { "epoch": 5.816182937554969, "grad_norm": 0.2456883262681673, "learning_rate": 0.00016993548940220205, "loss": 2.7188563346862793, "step": 9922, "token_acc": 0.3393908666972976 }, { "epoch": 5.816769275872178, "grad_norm": 0.28520728015909846, "learning_rate": 0.00016992856141704712, "loss": 2.708329677581787, "step": 9923, "token_acc": 0.34072562601800643 }, { "epoch": 5.817355614189387, "grad_norm": 0.2456178031467423, "learning_rate": 0.00016992163277500754, "loss": 2.7370846271514893, "step": 9924, "token_acc": 0.33466575926114245 }, { "epoch": 5.8179419525065965, "grad_norm": 0.2933964984982332, "learning_rate": 0.00016991470347614844, "loss": 2.7135021686553955, "step": 9925, "token_acc": 0.3398556927968693 }, { "epoch": 5.818528290823806, "grad_norm": 0.25145191091752794, "learning_rate": 0.00016990777352053494, "loss": 2.730299949645996, "step": 9926, "token_acc": 0.3364658442401668 }, { "epoch": 5.819114629141015, "grad_norm": 0.2654714925566974, "learning_rate": 0.0001699008429082321, "loss": 2.7233567237854004, "step": 9927, "token_acc": 0.33760656225218083 }, { "epoch": 5.819700967458223, "grad_norm": 0.26987655787105874, "learning_rate": 0.00016989391163930502, "loss": 2.669539213180542, "step": 9928, "token_acc": 0.34442568316991884 }, { "epoch": 5.820287305775432, "grad_norm": 0.2672032742127561, "learning_rate": 0.00016988697971381884, "loss": 2.7079505920410156, "step": 9929, "token_acc": 0.341373858308432 }, { "epoch": 5.820873644092641, "grad_norm": 0.28123575078024077, "learning_rate": 0.00016988004713183865, "loss": 2.7293198108673096, "step": 9930, "token_acc": 0.3373321056477932 }, { "epoch": 5.82145998240985, "grad_norm": 0.26603945586842404, "learning_rate": 0.00016987311389342956, "loss": 2.7408835887908936, "step": 9931, "token_acc": 0.3354048940304004 }, { "epoch": 5.822046320727059, "grad_norm": 0.2644369511745641, "learning_rate": 0.00016986617999865678, "loss": 2.685732126235962, "step": 9932, "token_acc": 0.3445139327668244 }, { "epoch": 5.8226326590442685, "grad_norm": 0.27624884793473636, "learning_rate": 0.00016985924544758534, "loss": 2.718756914138794, "step": 9933, "token_acc": 0.33832097923483073 }, { "epoch": 5.823218997361478, "grad_norm": 0.2536518600650178, "learning_rate": 0.00016985231024028045, "loss": 2.6872506141662598, "step": 9934, "token_acc": 0.34382720539700246 }, { "epoch": 5.823805335678687, "grad_norm": 0.25480957616751143, "learning_rate": 0.00016984537437680718, "loss": 2.7489430904388428, "step": 9935, "token_acc": 0.33409342461260966 }, { "epoch": 5.824391673995896, "grad_norm": 0.27108110740992875, "learning_rate": 0.0001698384378572308, "loss": 2.7308692932128906, "step": 9936, "token_acc": 0.33691821590229964 }, { "epoch": 5.824978012313105, "grad_norm": 0.24807992885136756, "learning_rate": 0.00016983150068161637, "loss": 2.72170352935791, "step": 9937, "token_acc": 0.33933277191164085 }, { "epoch": 5.825564350630314, "grad_norm": 0.2716620070621618, "learning_rate": 0.0001698245628500291, "loss": 2.775378704071045, "step": 9938, "token_acc": 0.3298786653185035 }, { "epoch": 5.826150688947523, "grad_norm": 0.24440034382546813, "learning_rate": 0.00016981762436253414, "loss": 2.703197479248047, "step": 9939, "token_acc": 0.3406854688492977 }, { "epoch": 5.826737027264731, "grad_norm": 0.28210599664211694, "learning_rate": 0.0001698106852191967, "loss": 2.730391025543213, "step": 9940, "token_acc": 0.3370422022798933 }, { "epoch": 5.8273233655819405, "grad_norm": 0.25155715021008596, "learning_rate": 0.00016980374542008194, "loss": 2.709158420562744, "step": 9941, "token_acc": 0.3411392787386988 }, { "epoch": 5.82790970389915, "grad_norm": 0.2869697838097068, "learning_rate": 0.00016979680496525504, "loss": 2.7391624450683594, "step": 9942, "token_acc": 0.33503271972412424 }, { "epoch": 5.828496042216359, "grad_norm": 0.29240338891864864, "learning_rate": 0.00016978986385478122, "loss": 2.706935405731201, "step": 9943, "token_acc": 0.34227092028659284 }, { "epoch": 5.829082380533568, "grad_norm": 0.2664310569058118, "learning_rate": 0.00016978292208872565, "loss": 2.6844325065612793, "step": 9944, "token_acc": 0.3443434690242699 }, { "epoch": 5.829668718850777, "grad_norm": 0.40606653446626256, "learning_rate": 0.0001697759796671536, "loss": 2.7427282333374023, "step": 9945, "token_acc": 0.33529680488001506 }, { "epoch": 5.830255057167986, "grad_norm": 0.2888228317100483, "learning_rate": 0.0001697690365901302, "loss": 2.7218165397644043, "step": 9946, "token_acc": 0.3391144759156344 }, { "epoch": 5.830841395485195, "grad_norm": 0.3483322131078382, "learning_rate": 0.00016976209285772076, "loss": 2.7196950912475586, "step": 9947, "token_acc": 0.3384283955683217 }, { "epoch": 5.831427733802404, "grad_norm": 0.3082320085449344, "learning_rate": 0.00016975514846999046, "loss": 2.698625087738037, "step": 9948, "token_acc": 0.3413796194735066 }, { "epoch": 5.8320140721196125, "grad_norm": 0.3108255610072256, "learning_rate": 0.0001697482034270045, "loss": 2.724987030029297, "step": 9949, "token_acc": 0.33773047402541856 }, { "epoch": 5.832600410436822, "grad_norm": 0.31028656233283125, "learning_rate": 0.00016974125772882816, "loss": 2.6671595573425293, "step": 9950, "token_acc": 0.34660720554392627 }, { "epoch": 5.833186748754031, "grad_norm": 0.30857944762494294, "learning_rate": 0.0001697343113755267, "loss": 2.712918996810913, "step": 9951, "token_acc": 0.339410552652881 }, { "epoch": 5.83377308707124, "grad_norm": 0.2868327620466821, "learning_rate": 0.00016972736436716537, "loss": 2.6995151042938232, "step": 9952, "token_acc": 0.3424719020889493 }, { "epoch": 5.834359425388449, "grad_norm": 0.3081804607465968, "learning_rate": 0.0001697204167038094, "loss": 2.6873269081115723, "step": 9953, "token_acc": 0.34373103926497356 }, { "epoch": 5.834945763705658, "grad_norm": 0.28268646342874776, "learning_rate": 0.00016971346838552402, "loss": 2.7061896324157715, "step": 9954, "token_acc": 0.3408886752283497 }, { "epoch": 5.835532102022867, "grad_norm": 0.29450958473737765, "learning_rate": 0.0001697065194123746, "loss": 2.6924009323120117, "step": 9955, "token_acc": 0.3427805519610256 }, { "epoch": 5.836118440340076, "grad_norm": 0.2552245376143469, "learning_rate": 0.00016969956978442634, "loss": 2.6997978687286377, "step": 9956, "token_acc": 0.34066715930537944 }, { "epoch": 5.836704778657285, "grad_norm": 0.3009136038326612, "learning_rate": 0.00016969261950174454, "loss": 2.691918134689331, "step": 9957, "token_acc": 0.34179415764593485 }, { "epoch": 5.8372911169744945, "grad_norm": 0.2513591545575658, "learning_rate": 0.0001696856685643945, "loss": 2.7007393836975098, "step": 9958, "token_acc": 0.34120910993972103 }, { "epoch": 5.837877455291704, "grad_norm": 0.2739958395930539, "learning_rate": 0.0001696787169724415, "loss": 2.6600186824798584, "step": 9959, "token_acc": 0.3460854370143851 }, { "epoch": 5.838463793608913, "grad_norm": 0.2594616963930268, "learning_rate": 0.00016967176472595084, "loss": 2.6728415489196777, "step": 9960, "token_acc": 0.3455509469503189 }, { "epoch": 5.839050131926121, "grad_norm": 0.26462913258508086, "learning_rate": 0.00016966481182498786, "loss": 2.7204184532165527, "step": 9961, "token_acc": 0.3384629283087049 }, { "epoch": 5.83963647024333, "grad_norm": 0.25803996151663977, "learning_rate": 0.00016965785826961782, "loss": 2.7037971019744873, "step": 9962, "token_acc": 0.33892441026839293 }, { "epoch": 5.840222808560539, "grad_norm": 0.25859270697650855, "learning_rate": 0.0001696509040599061, "loss": 2.6715481281280518, "step": 9963, "token_acc": 0.34756979939726246 }, { "epoch": 5.840809146877748, "grad_norm": 0.2646490504791317, "learning_rate": 0.00016964394919591794, "loss": 2.7166812419891357, "step": 9964, "token_acc": 0.3395637322623688 }, { "epoch": 5.841395485194957, "grad_norm": 0.25410632111166526, "learning_rate": 0.00016963699367771878, "loss": 2.7142887115478516, "step": 9965, "token_acc": 0.3400764870107413 }, { "epoch": 5.8419818235121665, "grad_norm": 0.2475568789473016, "learning_rate": 0.00016963003750537387, "loss": 2.6944870948791504, "step": 9966, "token_acc": 0.3415110015543641 }, { "epoch": 5.842568161829376, "grad_norm": 0.25241725539995225, "learning_rate": 0.0001696230806789486, "loss": 2.714154005050659, "step": 9967, "token_acc": 0.339983186426861 }, { "epoch": 5.843154500146585, "grad_norm": 0.2563744322372722, "learning_rate": 0.0001696161231985083, "loss": 2.7427749633789062, "step": 9968, "token_acc": 0.3356262223286319 }, { "epoch": 5.843740838463794, "grad_norm": 0.2683617216159031, "learning_rate": 0.00016960916506411828, "loss": 2.735119104385376, "step": 9969, "token_acc": 0.3357239921485732 }, { "epoch": 5.844327176781003, "grad_norm": 0.24855856901659903, "learning_rate": 0.000169602206275844, "loss": 2.7303712368011475, "step": 9970, "token_acc": 0.33612715341787647 }, { "epoch": 5.844913515098211, "grad_norm": 0.27671665516563054, "learning_rate": 0.00016959524683375082, "loss": 2.665950298309326, "step": 9971, "token_acc": 0.3482199889281608 }, { "epoch": 5.84549985341542, "grad_norm": 0.24415923777402596, "learning_rate": 0.00016958828673790404, "loss": 2.7043566703796387, "step": 9972, "token_acc": 0.34074640421590613 }, { "epoch": 5.8460861917326294, "grad_norm": 0.2706878373713526, "learning_rate": 0.00016958132598836906, "loss": 2.6884937286376953, "step": 9973, "token_acc": 0.3417396286322187 }, { "epoch": 5.846672530049839, "grad_norm": 0.2533230871594401, "learning_rate": 0.0001695743645852113, "loss": 2.7115750312805176, "step": 9974, "token_acc": 0.3400953443554113 }, { "epoch": 5.847258868367048, "grad_norm": 0.2886379767740009, "learning_rate": 0.00016956740252849613, "loss": 2.74137806892395, "step": 9975, "token_acc": 0.33615073029779263 }, { "epoch": 5.847845206684257, "grad_norm": 0.2957519840778072, "learning_rate": 0.00016956043981828896, "loss": 2.708195209503174, "step": 9976, "token_acc": 0.3398092890503373 }, { "epoch": 5.848431545001466, "grad_norm": 0.26677935705571937, "learning_rate": 0.00016955347645465524, "loss": 2.727712631225586, "step": 9977, "token_acc": 0.33780282109811055 }, { "epoch": 5.849017883318675, "grad_norm": 0.3323888400127302, "learning_rate": 0.00016954651243766028, "loss": 2.678679943084717, "step": 9978, "token_acc": 0.3435169085694535 }, { "epoch": 5.849604221635884, "grad_norm": 0.297354970618711, "learning_rate": 0.00016953954776736954, "loss": 2.673854351043701, "step": 9979, "token_acc": 0.3457612313601875 }, { "epoch": 5.850190559953093, "grad_norm": 0.27558008240987675, "learning_rate": 0.00016953258244384846, "loss": 2.71500825881958, "step": 9980, "token_acc": 0.33866103029462774 }, { "epoch": 5.850776898270302, "grad_norm": 0.35320214782461534, "learning_rate": 0.0001695256164671625, "loss": 2.7390122413635254, "step": 9981, "token_acc": 0.3352152970052937 }, { "epoch": 5.8513632365875115, "grad_norm": 0.26727435911858827, "learning_rate": 0.00016951864983737704, "loss": 2.690431833267212, "step": 9982, "token_acc": 0.3430393781030196 }, { "epoch": 5.85194957490472, "grad_norm": 0.2961654439002612, "learning_rate": 0.00016951168255455754, "loss": 2.705667018890381, "step": 9983, "token_acc": 0.33995198184717407 }, { "epoch": 5.852535913221929, "grad_norm": 0.2803647409541711, "learning_rate": 0.00016950471461876944, "loss": 2.7075843811035156, "step": 9984, "token_acc": 0.34105333510864655 }, { "epoch": 5.853122251539138, "grad_norm": 0.27612137691321464, "learning_rate": 0.00016949774603007822, "loss": 2.705247402191162, "step": 9985, "token_acc": 0.3412376437147342 }, { "epoch": 5.853708589856347, "grad_norm": 0.28171585377805697, "learning_rate": 0.00016949077678854931, "loss": 2.730396270751953, "step": 9986, "token_acc": 0.3373874340130421 }, { "epoch": 5.854294928173556, "grad_norm": 0.2523313080847772, "learning_rate": 0.00016948380689424823, "loss": 2.735898017883301, "step": 9987, "token_acc": 0.33607392004123515 }, { "epoch": 5.854881266490765, "grad_norm": 0.2761524101196835, "learning_rate": 0.00016947683634724035, "loss": 2.6894724369049072, "step": 9988, "token_acc": 0.34331600689621405 }, { "epoch": 5.855467604807974, "grad_norm": 0.25534915392967134, "learning_rate": 0.00016946986514759126, "loss": 2.7613301277160645, "step": 9989, "token_acc": 0.33098989056588424 }, { "epoch": 5.8560539431251835, "grad_norm": 0.2957055430775944, "learning_rate": 0.00016946289329536641, "loss": 2.7702126502990723, "step": 9990, "token_acc": 0.3319583209163961 }, { "epoch": 5.856640281442393, "grad_norm": 0.27198599490216757, "learning_rate": 0.00016945592079063127, "loss": 2.721731185913086, "step": 9991, "token_acc": 0.338326440732585 }, { "epoch": 5.857226619759601, "grad_norm": 0.27316199541981495, "learning_rate": 0.00016944894763345136, "loss": 2.7360892295837402, "step": 9992, "token_acc": 0.33559566421714326 }, { "epoch": 5.85781295807681, "grad_norm": 0.2771587595203273, "learning_rate": 0.00016944197382389212, "loss": 2.681577205657959, "step": 9993, "token_acc": 0.3427863588187181 }, { "epoch": 5.858399296394019, "grad_norm": 0.2656120639492622, "learning_rate": 0.00016943499936201915, "loss": 2.6937918663024902, "step": 9994, "token_acc": 0.34134429748060724 }, { "epoch": 5.858985634711228, "grad_norm": 0.25648093204243366, "learning_rate": 0.0001694280242478979, "loss": 2.706294536590576, "step": 9995, "token_acc": 0.3403471884847471 }, { "epoch": 5.859571973028437, "grad_norm": 0.25068124282515175, "learning_rate": 0.00016942104848159396, "loss": 2.7461438179016113, "step": 9996, "token_acc": 0.33354783186454157 }, { "epoch": 5.860158311345646, "grad_norm": 0.2555126375074528, "learning_rate": 0.00016941407206317276, "loss": 2.7627153396606445, "step": 9997, "token_acc": 0.3310113704368641 }, { "epoch": 5.8607446496628555, "grad_norm": 0.2781497098931414, "learning_rate": 0.00016940709499269993, "loss": 2.72983980178833, "step": 9998, "token_acc": 0.33620312193601937 }, { "epoch": 5.861330987980065, "grad_norm": 0.25106612709367165, "learning_rate": 0.00016940011727024096, "loss": 2.732506036758423, "step": 9999, "token_acc": 0.33766975922302483 }, { "epoch": 5.861917326297274, "grad_norm": 0.24493323068107956, "learning_rate": 0.00016939313889586142, "loss": 2.6832520961761475, "step": 10000, "token_acc": 0.3431363208096463 }, { "epoch": 5.862503664614483, "grad_norm": 0.25396668754230994, "learning_rate": 0.00016938615986962684, "loss": 2.7155680656433105, "step": 10001, "token_acc": 0.33868345608028366 }, { "epoch": 5.863090002931692, "grad_norm": 0.24431532469658523, "learning_rate": 0.00016937918019160276, "loss": 2.7020833492279053, "step": 10002, "token_acc": 0.3406546046804314 }, { "epoch": 5.863676341248901, "grad_norm": 0.2559684887359459, "learning_rate": 0.00016937219986185479, "loss": 2.723104476928711, "step": 10003, "token_acc": 0.33706968703800755 }, { "epoch": 5.86426267956611, "grad_norm": 0.24635514179198958, "learning_rate": 0.00016936521888044848, "loss": 2.747711420059204, "step": 10004, "token_acc": 0.33622377476917636 }, { "epoch": 5.864849017883318, "grad_norm": 0.24555119175792478, "learning_rate": 0.00016935823724744943, "loss": 2.745075225830078, "step": 10005, "token_acc": 0.3349257612183621 }, { "epoch": 5.8654353562005275, "grad_norm": 0.246204847647822, "learning_rate": 0.00016935125496292318, "loss": 2.679321765899658, "step": 10006, "token_acc": 0.34444394563176534 }, { "epoch": 5.866021694517737, "grad_norm": 0.2526062642049166, "learning_rate": 0.00016934427202693536, "loss": 2.736929178237915, "step": 10007, "token_acc": 0.33564534710523747 }, { "epoch": 5.866608032834946, "grad_norm": 0.2551705697262054, "learning_rate": 0.00016933728843955152, "loss": 2.717261791229248, "step": 10008, "token_acc": 0.3356517813827152 }, { "epoch": 5.867194371152155, "grad_norm": 0.3028303010112887, "learning_rate": 0.0001693303042008373, "loss": 2.682924747467041, "step": 10009, "token_acc": 0.3436227512378863 }, { "epoch": 5.867780709469364, "grad_norm": 0.3131615954871314, "learning_rate": 0.00016932331931085833, "loss": 2.7356791496276855, "step": 10010, "token_acc": 0.3361654145196607 }, { "epoch": 5.868367047786573, "grad_norm": 0.25116871468897767, "learning_rate": 0.00016931633376968014, "loss": 2.677077293395996, "step": 10011, "token_acc": 0.3451714131028838 }, { "epoch": 5.868953386103782, "grad_norm": 0.27282656042662845, "learning_rate": 0.00016930934757736842, "loss": 2.7018814086914062, "step": 10012, "token_acc": 0.33883248070527866 }, { "epoch": 5.869539724420991, "grad_norm": 0.2839502227954031, "learning_rate": 0.00016930236073398874, "loss": 2.7132701873779297, "step": 10013, "token_acc": 0.33983370285311654 }, { "epoch": 5.8701260627381995, "grad_norm": 0.2631069401271334, "learning_rate": 0.00016929537323960684, "loss": 2.7463746070861816, "step": 10014, "token_acc": 0.334955290107709 }, { "epoch": 5.870712401055409, "grad_norm": 0.2950921774695979, "learning_rate": 0.00016928838509428824, "loss": 2.71547269821167, "step": 10015, "token_acc": 0.33972507768959803 }, { "epoch": 5.871298739372618, "grad_norm": 0.2735020683664484, "learning_rate": 0.00016928139629809863, "loss": 2.677398204803467, "step": 10016, "token_acc": 0.34326303631997873 }, { "epoch": 5.871885077689827, "grad_norm": 0.27052708383697827, "learning_rate": 0.00016927440685110366, "loss": 2.700289011001587, "step": 10017, "token_acc": 0.34301772869150243 }, { "epoch": 5.872471416007036, "grad_norm": 0.2738451345925023, "learning_rate": 0.000169267416753369, "loss": 2.721689462661743, "step": 10018, "token_acc": 0.3367934651111415 }, { "epoch": 5.873057754324245, "grad_norm": 0.2570570651585135, "learning_rate": 0.00016926042600496025, "loss": 2.6989293098449707, "step": 10019, "token_acc": 0.3429785904384273 }, { "epoch": 5.873644092641454, "grad_norm": 0.3042203856212929, "learning_rate": 0.0001692534346059432, "loss": 2.7226452827453613, "step": 10020, "token_acc": 0.3396675457860062 }, { "epoch": 5.874230430958663, "grad_norm": 0.29444838585426586, "learning_rate": 0.00016924644255638342, "loss": 2.726085662841797, "step": 10021, "token_acc": 0.3390521908250893 }, { "epoch": 5.874816769275872, "grad_norm": 0.26004467931772224, "learning_rate": 0.0001692394498563466, "loss": 2.700791835784912, "step": 10022, "token_acc": 0.34140784365335264 }, { "epoch": 5.8754031075930815, "grad_norm": 0.35153222034173465, "learning_rate": 0.00016923245650589847, "loss": 2.727527618408203, "step": 10023, "token_acc": 0.33672599572259115 }, { "epoch": 5.875989445910291, "grad_norm": 0.3663739715983905, "learning_rate": 0.00016922546250510472, "loss": 2.7064261436462402, "step": 10024, "token_acc": 0.34125639229685867 }, { "epoch": 5.8765757842275, "grad_norm": 0.25233248188302204, "learning_rate": 0.00016921846785403102, "loss": 2.7502636909484863, "step": 10025, "token_acc": 0.3335853920951666 }, { "epoch": 5.877162122544708, "grad_norm": 0.3661230261285711, "learning_rate": 0.0001692114725527431, "loss": 2.691458225250244, "step": 10026, "token_acc": 0.34192705826649566 }, { "epoch": 5.877748460861917, "grad_norm": 0.24771104007834868, "learning_rate": 0.0001692044766013066, "loss": 2.7181143760681152, "step": 10027, "token_acc": 0.33770751312125863 }, { "epoch": 5.878334799179126, "grad_norm": 0.30941000830892607, "learning_rate": 0.00016919747999978734, "loss": 2.7243120670318604, "step": 10028, "token_acc": 0.33832667903213975 }, { "epoch": 5.878921137496335, "grad_norm": 0.24378793971991228, "learning_rate": 0.000169190482748251, "loss": 2.744317054748535, "step": 10029, "token_acc": 0.3343541830222851 }, { "epoch": 5.879507475813544, "grad_norm": 0.29450228183821714, "learning_rate": 0.00016918348484676332, "loss": 2.727663993835449, "step": 10030, "token_acc": 0.337220224875248 }, { "epoch": 5.8800938141307535, "grad_norm": 0.24575377786513727, "learning_rate": 0.00016917648629539002, "loss": 2.7202882766723633, "step": 10031, "token_acc": 0.33815590356273006 }, { "epoch": 5.880680152447963, "grad_norm": 0.278284442438251, "learning_rate": 0.00016916948709419684, "loss": 2.690361976623535, "step": 10032, "token_acc": 0.34312333466690725 }, { "epoch": 5.881266490765172, "grad_norm": 0.24946675331438417, "learning_rate": 0.00016916248724324954, "loss": 2.710378408432007, "step": 10033, "token_acc": 0.3392705978161868 }, { "epoch": 5.881852829082381, "grad_norm": 0.2964444978003064, "learning_rate": 0.00016915548674261387, "loss": 2.6898016929626465, "step": 10034, "token_acc": 0.3432944167092522 }, { "epoch": 5.88243916739959, "grad_norm": 0.25498506091700623, "learning_rate": 0.0001691484855923556, "loss": 2.7475600242614746, "step": 10035, "token_acc": 0.3359395931658457 }, { "epoch": 5.883025505716798, "grad_norm": 0.2624255268620245, "learning_rate": 0.00016914148379254047, "loss": 2.6807546615600586, "step": 10036, "token_acc": 0.3458657612120471 }, { "epoch": 5.883611844034007, "grad_norm": 0.25074544928686404, "learning_rate": 0.00016913448134323427, "loss": 2.700540781021118, "step": 10037, "token_acc": 0.3412262433363459 }, { "epoch": 5.884198182351216, "grad_norm": 0.30626560982390727, "learning_rate": 0.00016912747824450276, "loss": 2.7350263595581055, "step": 10038, "token_acc": 0.3358779440968589 }, { "epoch": 5.8847845206684255, "grad_norm": 0.2683487849631449, "learning_rate": 0.0001691204744964118, "loss": 2.7285869121551514, "step": 10039, "token_acc": 0.336587305430735 }, { "epoch": 5.885370858985635, "grad_norm": 0.2541068544379682, "learning_rate": 0.00016911347009902707, "loss": 2.7220938205718994, "step": 10040, "token_acc": 0.33699998427829514 }, { "epoch": 5.885957197302844, "grad_norm": 0.26701081712120683, "learning_rate": 0.00016910646505241444, "loss": 2.714755058288574, "step": 10041, "token_acc": 0.3411365128444058 }, { "epoch": 5.886543535620053, "grad_norm": 0.2660290823455849, "learning_rate": 0.0001690994593566397, "loss": 2.7057929039001465, "step": 10042, "token_acc": 0.3422482336111616 }, { "epoch": 5.887129873937262, "grad_norm": 0.27057891232764697, "learning_rate": 0.00016909245301176861, "loss": 2.7170567512512207, "step": 10043, "token_acc": 0.33949803925677746 }, { "epoch": 5.887716212254471, "grad_norm": 0.253691041291685, "learning_rate": 0.00016908544601786706, "loss": 2.713286876678467, "step": 10044, "token_acc": 0.33850873429202005 }, { "epoch": 5.88830255057168, "grad_norm": 0.26975142116684775, "learning_rate": 0.00016907843837500085, "loss": 2.698566436767578, "step": 10045, "token_acc": 0.34244160536843465 }, { "epoch": 5.888888888888889, "grad_norm": 0.25655403369670665, "learning_rate": 0.0001690714300832358, "loss": 2.7141926288604736, "step": 10046, "token_acc": 0.33841881063634144 }, { "epoch": 5.889475227206098, "grad_norm": 0.2775470679048955, "learning_rate": 0.0001690644211426377, "loss": 2.6909971237182617, "step": 10047, "token_acc": 0.34290730653221085 }, { "epoch": 5.890061565523307, "grad_norm": 0.24186288697184866, "learning_rate": 0.00016905741155327246, "loss": 2.728443145751953, "step": 10048, "token_acc": 0.3360793724703607 }, { "epoch": 5.890647903840516, "grad_norm": 0.2658494822318302, "learning_rate": 0.00016905040131520588, "loss": 2.6868700981140137, "step": 10049, "token_acc": 0.34324313822001734 }, { "epoch": 5.891234242157725, "grad_norm": 0.24731428669705158, "learning_rate": 0.00016904339042850386, "loss": 2.6995363235473633, "step": 10050, "token_acc": 0.3438812381419344 }, { "epoch": 5.891820580474934, "grad_norm": 0.26846177166257823, "learning_rate": 0.00016903637889323218, "loss": 2.7287354469299316, "step": 10051, "token_acc": 0.33603048879726577 }, { "epoch": 5.892406918792143, "grad_norm": 0.2503188336907777, "learning_rate": 0.00016902936670945678, "loss": 2.719649076461792, "step": 10052, "token_acc": 0.3387959422137242 }, { "epoch": 5.892993257109352, "grad_norm": 0.28499320101761116, "learning_rate": 0.00016902235387724346, "loss": 2.723842144012451, "step": 10053, "token_acc": 0.3391266773270406 }, { "epoch": 5.893579595426561, "grad_norm": 0.26114462898386326, "learning_rate": 0.00016901534039665816, "loss": 2.697676658630371, "step": 10054, "token_acc": 0.34146936334931344 }, { "epoch": 5.89416593374377, "grad_norm": 0.2582907228033285, "learning_rate": 0.00016900832626776672, "loss": 2.697214126586914, "step": 10055, "token_acc": 0.3404737483688385 }, { "epoch": 5.8947522720609795, "grad_norm": 0.2775114090668468, "learning_rate": 0.00016900131149063507, "loss": 2.7080087661743164, "step": 10056, "token_acc": 0.3397652943938934 }, { "epoch": 5.895338610378188, "grad_norm": 0.26195127181940703, "learning_rate": 0.00016899429606532905, "loss": 2.7213408946990967, "step": 10057, "token_acc": 0.3380230583148901 }, { "epoch": 5.895924948695397, "grad_norm": 0.24399284525978315, "learning_rate": 0.0001689872799919146, "loss": 2.7128515243530273, "step": 10058, "token_acc": 0.34011451895774175 }, { "epoch": 5.896511287012606, "grad_norm": 0.2527571299463814, "learning_rate": 0.00016898026327045765, "loss": 2.6993370056152344, "step": 10059, "token_acc": 0.34091895163666164 }, { "epoch": 5.897097625329815, "grad_norm": 0.2455955128465418, "learning_rate": 0.00016897324590102404, "loss": 2.7303175926208496, "step": 10060, "token_acc": 0.3350498483335134 }, { "epoch": 5.897683963647024, "grad_norm": 0.2581313722605992, "learning_rate": 0.00016896622788367975, "loss": 2.6940836906433105, "step": 10061, "token_acc": 0.34191245064570297 }, { "epoch": 5.898270301964233, "grad_norm": 0.28236584875064186, "learning_rate": 0.00016895920921849067, "loss": 2.711979627609253, "step": 10062, "token_acc": 0.33907035010219555 }, { "epoch": 5.898856640281442, "grad_norm": 0.2548364117502969, "learning_rate": 0.00016895218990552272, "loss": 2.715607166290283, "step": 10063, "token_acc": 0.3397585628993401 }, { "epoch": 5.8994429785986515, "grad_norm": 0.2546606514810528, "learning_rate": 0.0001689451699448419, "loss": 2.724278211593628, "step": 10064, "token_acc": 0.33668514693626117 }, { "epoch": 5.900029316915861, "grad_norm": 0.3172609746865497, "learning_rate": 0.00016893814933651412, "loss": 2.7009623050689697, "step": 10065, "token_acc": 0.34205782922508426 }, { "epoch": 5.90061565523307, "grad_norm": 0.28790419764338393, "learning_rate": 0.00016893112808060527, "loss": 2.723097801208496, "step": 10066, "token_acc": 0.3378764518449995 }, { "epoch": 5.901201993550279, "grad_norm": 0.25280529544607333, "learning_rate": 0.0001689241061771814, "loss": 2.6836447715759277, "step": 10067, "token_acc": 0.34226113233922384 }, { "epoch": 5.901788331867488, "grad_norm": 0.33438953330389093, "learning_rate": 0.00016891708362630842, "loss": 2.741642475128174, "step": 10068, "token_acc": 0.33421431069998836 }, { "epoch": 5.902374670184696, "grad_norm": 0.25299918839933627, "learning_rate": 0.0001689100604280523, "loss": 2.743349075317383, "step": 10069, "token_acc": 0.3329881576945539 }, { "epoch": 5.902961008501905, "grad_norm": 0.2690771408670462, "learning_rate": 0.00016890303658247902, "loss": 2.7648682594299316, "step": 10070, "token_acc": 0.3312713595172364 }, { "epoch": 5.903547346819114, "grad_norm": 0.2610014029532568, "learning_rate": 0.00016889601208965456, "loss": 2.7135047912597656, "step": 10071, "token_acc": 0.33981160886858663 }, { "epoch": 5.9041336851363235, "grad_norm": 0.24999015687568105, "learning_rate": 0.00016888898694964488, "loss": 2.664006233215332, "step": 10072, "token_acc": 0.34749988943894816 }, { "epoch": 5.904720023453533, "grad_norm": 0.257429646063742, "learning_rate": 0.00016888196116251604, "loss": 2.719818353652954, "step": 10073, "token_acc": 0.3377865245978108 }, { "epoch": 5.905306361770742, "grad_norm": 0.26387080191662715, "learning_rate": 0.00016887493472833397, "loss": 2.7447457313537598, "step": 10074, "token_acc": 0.33444293757448734 }, { "epoch": 5.905892700087951, "grad_norm": 0.24807202646982135, "learning_rate": 0.0001688679076471647, "loss": 2.721515655517578, "step": 10075, "token_acc": 0.33652087884699017 }, { "epoch": 5.90647903840516, "grad_norm": 0.27162421528252745, "learning_rate": 0.00016886087991907424, "loss": 2.7095324993133545, "step": 10076, "token_acc": 0.3397269967298922 }, { "epoch": 5.907065376722369, "grad_norm": 0.25609099850570555, "learning_rate": 0.00016885385154412862, "loss": 2.680759906768799, "step": 10077, "token_acc": 0.3448195958154769 }, { "epoch": 5.907651715039578, "grad_norm": 0.24784251886823802, "learning_rate": 0.00016884682252239384, "loss": 2.741856098175049, "step": 10078, "token_acc": 0.3345811934347349 }, { "epoch": 5.908238053356786, "grad_norm": 0.2555031980846938, "learning_rate": 0.00016883979285393593, "loss": 2.675039291381836, "step": 10079, "token_acc": 0.34487673989602546 }, { "epoch": 5.9088243916739955, "grad_norm": 0.24819935423417258, "learning_rate": 0.00016883276253882093, "loss": 2.685575485229492, "step": 10080, "token_acc": 0.34459982854959154 }, { "epoch": 5.909410729991205, "grad_norm": 0.2676498754293093, "learning_rate": 0.00016882573157711486, "loss": 2.6943202018737793, "step": 10081, "token_acc": 0.3405362134491183 }, { "epoch": 5.909997068308414, "grad_norm": 0.2686884666601504, "learning_rate": 0.0001688186999688838, "loss": 2.768827199935913, "step": 10082, "token_acc": 0.3319729039286235 }, { "epoch": 5.910583406625623, "grad_norm": 0.2523587822239195, "learning_rate": 0.0001688116677141938, "loss": 2.7474024295806885, "step": 10083, "token_acc": 0.3340068013062398 }, { "epoch": 5.911169744942832, "grad_norm": 0.2654585892112961, "learning_rate": 0.0001688046348131109, "loss": 2.7422313690185547, "step": 10084, "token_acc": 0.3355551605640479 }, { "epoch": 5.911756083260041, "grad_norm": 0.2584848805150892, "learning_rate": 0.0001687976012657012, "loss": 2.7703118324279785, "step": 10085, "token_acc": 0.3311910221908282 }, { "epoch": 5.91234242157725, "grad_norm": 0.2544015870909013, "learning_rate": 0.00016879056707203068, "loss": 2.7154178619384766, "step": 10086, "token_acc": 0.3379070265079357 }, { "epoch": 5.912928759894459, "grad_norm": 0.2611113798033157, "learning_rate": 0.0001687835322321655, "loss": 2.7297258377075195, "step": 10087, "token_acc": 0.3378544941492403 }, { "epoch": 5.913515098211668, "grad_norm": 0.24706921966583278, "learning_rate": 0.00016877649674617174, "loss": 2.7041428089141846, "step": 10088, "token_acc": 0.34071838353377365 }, { "epoch": 5.9141014365288775, "grad_norm": 0.2561260248175171, "learning_rate": 0.00016876946061411546, "loss": 2.724003314971924, "step": 10089, "token_acc": 0.33762091019609913 }, { "epoch": 5.914687774846087, "grad_norm": 0.24707000149635414, "learning_rate": 0.00016876242383606277, "loss": 2.734863519668579, "step": 10090, "token_acc": 0.33636622268201216 }, { "epoch": 5.915274113163295, "grad_norm": 0.249752896429239, "learning_rate": 0.00016875538641207975, "loss": 2.721538543701172, "step": 10091, "token_acc": 0.33632850752984145 }, { "epoch": 5.915860451480504, "grad_norm": 0.2612057511735482, "learning_rate": 0.00016874834834223256, "loss": 2.6812188625335693, "step": 10092, "token_acc": 0.3451552663297347 }, { "epoch": 5.916446789797713, "grad_norm": 0.2843336172093094, "learning_rate": 0.00016874130962658726, "loss": 2.7335152626037598, "step": 10093, "token_acc": 0.33540852236246455 }, { "epoch": 5.917033128114922, "grad_norm": 0.28123713903239245, "learning_rate": 0.00016873427026520998, "loss": 2.7167530059814453, "step": 10094, "token_acc": 0.33933930053012074 }, { "epoch": 5.917619466432131, "grad_norm": 0.25441452740695014, "learning_rate": 0.00016872723025816683, "loss": 2.6968724727630615, "step": 10095, "token_acc": 0.34167201687608917 }, { "epoch": 5.91820580474934, "grad_norm": 0.25176451116859444, "learning_rate": 0.000168720189605524, "loss": 2.705535650253296, "step": 10096, "token_acc": 0.34005191385106875 }, { "epoch": 5.9187921430665495, "grad_norm": 0.26072492490265464, "learning_rate": 0.00016871314830734757, "loss": 2.7111315727233887, "step": 10097, "token_acc": 0.3392982410389475 }, { "epoch": 5.919378481383759, "grad_norm": 0.2767946720655263, "learning_rate": 0.0001687061063637037, "loss": 2.727653980255127, "step": 10098, "token_acc": 0.3377626509118931 }, { "epoch": 5.919964819700968, "grad_norm": 0.26826580300472896, "learning_rate": 0.00016869906377465856, "loss": 2.7062578201293945, "step": 10099, "token_acc": 0.34077583669557016 }, { "epoch": 5.920551158018176, "grad_norm": 0.2633998971499819, "learning_rate": 0.0001686920205402783, "loss": 2.7585015296936035, "step": 10100, "token_acc": 0.33199755428495886 }, { "epoch": 5.921137496335385, "grad_norm": 0.252971193379832, "learning_rate": 0.00016868497666062903, "loss": 2.749950885772705, "step": 10101, "token_acc": 0.3347576951642185 }, { "epoch": 5.921723834652594, "grad_norm": 0.3218610297108547, "learning_rate": 0.00016867793213577698, "loss": 2.6981067657470703, "step": 10102, "token_acc": 0.341605800992275 }, { "epoch": 5.922310172969803, "grad_norm": 0.38947634568837347, "learning_rate": 0.0001686708869657883, "loss": 2.733358860015869, "step": 10103, "token_acc": 0.33697598914481636 }, { "epoch": 5.9228965112870124, "grad_norm": 0.3371059251436706, "learning_rate": 0.00016866384115072917, "loss": 2.7359580993652344, "step": 10104, "token_acc": 0.336489905845813 }, { "epoch": 5.923482849604222, "grad_norm": 0.2743430984718241, "learning_rate": 0.0001686567946906658, "loss": 2.7520432472229004, "step": 10105, "token_acc": 0.33413284558810324 }, { "epoch": 5.924069187921431, "grad_norm": 0.3126853672817783, "learning_rate": 0.00016864974758566434, "loss": 2.7216532230377197, "step": 10106, "token_acc": 0.3373539012993663 }, { "epoch": 5.92465552623864, "grad_norm": 0.26192901266594687, "learning_rate": 0.000168642699835791, "loss": 2.7034573554992676, "step": 10107, "token_acc": 0.3406948537836108 }, { "epoch": 5.925241864555849, "grad_norm": 0.2706106850519992, "learning_rate": 0.000168635651441112, "loss": 2.7299089431762695, "step": 10108, "token_acc": 0.3362670817104227 }, { "epoch": 5.925828202873058, "grad_norm": 0.2611442056301714, "learning_rate": 0.00016862860240169356, "loss": 2.738332509994507, "step": 10109, "token_acc": 0.33614985293918326 }, { "epoch": 5.926414541190267, "grad_norm": 0.2541051735472812, "learning_rate": 0.00016862155271760187, "loss": 2.7286152839660645, "step": 10110, "token_acc": 0.3371387562449971 }, { "epoch": 5.927000879507476, "grad_norm": 0.27289537558375987, "learning_rate": 0.00016861450238890314, "loss": 2.6953213214874268, "step": 10111, "token_acc": 0.3430040074420237 }, { "epoch": 5.927587217824685, "grad_norm": 0.2504841583694696, "learning_rate": 0.00016860745141566365, "loss": 2.7200679779052734, "step": 10112, "token_acc": 0.33947174996804297 }, { "epoch": 5.928173556141894, "grad_norm": 0.23989532943745695, "learning_rate": 0.00016860039979794958, "loss": 2.68721866607666, "step": 10113, "token_acc": 0.34244721169463993 }, { "epoch": 5.928759894459103, "grad_norm": 0.24303221104789188, "learning_rate": 0.00016859334753582724, "loss": 2.7521440982818604, "step": 10114, "token_acc": 0.3336711281567576 }, { "epoch": 5.929346232776312, "grad_norm": 0.24885470575374802, "learning_rate": 0.00016858629462936277, "loss": 2.745771884918213, "step": 10115, "token_acc": 0.336010983460049 }, { "epoch": 5.929932571093521, "grad_norm": 0.2581521681386411, "learning_rate": 0.00016857924107862248, "loss": 2.704254150390625, "step": 10116, "token_acc": 0.3404411349918204 }, { "epoch": 5.93051890941073, "grad_norm": 0.26578524238338724, "learning_rate": 0.00016857218688367268, "loss": 2.7237706184387207, "step": 10117, "token_acc": 0.33910852425349725 }, { "epoch": 5.931105247727939, "grad_norm": 0.2522166406505705, "learning_rate": 0.00016856513204457959, "loss": 2.7263214588165283, "step": 10118, "token_acc": 0.3374403567678826 }, { "epoch": 5.931691586045148, "grad_norm": 0.2530443232199545, "learning_rate": 0.00016855807656140941, "loss": 2.722933769226074, "step": 10119, "token_acc": 0.33871392072044726 }, { "epoch": 5.932277924362357, "grad_norm": 0.27053339127944986, "learning_rate": 0.00016855102043422852, "loss": 2.7333338260650635, "step": 10120, "token_acc": 0.33559873377390875 }, { "epoch": 5.9328642626795665, "grad_norm": 0.2537423696113559, "learning_rate": 0.00016854396366310317, "loss": 2.7365479469299316, "step": 10121, "token_acc": 0.3346153238255299 }, { "epoch": 5.933450600996775, "grad_norm": 0.25255995469474407, "learning_rate": 0.00016853690624809965, "loss": 2.692580461502075, "step": 10122, "token_acc": 0.3423083300383914 }, { "epoch": 5.934036939313984, "grad_norm": 0.25241324414005767, "learning_rate": 0.00016852984818928427, "loss": 2.702218532562256, "step": 10123, "token_acc": 0.34203271930954526 }, { "epoch": 5.934623277631193, "grad_norm": 0.24392493538590038, "learning_rate": 0.00016852278948672328, "loss": 2.733661413192749, "step": 10124, "token_acc": 0.33667951084032816 }, { "epoch": 5.935209615948402, "grad_norm": 0.24434229797482795, "learning_rate": 0.00016851573014048304, "loss": 2.6804187297821045, "step": 10125, "token_acc": 0.34484381146813875 }, { "epoch": 5.935795954265611, "grad_norm": 0.24714498283598646, "learning_rate": 0.0001685086701506298, "loss": 2.6839091777801514, "step": 10126, "token_acc": 0.34314231005417956 }, { "epoch": 5.93638229258282, "grad_norm": 0.2604206156321687, "learning_rate": 0.00016850160951722995, "loss": 2.6907825469970703, "step": 10127, "token_acc": 0.3426993880640688 }, { "epoch": 5.936968630900029, "grad_norm": 0.25348794327519003, "learning_rate": 0.0001684945482403498, "loss": 2.722682476043701, "step": 10128, "token_acc": 0.33785346672063843 }, { "epoch": 5.9375549692172385, "grad_norm": 0.2663427991611868, "learning_rate": 0.00016848748632005567, "loss": 2.717118263244629, "step": 10129, "token_acc": 0.3382822151796745 }, { "epoch": 5.938141307534448, "grad_norm": 0.2592244041794843, "learning_rate": 0.00016848042375641387, "loss": 2.698159694671631, "step": 10130, "token_acc": 0.34121336067256647 }, { "epoch": 5.938727645851657, "grad_norm": 0.26502909876679615, "learning_rate": 0.00016847336054949077, "loss": 2.7254021167755127, "step": 10131, "token_acc": 0.33753504314402666 }, { "epoch": 5.939313984168866, "grad_norm": 0.3027665784760766, "learning_rate": 0.0001684662966993527, "loss": 2.7336504459381104, "step": 10132, "token_acc": 0.33650704678567445 }, { "epoch": 5.939900322486075, "grad_norm": 0.2943076997509595, "learning_rate": 0.00016845923220606607, "loss": 2.738840341567993, "step": 10133, "token_acc": 0.33701679740275453 }, { "epoch": 5.940486660803283, "grad_norm": 0.23510233266352326, "learning_rate": 0.0001684521670696972, "loss": 2.6735754013061523, "step": 10134, "token_acc": 0.3450651931211783 }, { "epoch": 5.941072999120492, "grad_norm": 0.28858344536940916, "learning_rate": 0.00016844510129031243, "loss": 2.7069296836853027, "step": 10135, "token_acc": 0.3413103807813627 }, { "epoch": 5.941659337437701, "grad_norm": 0.29836664348950076, "learning_rate": 0.0001684380348679782, "loss": 2.6930034160614014, "step": 10136, "token_acc": 0.342329442658203 }, { "epoch": 5.9422456757549105, "grad_norm": 0.24096952179874076, "learning_rate": 0.00016843096780276082, "loss": 2.750548839569092, "step": 10137, "token_acc": 0.33375187697405895 }, { "epoch": 5.94283201407212, "grad_norm": 0.2648365452386403, "learning_rate": 0.00016842390009472674, "loss": 2.7154617309570312, "step": 10138, "token_acc": 0.33912107303884664 }, { "epoch": 5.943418352389329, "grad_norm": 0.2731096608202849, "learning_rate": 0.00016841683174394228, "loss": 2.6951241493225098, "step": 10139, "token_acc": 0.3432894149173738 }, { "epoch": 5.944004690706538, "grad_norm": 0.2468093132454965, "learning_rate": 0.0001684097627504739, "loss": 2.680095911026001, "step": 10140, "token_acc": 0.3441169368227233 }, { "epoch": 5.944591029023747, "grad_norm": 0.23566015728538875, "learning_rate": 0.000168402693114388, "loss": 2.727893352508545, "step": 10141, "token_acc": 0.3377867698626128 }, { "epoch": 5.945177367340956, "grad_norm": 0.2659399442860299, "learning_rate": 0.00016839562283575097, "loss": 2.7441470623016357, "step": 10142, "token_acc": 0.3349252364561787 }, { "epoch": 5.945763705658165, "grad_norm": 0.2714657672663293, "learning_rate": 0.00016838855191462918, "loss": 2.703430414199829, "step": 10143, "token_acc": 0.3400894624091793 }, { "epoch": 5.946350043975373, "grad_norm": 0.24435987325458336, "learning_rate": 0.00016838148035108917, "loss": 2.699824333190918, "step": 10144, "token_acc": 0.3393396688442198 }, { "epoch": 5.9469363822925825, "grad_norm": 0.24733412993940038, "learning_rate": 0.00016837440814519724, "loss": 2.717195510864258, "step": 10145, "token_acc": 0.3383077331861708 }, { "epoch": 5.947522720609792, "grad_norm": 0.2515290675540613, "learning_rate": 0.0001683673352970199, "loss": 2.7203683853149414, "step": 10146, "token_acc": 0.3375116699467514 }, { "epoch": 5.948109058927001, "grad_norm": 0.2646140643081045, "learning_rate": 0.00016836026180662357, "loss": 2.728562355041504, "step": 10147, "token_acc": 0.33753807721901913 }, { "epoch": 5.94869539724421, "grad_norm": 0.2498993709438596, "learning_rate": 0.0001683531876740747, "loss": 2.753882884979248, "step": 10148, "token_acc": 0.3342149123512168 }, { "epoch": 5.949281735561419, "grad_norm": 0.2594515677619726, "learning_rate": 0.0001683461128994398, "loss": 2.735823631286621, "step": 10149, "token_acc": 0.335089879340064 }, { "epoch": 5.949868073878628, "grad_norm": 0.23983166408656292, "learning_rate": 0.00016833903748278517, "loss": 2.699150562286377, "step": 10150, "token_acc": 0.34081424230580104 }, { "epoch": 5.950454412195837, "grad_norm": 0.26590181742368757, "learning_rate": 0.0001683319614241774, "loss": 2.707749128341675, "step": 10151, "token_acc": 0.34069754168958255 }, { "epoch": 5.951040750513046, "grad_norm": 0.2631816576989613, "learning_rate": 0.00016832488472368296, "loss": 2.7378439903259277, "step": 10152, "token_acc": 0.3353515922696478 }, { "epoch": 5.951627088830255, "grad_norm": 0.2568349334883304, "learning_rate": 0.00016831780738136827, "loss": 2.724374294281006, "step": 10153, "token_acc": 0.33815388002893904 }, { "epoch": 5.9522134271474645, "grad_norm": 0.25290173775604213, "learning_rate": 0.00016831072939729985, "loss": 2.75616455078125, "step": 10154, "token_acc": 0.3337582123407342 }, { "epoch": 5.952799765464674, "grad_norm": 0.2510819112805984, "learning_rate": 0.00016830365077154415, "loss": 2.746598720550537, "step": 10155, "token_acc": 0.3346535841027553 }, { "epoch": 5.953386103781882, "grad_norm": 0.2570786773194706, "learning_rate": 0.0001682965715041677, "loss": 2.715606927871704, "step": 10156, "token_acc": 0.3382761914033209 }, { "epoch": 5.953972442099091, "grad_norm": 0.25310745334052026, "learning_rate": 0.00016828949159523703, "loss": 2.718245029449463, "step": 10157, "token_acc": 0.33891849918916495 }, { "epoch": 5.9545587804163, "grad_norm": 0.2643569262687907, "learning_rate": 0.00016828241104481858, "loss": 2.7268900871276855, "step": 10158, "token_acc": 0.3371385420599685 }, { "epoch": 5.955145118733509, "grad_norm": 0.27702447603389446, "learning_rate": 0.0001682753298529789, "loss": 2.727816581726074, "step": 10159, "token_acc": 0.33719018124850864 }, { "epoch": 5.955731457050718, "grad_norm": 0.295007505266152, "learning_rate": 0.0001682682480197845, "loss": 2.742077589035034, "step": 10160, "token_acc": 0.3336946370081951 }, { "epoch": 5.956317795367927, "grad_norm": 0.2786256298859136, "learning_rate": 0.00016826116554530187, "loss": 2.7045798301696777, "step": 10161, "token_acc": 0.3411555187192144 }, { "epoch": 5.9569041336851365, "grad_norm": 0.2542391678454758, "learning_rate": 0.0001682540824295976, "loss": 2.7173380851745605, "step": 10162, "token_acc": 0.33700387382108304 }, { "epoch": 5.957490472002346, "grad_norm": 0.2677545372647092, "learning_rate": 0.0001682469986727382, "loss": 2.689178943634033, "step": 10163, "token_acc": 0.3434429090345015 }, { "epoch": 5.958076810319555, "grad_norm": 0.3021554761012849, "learning_rate": 0.00016823991427479017, "loss": 2.7516028881073, "step": 10164, "token_acc": 0.3331225143910727 }, { "epoch": 5.958663148636763, "grad_norm": 0.27981576317063545, "learning_rate": 0.00016823282923582019, "loss": 2.751741647720337, "step": 10165, "token_acc": 0.3335085809696918 }, { "epoch": 5.959249486953972, "grad_norm": 0.26273494458440705, "learning_rate": 0.00016822574355589465, "loss": 2.7154407501220703, "step": 10166, "token_acc": 0.33911059166832846 }, { "epoch": 5.959835825271181, "grad_norm": 0.2986589720688196, "learning_rate": 0.00016821865723508022, "loss": 2.7141833305358887, "step": 10167, "token_acc": 0.33851671780974774 }, { "epoch": 5.96042216358839, "grad_norm": 0.3029207539838716, "learning_rate": 0.0001682115702734434, "loss": 2.704883575439453, "step": 10168, "token_acc": 0.3398980263934775 }, { "epoch": 5.961008501905599, "grad_norm": 0.25979838496000174, "learning_rate": 0.0001682044826710508, "loss": 2.704939126968384, "step": 10169, "token_acc": 0.34141642824132973 }, { "epoch": 5.9615948402228085, "grad_norm": 0.395455216654233, "learning_rate": 0.00016819739442796902, "loss": 2.756603717803955, "step": 10170, "token_acc": 0.3303579438413923 }, { "epoch": 5.962181178540018, "grad_norm": 0.29430979921994643, "learning_rate": 0.00016819030554426459, "loss": 2.71714448928833, "step": 10171, "token_acc": 0.33845494645307483 }, { "epoch": 5.962767516857227, "grad_norm": 0.2950663206671342, "learning_rate": 0.00016818321602000413, "loss": 2.7282814979553223, "step": 10172, "token_acc": 0.3377189158696377 }, { "epoch": 5.963353855174436, "grad_norm": 0.32582751939322346, "learning_rate": 0.00016817612585525425, "loss": 2.7257494926452637, "step": 10173, "token_acc": 0.338313277181387 }, { "epoch": 5.963940193491645, "grad_norm": 0.26229508108503635, "learning_rate": 0.0001681690350500815, "loss": 2.739900827407837, "step": 10174, "token_acc": 0.3351960028580282 }, { "epoch": 5.964526531808854, "grad_norm": 0.316062294610996, "learning_rate": 0.00016816194360455255, "loss": 2.7042412757873535, "step": 10175, "token_acc": 0.33954629191566515 }, { "epoch": 5.965112870126063, "grad_norm": 0.24812786199407416, "learning_rate": 0.000168154851518734, "loss": 2.7209177017211914, "step": 10176, "token_acc": 0.3394989617487773 }, { "epoch": 5.965699208443271, "grad_norm": 0.3730941020562639, "learning_rate": 0.00016814775879269247, "loss": 2.7558434009552, "step": 10177, "token_acc": 0.3322239895591174 }, { "epoch": 5.9662855467604805, "grad_norm": 0.2582665561513139, "learning_rate": 0.00016814066542649453, "loss": 2.7265844345092773, "step": 10178, "token_acc": 0.3356349214731572 }, { "epoch": 5.96687188507769, "grad_norm": 0.3161310886194945, "learning_rate": 0.00016813357142020689, "loss": 2.7031235694885254, "step": 10179, "token_acc": 0.3423281820868433 }, { "epoch": 5.967458223394899, "grad_norm": 0.2545084193008516, "learning_rate": 0.00016812647677389616, "loss": 2.649871826171875, "step": 10180, "token_acc": 0.3487021071794765 }, { "epoch": 5.968044561712108, "grad_norm": 0.32581494917799214, "learning_rate": 0.00016811938148762897, "loss": 2.7154979705810547, "step": 10181, "token_acc": 0.3380478176340511 }, { "epoch": 5.968630900029317, "grad_norm": 0.2659309235677399, "learning_rate": 0.00016811228556147198, "loss": 2.7338480949401855, "step": 10182, "token_acc": 0.3361925209763037 }, { "epoch": 5.969217238346526, "grad_norm": 0.3053709233765362, "learning_rate": 0.00016810518899549188, "loss": 2.7382726669311523, "step": 10183, "token_acc": 0.335295700285298 }, { "epoch": 5.969803576663735, "grad_norm": 0.24617108457007933, "learning_rate": 0.00016809809178975526, "loss": 2.712703227996826, "step": 10184, "token_acc": 0.34116178867473895 }, { "epoch": 5.970389914980944, "grad_norm": 0.2840253339153727, "learning_rate": 0.00016809099394432883, "loss": 2.7629711627960205, "step": 10185, "token_acc": 0.3304271246147072 }, { "epoch": 5.970976253298153, "grad_norm": 0.2578011520710332, "learning_rate": 0.0001680838954592793, "loss": 2.7712347507476807, "step": 10186, "token_acc": 0.3316614953032686 }, { "epoch": 5.971562591615362, "grad_norm": 0.2797721129236242, "learning_rate": 0.0001680767963346733, "loss": 2.7358903884887695, "step": 10187, "token_acc": 0.3354225150676748 }, { "epoch": 5.972148929932571, "grad_norm": 0.26448160579548713, "learning_rate": 0.00016806969657057755, "loss": 2.7082791328430176, "step": 10188, "token_acc": 0.34017997585689724 }, { "epoch": 5.97273526824978, "grad_norm": 0.28639745722024423, "learning_rate": 0.00016806259616705872, "loss": 2.6926002502441406, "step": 10189, "token_acc": 0.34232629509111445 }, { "epoch": 5.973321606566989, "grad_norm": 0.25117330132637294, "learning_rate": 0.00016805549512418348, "loss": 2.7002205848693848, "step": 10190, "token_acc": 0.3402950698007703 }, { "epoch": 5.973907944884198, "grad_norm": 0.26488006377207257, "learning_rate": 0.0001680483934420186, "loss": 2.7291829586029053, "step": 10191, "token_acc": 0.33692652684342433 }, { "epoch": 5.974494283201407, "grad_norm": 0.2484832530770182, "learning_rate": 0.00016804129112063076, "loss": 2.7229056358337402, "step": 10192, "token_acc": 0.33814306863511123 }, { "epoch": 5.975080621518616, "grad_norm": 0.2508066362067838, "learning_rate": 0.00016803418816008667, "loss": 2.7284324169158936, "step": 10193, "token_acc": 0.3374094362961654 }, { "epoch": 5.975666959835825, "grad_norm": 0.24051638550463983, "learning_rate": 0.00016802708456045305, "loss": 2.7369537353515625, "step": 10194, "token_acc": 0.3355770618210907 }, { "epoch": 5.9762532981530345, "grad_norm": 0.24410167997947352, "learning_rate": 0.00016801998032179663, "loss": 2.72306752204895, "step": 10195, "token_acc": 0.33823716420282945 }, { "epoch": 5.976839636470244, "grad_norm": 0.24563474651349018, "learning_rate": 0.00016801287544418418, "loss": 2.742371082305908, "step": 10196, "token_acc": 0.33539072633367345 }, { "epoch": 5.977425974787453, "grad_norm": 0.25293949088856804, "learning_rate": 0.00016800576992768242, "loss": 2.742272138595581, "step": 10197, "token_acc": 0.33645290090167707 }, { "epoch": 5.978012313104662, "grad_norm": 0.2694217609201809, "learning_rate": 0.00016799866377235808, "loss": 2.7645301818847656, "step": 10198, "token_acc": 0.3323620306724383 }, { "epoch": 5.97859865142187, "grad_norm": 0.24555958465568534, "learning_rate": 0.0001679915569782779, "loss": 2.727877140045166, "step": 10199, "token_acc": 0.33814461196160267 }, { "epoch": 5.979184989739079, "grad_norm": 0.31705340735516746, "learning_rate": 0.00016798444954550868, "loss": 2.757765054702759, "step": 10200, "token_acc": 0.33307569338559995 }, { "epoch": 5.979771328056288, "grad_norm": 0.3158558866135394, "learning_rate": 0.0001679773414741172, "loss": 2.6718242168426514, "step": 10201, "token_acc": 0.3452937102394126 }, { "epoch": 5.980357666373497, "grad_norm": 0.24459783158543644, "learning_rate": 0.00016797023276417017, "loss": 2.762197494506836, "step": 10202, "token_acc": 0.33071679405081594 }, { "epoch": 5.9809440046907065, "grad_norm": 0.35245369449167063, "learning_rate": 0.00016796312341573434, "loss": 2.7274842262268066, "step": 10203, "token_acc": 0.33664886359811835 }, { "epoch": 5.981530343007916, "grad_norm": 0.3059113994481763, "learning_rate": 0.00016795601342887664, "loss": 2.7295093536376953, "step": 10204, "token_acc": 0.3367064850066305 }, { "epoch": 5.982116681325125, "grad_norm": 0.2800803667709282, "learning_rate": 0.0001679489028036637, "loss": 2.7052507400512695, "step": 10205, "token_acc": 0.3412273052645279 }, { "epoch": 5.982703019642334, "grad_norm": 0.3617578018394989, "learning_rate": 0.00016794179154016242, "loss": 2.7270894050598145, "step": 10206, "token_acc": 0.33678452723231583 }, { "epoch": 5.983289357959543, "grad_norm": 0.2446705153648211, "learning_rate": 0.00016793467963843953, "loss": 2.7204036712646484, "step": 10207, "token_acc": 0.3384542896407303 }, { "epoch": 5.983875696276751, "grad_norm": 0.2962658203937414, "learning_rate": 0.00016792756709856188, "loss": 2.753791570663452, "step": 10208, "token_acc": 0.33443397290771926 }, { "epoch": 5.98446203459396, "grad_norm": 0.2392133190442609, "learning_rate": 0.00016792045392059627, "loss": 2.73746919631958, "step": 10209, "token_acc": 0.3362744608298081 }, { "epoch": 5.985048372911169, "grad_norm": 0.27775266005779287, "learning_rate": 0.00016791334010460952, "loss": 2.734940528869629, "step": 10210, "token_acc": 0.3354403208942055 }, { "epoch": 5.9856347112283785, "grad_norm": 0.26276167013246793, "learning_rate": 0.00016790622565066848, "loss": 2.7292447090148926, "step": 10211, "token_acc": 0.3374857438949148 }, { "epoch": 5.986221049545588, "grad_norm": 0.2687631395065838, "learning_rate": 0.00016789911055883994, "loss": 2.7581610679626465, "step": 10212, "token_acc": 0.3334221451944592 }, { "epoch": 5.986807387862797, "grad_norm": 0.2501641565992109, "learning_rate": 0.00016789199482919071, "loss": 2.7086219787597656, "step": 10213, "token_acc": 0.339484948785041 }, { "epoch": 5.987393726180006, "grad_norm": 0.2799095182632129, "learning_rate": 0.00016788487846178772, "loss": 2.7407708168029785, "step": 10214, "token_acc": 0.33759326194651984 }, { "epoch": 5.987980064497215, "grad_norm": 0.26749523677276577, "learning_rate": 0.00016787776145669775, "loss": 2.722445011138916, "step": 10215, "token_acc": 0.33732403250733306 }, { "epoch": 5.988566402814424, "grad_norm": 0.2486040432261087, "learning_rate": 0.0001678706438139877, "loss": 2.7272770404815674, "step": 10216, "token_acc": 0.337614079660969 }, { "epoch": 5.989152741131633, "grad_norm": 0.27012289743636514, "learning_rate": 0.0001678635255337244, "loss": 2.744518995285034, "step": 10217, "token_acc": 0.3362216102449189 }, { "epoch": 5.989739079448842, "grad_norm": 0.25075233346649883, "learning_rate": 0.00016785640661597467, "loss": 2.710411548614502, "step": 10218, "token_acc": 0.34014015896523714 }, { "epoch": 5.990325417766051, "grad_norm": 0.2751869613546627, "learning_rate": 0.00016784928706080552, "loss": 2.7205638885498047, "step": 10219, "token_acc": 0.3381181954079432 }, { "epoch": 5.99091175608326, "grad_norm": 0.2510417853898608, "learning_rate": 0.0001678421668682837, "loss": 2.706225872039795, "step": 10220, "token_acc": 0.34136128655702164 }, { "epoch": 5.991498094400469, "grad_norm": 0.2627004683909609, "learning_rate": 0.00016783504603847614, "loss": 2.705753803253174, "step": 10221, "token_acc": 0.3402494442180625 }, { "epoch": 5.992084432717678, "grad_norm": 0.2561212305433454, "learning_rate": 0.0001678279245714498, "loss": 2.7705225944519043, "step": 10222, "token_acc": 0.3310022175901504 }, { "epoch": 5.992670771034887, "grad_norm": 0.268456607013711, "learning_rate": 0.00016782080246727143, "loss": 2.6961536407470703, "step": 10223, "token_acc": 0.34109681752023185 }, { "epoch": 5.993257109352096, "grad_norm": 0.2481218322257767, "learning_rate": 0.00016781367972600804, "loss": 2.6990301609039307, "step": 10224, "token_acc": 0.34103218169247207 }, { "epoch": 5.993843447669305, "grad_norm": 0.2596926162538201, "learning_rate": 0.0001678065563477265, "loss": 2.711716413497925, "step": 10225, "token_acc": 0.3403301953832597 }, { "epoch": 5.994429785986514, "grad_norm": 0.264815682982942, "learning_rate": 0.00016779943233249372, "loss": 2.745940685272217, "step": 10226, "token_acc": 0.33574427455955635 }, { "epoch": 5.995016124303723, "grad_norm": 0.25475044965603727, "learning_rate": 0.00016779230768037667, "loss": 2.7270400524139404, "step": 10227, "token_acc": 0.33782008491095034 }, { "epoch": 5.9956024626209325, "grad_norm": 0.2866190072061432, "learning_rate": 0.00016778518239144222, "loss": 2.698831081390381, "step": 10228, "token_acc": 0.3416493743318871 }, { "epoch": 5.996188800938142, "grad_norm": 0.27933860036912594, "learning_rate": 0.0001677780564657573, "loss": 2.727341651916504, "step": 10229, "token_acc": 0.33891827585854767 }, { "epoch": 5.99677513925535, "grad_norm": 0.2560384106467328, "learning_rate": 0.00016777092990338888, "loss": 2.720071792602539, "step": 10230, "token_acc": 0.33778281933362697 }, { "epoch": 5.997361477572559, "grad_norm": 0.26400750208288487, "learning_rate": 0.00016776380270440395, "loss": 2.7431836128234863, "step": 10231, "token_acc": 0.33481992703110036 }, { "epoch": 5.997947815889768, "grad_norm": 0.26428567522865265, "learning_rate": 0.00016775667486886936, "loss": 2.7432212829589844, "step": 10232, "token_acc": 0.33454362518865244 }, { "epoch": 5.998534154206977, "grad_norm": 0.2566944884692843, "learning_rate": 0.00016774954639685213, "loss": 2.712904453277588, "step": 10233, "token_acc": 0.3389582481905791 }, { "epoch": 5.999120492524186, "grad_norm": 0.24888684863633628, "learning_rate": 0.00016774241728841917, "loss": 2.7438740730285645, "step": 10234, "token_acc": 0.3357275316071155 }, { "epoch": 5.999706830841395, "grad_norm": 0.28585631802949457, "learning_rate": 0.00016773528754363755, "loss": 2.7697067260742188, "step": 10235, "token_acc": 0.33190997664064525 }, { "epoch": 6.0, "grad_norm": 0.33256047208254136, "learning_rate": 0.00016772815716257412, "loss": 2.7731783390045166, "step": 10236, "token_acc": 0.3298436325828933 }, { "epoch": 6.0, "eval_loss": 3.1927804946899414, "eval_runtime": 16.7898, "eval_samples_per_second": 15.247, "eval_steps_per_second": 1.906, "eval_token_acc": 0.2776745537640563, "step": 10236 } ], "logging_steps": 1, "max_steps": 34120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": -34120, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7329911390339072.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }