{ "best_global_step": 11942, "best_metric": 3.06417274, "best_model_checkpoint": "/inspire/hdd/project/deepanalysis/guitao-25013/Muse/workspace/Finals/ckpt/Muse_0.6b_main_5e-4/v2-20251228-192522/checkpoint-11942", "epoch": 7.0, "eval_steps": 500, "global_step": 11942, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005863383172090296, "grad_norm": 406.2271101390586, "learning_rate": 2.9308323563892146e-07, "loss": 20.440696716308594, "step": 1, "token_acc": 0.005169762453967869 }, { "epoch": 0.0011726766344180592, "grad_norm": 412.16447882895346, "learning_rate": 5.861664712778429e-07, "loss": 20.451984405517578, "step": 2, "token_acc": 0.005461987590491526 }, { "epoch": 0.001759014951627089, "grad_norm": 418.20941122468446, "learning_rate": 8.792497069167644e-07, "loss": 20.480960845947266, "step": 3, "token_acc": 0.005607774672857289 }, { "epoch": 0.0023453532688361184, "grad_norm": 410.87151701566614, "learning_rate": 1.1723329425556858e-06, "loss": 20.430644989013672, "step": 4, "token_acc": 0.005623277022171703 }, { "epoch": 0.002931691586045148, "grad_norm": 409.3267148139974, "learning_rate": 1.4654161781946073e-06, "loss": 20.39999771118164, "step": 5, "token_acc": 0.005599030271045839 }, { "epoch": 0.003518029903254178, "grad_norm": 388.1571609711306, "learning_rate": 1.7584994138335288e-06, "loss": 20.17621612548828, "step": 6, "token_acc": 0.0054469173645299825 }, { "epoch": 0.004104368220463207, "grad_norm": 313.74906069914255, "learning_rate": 2.0515826494724504e-06, "loss": 19.58666229248047, "step": 7, "token_acc": 0.005114621384660843 }, { "epoch": 0.004690706537672237, "grad_norm": 281.5294972237458, "learning_rate": 2.3446658851113717e-06, "loss": 19.404088973999023, "step": 8, "token_acc": 0.005057536731744876 }, { "epoch": 0.005277044854881266, "grad_norm": 146.13008801719596, "learning_rate": 2.637749120750293e-06, "loss": 18.61431121826172, "step": 9, "token_acc": 0.005126710240583648 }, { "epoch": 0.005863383172090296, "grad_norm": 140.23502708044148, "learning_rate": 2.9308323563892146e-06, "loss": 18.53695297241211, "step": 10, "token_acc": 0.00499539431729611 }, { "epoch": 0.006449721489299325, "grad_norm": 127.48319880970146, "learning_rate": 3.2239155920281363e-06, "loss": 18.369373321533203, "step": 11, "token_acc": 0.005458429543062689 }, { "epoch": 0.007036059806508356, "grad_norm": 91.25765890361949, "learning_rate": 3.5169988276670575e-06, "loss": 17.680696487426758, "step": 12, "token_acc": 0.00642902841568142 }, { "epoch": 0.007622398123717385, "grad_norm": 84.66330309793499, "learning_rate": 3.810082063305979e-06, "loss": 17.540504455566406, "step": 13, "token_acc": 0.006386651475459239 }, { "epoch": 0.008208736440926415, "grad_norm": 84.53039848160421, "learning_rate": 4.103165298944901e-06, "loss": 17.426589965820312, "step": 14, "token_acc": 0.006393642687734901 }, { "epoch": 0.008795074758135445, "grad_norm": 86.68350785550199, "learning_rate": 4.396248534583822e-06, "loss": 17.20493507385254, "step": 15, "token_acc": 0.006940054885741413 }, { "epoch": 0.009381413075344474, "grad_norm": 93.15417829751227, "learning_rate": 4.689331770222743e-06, "loss": 16.8516845703125, "step": 16, "token_acc": 0.0071443162087631495 }, { "epoch": 0.009967751392553504, "grad_norm": 95.75185430500625, "learning_rate": 4.982415005861665e-06, "loss": 16.221324920654297, "step": 17, "token_acc": 0.007623914578918941 }, { "epoch": 0.010554089709762533, "grad_norm": 78.27687919781866, "learning_rate": 5.275498241500586e-06, "loss": 15.901122093200684, "step": 18, "token_acc": 0.00788330992196805 }, { "epoch": 0.011140428026971563, "grad_norm": 68.95359329805592, "learning_rate": 5.568581477139508e-06, "loss": 15.643229484558105, "step": 19, "token_acc": 0.007641320724639917 }, { "epoch": 0.011726766344180592, "grad_norm": 69.12087738108575, "learning_rate": 5.861664712778429e-06, "loss": 15.347555160522461, "step": 20, "token_acc": 0.008035478591415448 }, { "epoch": 0.012313104661389622, "grad_norm": 61.85968622002263, "learning_rate": 6.1547479484173505e-06, "loss": 15.043094635009766, "step": 21, "token_acc": 0.007827971204153596 }, { "epoch": 0.01289944297859865, "grad_norm": 49.45410654333595, "learning_rate": 6.447831184056273e-06, "loss": 14.705094337463379, "step": 22, "token_acc": 0.007564304131815609 }, { "epoch": 0.013485781295807681, "grad_norm": 40.375815666363415, "learning_rate": 6.740914419695194e-06, "loss": 14.266519546508789, "step": 23, "token_acc": 0.007753145017100947 }, { "epoch": 0.014072119613016711, "grad_norm": 37.38537767245995, "learning_rate": 7.033997655334115e-06, "loss": 13.957818031311035, "step": 24, "token_acc": 0.007307753542429549 }, { "epoch": 0.01465845793022574, "grad_norm": 33.6695480176108, "learning_rate": 7.327080890973036e-06, "loss": 13.675622940063477, "step": 25, "token_acc": 0.007562456005206327 }, { "epoch": 0.01524479624743477, "grad_norm": 28.340626533649406, "learning_rate": 7.620164126611958e-06, "loss": 13.420007705688477, "step": 26, "token_acc": 0.007324099751416719 }, { "epoch": 0.0158311345646438, "grad_norm": 25.85556440334715, "learning_rate": 7.913247362250878e-06, "loss": 13.171337127685547, "step": 27, "token_acc": 0.007610073360902352 }, { "epoch": 0.01641747288185283, "grad_norm": 22.00322568760864, "learning_rate": 8.206330597889802e-06, "loss": 12.96188735961914, "step": 28, "token_acc": 0.0071992851485550655 }, { "epoch": 0.017003811199061858, "grad_norm": 22.931083569226036, "learning_rate": 8.499413833528722e-06, "loss": 12.78353500366211, "step": 29, "token_acc": 0.007740123363203294 }, { "epoch": 0.01759014951627089, "grad_norm": 16.299858980962625, "learning_rate": 8.792497069167644e-06, "loss": 12.634174346923828, "step": 30, "token_acc": 0.007655337227827934 }, { "epoch": 0.01817648783347992, "grad_norm": 15.892915389936697, "learning_rate": 9.085580304806565e-06, "loss": 12.513818740844727, "step": 31, "token_acc": 0.00838201743396326 }, { "epoch": 0.018762826150688947, "grad_norm": 13.103249979050556, "learning_rate": 9.378663540445487e-06, "loss": 12.393245697021484, "step": 32, "token_acc": 0.0096021912022602 }, { "epoch": 0.019349164467897976, "grad_norm": 10.526629503930657, "learning_rate": 9.671746776084409e-06, "loss": 12.305274963378906, "step": 33, "token_acc": 0.009416344045408287 }, { "epoch": 0.019935502785107008, "grad_norm": 11.034153807902563, "learning_rate": 9.96483001172333e-06, "loss": 12.234249114990234, "step": 34, "token_acc": 0.009817216368435272 }, { "epoch": 0.020521841102316037, "grad_norm": 11.369897198382668, "learning_rate": 1.0257913247362251e-05, "loss": 12.171164512634277, "step": 35, "token_acc": 0.010525034982052687 }, { "epoch": 0.021108179419525065, "grad_norm": 7.9678722582631645, "learning_rate": 1.0550996483001172e-05, "loss": 12.120739936828613, "step": 36, "token_acc": 0.010051593165849991 }, { "epoch": 0.021694517736734097, "grad_norm": 6.368991130360932, "learning_rate": 1.0844079718640094e-05, "loss": 12.073944091796875, "step": 37, "token_acc": 0.009734524520581022 }, { "epoch": 0.022280856053943126, "grad_norm": 6.38252883921138, "learning_rate": 1.1137162954279016e-05, "loss": 12.034017562866211, "step": 38, "token_acc": 0.009590650620317526 }, { "epoch": 0.022867194371152155, "grad_norm": 7.924309869588609, "learning_rate": 1.1430246189917938e-05, "loss": 11.9993896484375, "step": 39, "token_acc": 0.010022894937674892 }, { "epoch": 0.023453532688361183, "grad_norm": 9.058949431991275, "learning_rate": 1.1723329425556858e-05, "loss": 11.971410751342773, "step": 40, "token_acc": 0.009991763546265915 }, { "epoch": 0.024039871005570215, "grad_norm": 5.575744466881461, "learning_rate": 1.2016412661195779e-05, "loss": 11.937782287597656, "step": 41, "token_acc": 0.009981918543302703 }, { "epoch": 0.024626209322779244, "grad_norm": 4.104321671559125, "learning_rate": 1.2309495896834701e-05, "loss": 11.91295337677002, "step": 42, "token_acc": 0.010011369961669247 }, { "epoch": 0.025212547639988273, "grad_norm": 4.428976160604956, "learning_rate": 1.2602579132473623e-05, "loss": 11.9051513671875, "step": 43, "token_acc": 0.008911209871034638 }, { "epoch": 0.0257988859571973, "grad_norm": 6.286880923171877, "learning_rate": 1.2895662368112545e-05, "loss": 11.88243293762207, "step": 44, "token_acc": 0.00932694862883434 }, { "epoch": 0.026385224274406333, "grad_norm": 2.7217172639780425, "learning_rate": 1.3188745603751466e-05, "loss": 11.856371879577637, "step": 45, "token_acc": 0.009457271649877254 }, { "epoch": 0.026971562591615362, "grad_norm": 3.5129047119292545, "learning_rate": 1.3481828839390388e-05, "loss": 11.833991050720215, "step": 46, "token_acc": 0.009954296567402753 }, { "epoch": 0.02755790090882439, "grad_norm": 3.631314687151238, "learning_rate": 1.3774912075029308e-05, "loss": 11.818042755126953, "step": 47, "token_acc": 0.00974159406980461 }, { "epoch": 0.028144239226033423, "grad_norm": 2.7061434715967256, "learning_rate": 1.406799531066823e-05, "loss": 11.795839309692383, "step": 48, "token_acc": 0.010114053269194533 }, { "epoch": 0.02873057754324245, "grad_norm": 2.025945906052117, "learning_rate": 1.4361078546307152e-05, "loss": 11.78170394897461, "step": 49, "token_acc": 0.00965653692444547 }, { "epoch": 0.02931691586045148, "grad_norm": 2.5478209922208266, "learning_rate": 1.4654161781946073e-05, "loss": 11.755483627319336, "step": 50, "token_acc": 0.010177322843888137 }, { "epoch": 0.02990325417766051, "grad_norm": 2.1008328982135307, "learning_rate": 1.4947245017584995e-05, "loss": 11.737262725830078, "step": 51, "token_acc": 0.00994983299427297 }, { "epoch": 0.03048959249486954, "grad_norm": 1.8469441975521643, "learning_rate": 1.5240328253223915e-05, "loss": 11.708173751831055, "step": 52, "token_acc": 0.010665230272017236 }, { "epoch": 0.03107593081207857, "grad_norm": 1.5613251133142243, "learning_rate": 1.553341148886284e-05, "loss": 11.68775749206543, "step": 53, "token_acc": 0.010381088131673077 }, { "epoch": 0.0316622691292876, "grad_norm": 1.4698207661359133, "learning_rate": 1.5826494724501756e-05, "loss": 11.66779613494873, "step": 54, "token_acc": 0.009833599549249058 }, { "epoch": 0.03224860744649663, "grad_norm": 1.8460961634365187, "learning_rate": 1.611957796014068e-05, "loss": 11.639435768127441, "step": 55, "token_acc": 0.009808991012105984 }, { "epoch": 0.03283494576370566, "grad_norm": 1.5322585399900048, "learning_rate": 1.6412661195779604e-05, "loss": 11.597869873046875, "step": 56, "token_acc": 0.010166980877996229 }, { "epoch": 0.03342128408091469, "grad_norm": 1.7763741255224632, "learning_rate": 1.6705744431418524e-05, "loss": 11.565065383911133, "step": 57, "token_acc": 0.009926121865523092 }, { "epoch": 0.034007622398123716, "grad_norm": 1.512973788254566, "learning_rate": 1.6998827667057444e-05, "loss": 11.529674530029297, "step": 58, "token_acc": 0.009541249531284511 }, { "epoch": 0.034593960715332744, "grad_norm": 1.7183466708739061, "learning_rate": 1.7291910902696368e-05, "loss": 11.476898193359375, "step": 59, "token_acc": 0.010312098545578402 }, { "epoch": 0.03518029903254178, "grad_norm": 1.580856942478647, "learning_rate": 1.758499413833529e-05, "loss": 11.43745231628418, "step": 60, "token_acc": 0.009987452006606464 }, { "epoch": 0.03576663734975081, "grad_norm": 1.688630094041116, "learning_rate": 1.787807737397421e-05, "loss": 11.390299797058105, "step": 61, "token_acc": 0.010081490959432815 }, { "epoch": 0.03635297566695984, "grad_norm": 1.5114146317310522, "learning_rate": 1.817116060961313e-05, "loss": 11.349411010742188, "step": 62, "token_acc": 0.009682069716251167 }, { "epoch": 0.036939313984168866, "grad_norm": 1.2803492701078971, "learning_rate": 1.846424384525205e-05, "loss": 11.30108642578125, "step": 63, "token_acc": 0.009598904815762525 }, { "epoch": 0.037525652301377894, "grad_norm": 1.3579777563209132, "learning_rate": 1.8757327080890974e-05, "loss": 11.251730918884277, "step": 64, "token_acc": 0.009494669682006818 }, { "epoch": 0.03811199061858692, "grad_norm": 1.4758649599916502, "learning_rate": 1.9050410316529894e-05, "loss": 11.187777519226074, "step": 65, "token_acc": 0.009765726590864169 }, { "epoch": 0.03869832893579595, "grad_norm": 1.685003032531641, "learning_rate": 1.9343493552168818e-05, "loss": 11.127204895019531, "step": 66, "token_acc": 0.009664113140836771 }, { "epoch": 0.03928466725300499, "grad_norm": 2.1922224414208324, "learning_rate": 1.9636576787807738e-05, "loss": 11.059297561645508, "step": 67, "token_acc": 0.009575511602198475 }, { "epoch": 0.039871005570214016, "grad_norm": 2.3194743247234113, "learning_rate": 1.992966002344666e-05, "loss": 10.98441219329834, "step": 68, "token_acc": 0.009746186611111684 }, { "epoch": 0.040457343887423045, "grad_norm": 1.5235055597217073, "learning_rate": 2.0222743259085582e-05, "loss": 10.910924911499023, "step": 69, "token_acc": 0.01034937890285831 }, { "epoch": 0.04104368220463207, "grad_norm": 1.9147325017366896, "learning_rate": 2.0515826494724503e-05, "loss": 10.82236385345459, "step": 70, "token_acc": 0.009599121765713144 }, { "epoch": 0.0416300205218411, "grad_norm": 2.8991858566970525, "learning_rate": 2.0808909730363423e-05, "loss": 10.746939659118652, "step": 71, "token_acc": 0.009460101329955669 }, { "epoch": 0.04221635883905013, "grad_norm": 2.246200722657236, "learning_rate": 2.1101992966002344e-05, "loss": 10.647573471069336, "step": 72, "token_acc": 0.010042107779976887 }, { "epoch": 0.04280269715625916, "grad_norm": 2.010490388482763, "learning_rate": 2.1395076201641264e-05, "loss": 10.567008972167969, "step": 73, "token_acc": 0.00965858873464549 }, { "epoch": 0.043389035473468195, "grad_norm": 2.1101483851767027, "learning_rate": 2.1688159437280188e-05, "loss": 10.481674194335938, "step": 74, "token_acc": 0.00989701499776354 }, { "epoch": 0.04397537379067722, "grad_norm": 2.6334525423699335, "learning_rate": 2.1981242672919108e-05, "loss": 10.381482124328613, "step": 75, "token_acc": 0.010316418404159646 }, { "epoch": 0.04456171210788625, "grad_norm": 2.912172133202095, "learning_rate": 2.2274325908558032e-05, "loss": 10.294354438781738, "step": 76, "token_acc": 0.009850675640149324 }, { "epoch": 0.04514805042509528, "grad_norm": 2.3797273274710036, "learning_rate": 2.2567409144196952e-05, "loss": 10.213769912719727, "step": 77, "token_acc": 0.010241388484334845 }, { "epoch": 0.04573438874230431, "grad_norm": 2.945535177249324, "learning_rate": 2.2860492379835876e-05, "loss": 10.099189758300781, "step": 78, "token_acc": 0.009869874397761462 }, { "epoch": 0.04632072705951334, "grad_norm": 2.971770277897452, "learning_rate": 2.3153575615474797e-05, "loss": 9.993583679199219, "step": 79, "token_acc": 0.010431967935535318 }, { "epoch": 0.046907065376722366, "grad_norm": 2.547720468269889, "learning_rate": 2.3446658851113717e-05, "loss": 9.909547805786133, "step": 80, "token_acc": 0.010044648712742459 }, { "epoch": 0.047493403693931395, "grad_norm": 2.1454924184368163, "learning_rate": 2.3739742086752637e-05, "loss": 9.846504211425781, "step": 81, "token_acc": 0.010159731663403445 }, { "epoch": 0.04807974201114043, "grad_norm": 2.2665870973230287, "learning_rate": 2.4032825322391558e-05, "loss": 9.754265785217285, "step": 82, "token_acc": 0.009307710734342977 }, { "epoch": 0.04866608032834946, "grad_norm": 2.2607988486840003, "learning_rate": 2.432590855803048e-05, "loss": 9.701478958129883, "step": 83, "token_acc": 0.009814831657847852 }, { "epoch": 0.04925241864555849, "grad_norm": 1.7210474669988394, "learning_rate": 2.4618991793669402e-05, "loss": 9.639317512512207, "step": 84, "token_acc": 0.009418293617988417 }, { "epoch": 0.049838756962767516, "grad_norm": 1.9291926373706727, "learning_rate": 2.4912075029308322e-05, "loss": 9.624462127685547, "step": 85, "token_acc": 0.00989620739097431 }, { "epoch": 0.050425095279976545, "grad_norm": 2.005482804094446, "learning_rate": 2.5205158264947246e-05, "loss": 9.52865219116211, "step": 86, "token_acc": 0.00879396344849723 }, { "epoch": 0.051011433597185574, "grad_norm": 1.516405972085273, "learning_rate": 2.5498241500586167e-05, "loss": 9.478717803955078, "step": 87, "token_acc": 0.00946927640135774 }, { "epoch": 0.0515977719143946, "grad_norm": 2.466584861081511, "learning_rate": 2.579132473622509e-05, "loss": 9.46435546875, "step": 88, "token_acc": 0.00986529307929467 }, { "epoch": 0.05218411023160364, "grad_norm": 1.4310252606493354, "learning_rate": 2.608440797186401e-05, "loss": 9.38080883026123, "step": 89, "token_acc": 0.009301760346752612 }, { "epoch": 0.052770448548812667, "grad_norm": 1.6057792268393976, "learning_rate": 2.637749120750293e-05, "loss": 9.322309494018555, "step": 90, "token_acc": 0.009803260379883084 }, { "epoch": 0.053356786866021695, "grad_norm": 1.2211016643267802, "learning_rate": 2.667057444314185e-05, "loss": 9.337228775024414, "step": 91, "token_acc": 0.010295949169358073 }, { "epoch": 0.053943125183230724, "grad_norm": 1.37593460058677, "learning_rate": 2.6963657678780775e-05, "loss": 9.242362022399902, "step": 92, "token_acc": 0.0102168605941473 }, { "epoch": 0.05452946350043975, "grad_norm": 1.348385407206309, "learning_rate": 2.7256740914419696e-05, "loss": 9.236753463745117, "step": 93, "token_acc": 0.01001013718997094 }, { "epoch": 0.05511580181764878, "grad_norm": 1.1206301898574862, "learning_rate": 2.7549824150058616e-05, "loss": 9.238260269165039, "step": 94, "token_acc": 0.009541506872784301 }, { "epoch": 0.05570214013485781, "grad_norm": 1.345623166185279, "learning_rate": 2.7842907385697537e-05, "loss": 9.263555526733398, "step": 95, "token_acc": 0.010272638240962079 }, { "epoch": 0.056288478452066845, "grad_norm": 1.1896193598625675, "learning_rate": 2.813599062133646e-05, "loss": 9.129605293273926, "step": 96, "token_acc": 0.012232874370405438 }, { "epoch": 0.056874816769275874, "grad_norm": 0.9356573544307287, "learning_rate": 2.8429073856975384e-05, "loss": 9.214315414428711, "step": 97, "token_acc": 0.011531789767618967 }, { "epoch": 0.0574611550864849, "grad_norm": 1.2848383130859589, "learning_rate": 2.8722157092614305e-05, "loss": 9.171603202819824, "step": 98, "token_acc": 0.011461781454034188 }, { "epoch": 0.05804749340369393, "grad_norm": 0.9553656306915715, "learning_rate": 2.9015240328253225e-05, "loss": 9.156946182250977, "step": 99, "token_acc": 0.011090126436273988 }, { "epoch": 0.05863383172090296, "grad_norm": 0.9865674826661757, "learning_rate": 2.9308323563892145e-05, "loss": 9.10972785949707, "step": 100, "token_acc": 0.01165684761034624 }, { "epoch": 0.05922017003811199, "grad_norm": 1.155982251271156, "learning_rate": 2.9601406799531066e-05, "loss": 9.144954681396484, "step": 101, "token_acc": 0.01193571507044243 }, { "epoch": 0.05980650835532102, "grad_norm": 0.8102995038859054, "learning_rate": 2.989449003516999e-05, "loss": 9.072721481323242, "step": 102, "token_acc": 0.01121621144663665 }, { "epoch": 0.06039284667253005, "grad_norm": 0.775186952799585, "learning_rate": 3.018757327080891e-05, "loss": 9.186348915100098, "step": 103, "token_acc": 0.011551146598398943 }, { "epoch": 0.06097918498973908, "grad_norm": 0.761217397882543, "learning_rate": 3.048065650644783e-05, "loss": 9.047115325927734, "step": 104, "token_acc": 0.011618798955613577 }, { "epoch": 0.06156552330694811, "grad_norm": 0.7213021351684632, "learning_rate": 3.077373974208675e-05, "loss": 9.156316757202148, "step": 105, "token_acc": 0.011375447209672786 }, { "epoch": 0.06215186162415714, "grad_norm": 0.7309509073181476, "learning_rate": 3.106682297772568e-05, "loss": 9.148033142089844, "step": 106, "token_acc": 0.012079490499722524 }, { "epoch": 0.06273819994136617, "grad_norm": 0.71620452889025, "learning_rate": 3.13599062133646e-05, "loss": 9.068413734436035, "step": 107, "token_acc": 0.011607522944027414 }, { "epoch": 0.0633245382585752, "grad_norm": 0.6771575928881343, "learning_rate": 3.165298944900351e-05, "loss": 9.069038391113281, "step": 108, "token_acc": 0.011099353621377666 }, { "epoch": 0.06391087657578423, "grad_norm": 0.6711069252190972, "learning_rate": 3.194607268464244e-05, "loss": 9.122047424316406, "step": 109, "token_acc": 0.011947090355173344 }, { "epoch": 0.06449721489299326, "grad_norm": 0.6158664940699149, "learning_rate": 3.223915592028136e-05, "loss": 9.059281349182129, "step": 110, "token_acc": 0.011221092233253866 }, { "epoch": 0.06508355321020229, "grad_norm": 0.6541226788161765, "learning_rate": 3.253223915592028e-05, "loss": 9.042486190795898, "step": 111, "token_acc": 0.011933036580314684 }, { "epoch": 0.06566989152741132, "grad_norm": 0.5883955641324334, "learning_rate": 3.282532239155921e-05, "loss": 9.05929946899414, "step": 112, "token_acc": 0.011732247975197016 }, { "epoch": 0.06625622984462035, "grad_norm": 0.7627940108714882, "learning_rate": 3.311840562719812e-05, "loss": 9.178930282592773, "step": 113, "token_acc": 0.011931200114896979 }, { "epoch": 0.06684256816182937, "grad_norm": 0.5941328029995522, "learning_rate": 3.341148886283705e-05, "loss": 9.0415620803833, "step": 114, "token_acc": 0.011746869853882949 }, { "epoch": 0.0674289064790384, "grad_norm": 0.5648062486746203, "learning_rate": 3.370457209847597e-05, "loss": 9.050302505493164, "step": 115, "token_acc": 0.01133997295476865 }, { "epoch": 0.06801524479624743, "grad_norm": 0.6145887557531267, "learning_rate": 3.399765533411489e-05, "loss": 9.054960250854492, "step": 116, "token_acc": 0.012100911975666616 }, { "epoch": 0.06860158311345646, "grad_norm": 0.6102056970676166, "learning_rate": 3.429073856975381e-05, "loss": 9.081972122192383, "step": 117, "token_acc": 0.01301256789860475 }, { "epoch": 0.06918792143066549, "grad_norm": 0.6291909427439372, "learning_rate": 3.4583821805392736e-05, "loss": 8.970478057861328, "step": 118, "token_acc": 0.01245429528011244 }, { "epoch": 0.06977425974787452, "grad_norm": 0.6645018971262254, "learning_rate": 3.487690504103165e-05, "loss": 9.07176399230957, "step": 119, "token_acc": 0.012789338134439861 }, { "epoch": 0.07036059806508356, "grad_norm": 0.6594780162967981, "learning_rate": 3.516998827667058e-05, "loss": 9.01324462890625, "step": 120, "token_acc": 0.011675599037382705 }, { "epoch": 0.07094693638229259, "grad_norm": 0.6039936107825037, "learning_rate": 3.54630715123095e-05, "loss": 9.027917861938477, "step": 121, "token_acc": 0.01210492997977365 }, { "epoch": 0.07153327469950162, "grad_norm": 0.5376271255533, "learning_rate": 3.575615474794842e-05, "loss": 9.073873519897461, "step": 122, "token_acc": 0.012944805841006463 }, { "epoch": 0.07211961301671065, "grad_norm": 0.5554818213186559, "learning_rate": 3.6049237983587345e-05, "loss": 9.039239883422852, "step": 123, "token_acc": 0.011444788680887667 }, { "epoch": 0.07270595133391967, "grad_norm": 0.5884672473219501, "learning_rate": 3.634232121922626e-05, "loss": 9.021541595458984, "step": 124, "token_acc": 0.011900584556713426 }, { "epoch": 0.0732922896511287, "grad_norm": 0.5396993704088476, "learning_rate": 3.6635404454865186e-05, "loss": 9.010172843933105, "step": 125, "token_acc": 0.011912235628030156 }, { "epoch": 0.07387862796833773, "grad_norm": 0.5989403347793538, "learning_rate": 3.69284876905041e-05, "loss": 9.069947242736816, "step": 126, "token_acc": 0.012838447957519643 }, { "epoch": 0.07446496628554676, "grad_norm": 0.6878989358274504, "learning_rate": 3.722157092614303e-05, "loss": 9.054113388061523, "step": 127, "token_acc": 0.01162454683969947 }, { "epoch": 0.07505130460275579, "grad_norm": 1.0466514286625217, "learning_rate": 3.751465416178195e-05, "loss": 9.123021125793457, "step": 128, "token_acc": 0.012989581607180084 }, { "epoch": 0.07563764291996482, "grad_norm": 1.4583369467166205, "learning_rate": 3.780773739742087e-05, "loss": 9.07336139678955, "step": 129, "token_acc": 0.012118592430957352 }, { "epoch": 0.07622398123717385, "grad_norm": 0.6613674916373863, "learning_rate": 3.810082063305979e-05, "loss": 9.000986099243164, "step": 130, "token_acc": 0.012802827245847347 }, { "epoch": 0.07681031955438287, "grad_norm": 0.798447665040762, "learning_rate": 3.839390386869871e-05, "loss": 9.099397659301758, "step": 131, "token_acc": 0.012345867804403122 }, { "epoch": 0.0773966578715919, "grad_norm": 1.1620166447954958, "learning_rate": 3.8686987104337636e-05, "loss": 9.042312622070312, "step": 132, "token_acc": 0.013380480085938385 }, { "epoch": 0.07798299618880093, "grad_norm": 0.6241317601800273, "learning_rate": 3.8980070339976556e-05, "loss": 9.096951484680176, "step": 133, "token_acc": 0.01236691735428148 }, { "epoch": 0.07856933450600997, "grad_norm": 0.644152757164242, "learning_rate": 3.9273153575615476e-05, "loss": 9.075241088867188, "step": 134, "token_acc": 0.013975434391851407 }, { "epoch": 0.079155672823219, "grad_norm": 0.6360182345956721, "learning_rate": 3.95662368112544e-05, "loss": 9.120025634765625, "step": 135, "token_acc": 0.013047885454446469 }, { "epoch": 0.07974201114042803, "grad_norm": 0.8471619740332605, "learning_rate": 3.985932004689332e-05, "loss": 8.983390808105469, "step": 136, "token_acc": 0.012065508987385343 }, { "epoch": 0.08032834945763706, "grad_norm": 1.5445162116932314, "learning_rate": 4.015240328253224e-05, "loss": 9.056979179382324, "step": 137, "token_acc": 0.011673870858445332 }, { "epoch": 0.08091468777484609, "grad_norm": 1.345636253725767, "learning_rate": 4.0445486518171165e-05, "loss": 9.058370590209961, "step": 138, "token_acc": 0.013804131801278749 }, { "epoch": 0.08150102609205512, "grad_norm": 1.7977199714744192, "learning_rate": 4.073856975381008e-05, "loss": 9.08059310913086, "step": 139, "token_acc": 0.013110879996704903 }, { "epoch": 0.08208736440926415, "grad_norm": 0.6622174014647327, "learning_rate": 4.1031652989449006e-05, "loss": 8.98945426940918, "step": 140, "token_acc": 0.01424087982832618 }, { "epoch": 0.08267370272647317, "grad_norm": 27.670994701344057, "learning_rate": 4.1324736225087926e-05, "loss": 9.153547286987305, "step": 141, "token_acc": 0.013166422844021407 }, { "epoch": 0.0832600410436822, "grad_norm": 0.9503391387441521, "learning_rate": 4.1617819460726846e-05, "loss": 9.067795753479004, "step": 142, "token_acc": 0.01426748347364399 }, { "epoch": 0.08384637936089123, "grad_norm": 0.8234029150083744, "learning_rate": 4.1910902696365774e-05, "loss": 9.054396629333496, "step": 143, "token_acc": 0.014940769333155864 }, { "epoch": 0.08443271767810026, "grad_norm": 1.1606118933038128, "learning_rate": 4.220398593200469e-05, "loss": 8.98237419128418, "step": 144, "token_acc": 0.015627841275948674 }, { "epoch": 0.08501905599530929, "grad_norm": 0.7555726774032652, "learning_rate": 4.2497069167643614e-05, "loss": 9.117998123168945, "step": 145, "token_acc": 0.01568815131729457 }, { "epoch": 0.08560539431251832, "grad_norm": 3.368266363746793, "learning_rate": 4.279015240328253e-05, "loss": 9.014965057373047, "step": 146, "token_acc": 0.015468345677977644 }, { "epoch": 0.08619173262972735, "grad_norm": 0.6560979521936137, "learning_rate": 4.3083235638921455e-05, "loss": 8.998909950256348, "step": 147, "token_acc": 0.013764487051080269 }, { "epoch": 0.08677807094693639, "grad_norm": 1.0923013975992322, "learning_rate": 4.3376318874560376e-05, "loss": 9.04861831665039, "step": 148, "token_acc": 0.013260634881591713 }, { "epoch": 0.08736440926414542, "grad_norm": 0.6015653002957679, "learning_rate": 4.3669402110199296e-05, "loss": 9.02303695678711, "step": 149, "token_acc": 0.01542785262839678 }, { "epoch": 0.08795074758135445, "grad_norm": 1.5446096175911532, "learning_rate": 4.3962485345838216e-05, "loss": 9.037572860717773, "step": 150, "token_acc": 0.01518732292035231 }, { "epoch": 0.08853708589856348, "grad_norm": 0.6411615461501838, "learning_rate": 4.4255568581477144e-05, "loss": 9.064362525939941, "step": 151, "token_acc": 0.015274854043740214 }, { "epoch": 0.0891234242157725, "grad_norm": 1.0535392960185859, "learning_rate": 4.4548651817116064e-05, "loss": 9.030740737915039, "step": 152, "token_acc": 0.014803713909681665 }, { "epoch": 0.08970976253298153, "grad_norm": 1.0571200978375455, "learning_rate": 4.4841735052754984e-05, "loss": 9.012420654296875, "step": 153, "token_acc": 0.015575135558928999 }, { "epoch": 0.09029610085019056, "grad_norm": 2.2788284348010657, "learning_rate": 4.5134818288393905e-05, "loss": 9.037130355834961, "step": 154, "token_acc": 0.01606599742626949 }, { "epoch": 0.09088243916739959, "grad_norm": 0.8269007597807994, "learning_rate": 4.5427901524032825e-05, "loss": 9.056532859802246, "step": 155, "token_acc": 0.014675794875192046 }, { "epoch": 0.09146877748460862, "grad_norm": 1.6203791781845984, "learning_rate": 4.572098475967175e-05, "loss": 8.994211196899414, "step": 156, "token_acc": 0.015230111957664211 }, { "epoch": 0.09205511580181765, "grad_norm": 3.0794533802847983, "learning_rate": 4.6014067995310666e-05, "loss": 9.0416898727417, "step": 157, "token_acc": 0.014912312972004267 }, { "epoch": 0.09264145411902668, "grad_norm": 1.891714166389983, "learning_rate": 4.630715123094959e-05, "loss": 8.981319427490234, "step": 158, "token_acc": 0.014646135062310542 }, { "epoch": 0.0932277924362357, "grad_norm": 10.774120420203591, "learning_rate": 4.660023446658851e-05, "loss": 9.010435104370117, "step": 159, "token_acc": 0.015444536067503721 }, { "epoch": 0.09381413075344473, "grad_norm": 7.45957162719553, "learning_rate": 4.6893317702227434e-05, "loss": 9.080111503601074, "step": 160, "token_acc": 0.01499811001489156 }, { "epoch": 0.09440046907065376, "grad_norm": 0.8937305353273864, "learning_rate": 4.718640093786636e-05, "loss": 9.031442642211914, "step": 161, "token_acc": 0.015406480668060142 }, { "epoch": 0.09498680738786279, "grad_norm": 3.5487521889087907, "learning_rate": 4.7479484173505275e-05, "loss": 9.011468887329102, "step": 162, "token_acc": 0.015158708989063751 }, { "epoch": 0.09557314570507183, "grad_norm": 2.959279305380981, "learning_rate": 4.77725674091442e-05, "loss": 9.005142211914062, "step": 163, "token_acc": 0.013494043376438244 }, { "epoch": 0.09615948402228086, "grad_norm": 3.2622271122520994, "learning_rate": 4.8065650644783116e-05, "loss": 8.998067855834961, "step": 164, "token_acc": 0.013487252744424283 }, { "epoch": 0.09674582233948989, "grad_norm": 2.027402533781415, "learning_rate": 4.835873388042204e-05, "loss": 9.002195358276367, "step": 165, "token_acc": 0.015836285272474035 }, { "epoch": 0.09733216065669892, "grad_norm": 2.9061635664284533, "learning_rate": 4.865181711606096e-05, "loss": 9.032505989074707, "step": 166, "token_acc": 0.014648964429502346 }, { "epoch": 0.09791849897390795, "grad_norm": 1.3054722296281038, "learning_rate": 4.8944900351699884e-05, "loss": 8.937246322631836, "step": 167, "token_acc": 0.01595735133097593 }, { "epoch": 0.09850483729111698, "grad_norm": 3.730000570838039, "learning_rate": 4.9237983587338804e-05, "loss": 8.91473388671875, "step": 168, "token_acc": 0.015526105569324676 }, { "epoch": 0.099091175608326, "grad_norm": 1.1265081450438386, "learning_rate": 4.9531066822977724e-05, "loss": 8.87942886352539, "step": 169, "token_acc": 0.01571133998300599 }, { "epoch": 0.09967751392553503, "grad_norm": 4.572016892486631, "learning_rate": 4.9824150058616645e-05, "loss": 8.914380073547363, "step": 170, "token_acc": 0.013701915556992253 }, { "epoch": 0.10026385224274406, "grad_norm": 1.551018304245375, "learning_rate": 5.011723329425557e-05, "loss": 8.881754875183105, "step": 171, "token_acc": 0.01519993986381227 }, { "epoch": 0.10085019055995309, "grad_norm": 11.730889156050635, "learning_rate": 5.041031652989449e-05, "loss": 8.938714027404785, "step": 172, "token_acc": 0.014901633339907332 }, { "epoch": 0.10143652887716212, "grad_norm": 13.232236209459126, "learning_rate": 5.070339976553341e-05, "loss": 8.957929611206055, "step": 173, "token_acc": 0.01576526516449063 }, { "epoch": 0.10202286719437115, "grad_norm": 4.295964381097921, "learning_rate": 5.099648300117233e-05, "loss": 8.853195190429688, "step": 174, "token_acc": 0.014773312860853744 }, { "epoch": 0.10260920551158018, "grad_norm": 16.44930152861039, "learning_rate": 5.1289566236811254e-05, "loss": 8.970190048217773, "step": 175, "token_acc": 0.014638800968162353 }, { "epoch": 0.1031955438287892, "grad_norm": 18.684173259328407, "learning_rate": 5.158264947245018e-05, "loss": 9.034139633178711, "step": 176, "token_acc": 0.015680815876515986 }, { "epoch": 0.10378188214599825, "grad_norm": 12.684678314847286, "learning_rate": 5.1875732708089094e-05, "loss": 8.913228988647461, "step": 177, "token_acc": 0.014684177351452724 }, { "epoch": 0.10436822046320728, "grad_norm": 1.8227889064380007, "learning_rate": 5.216881594372802e-05, "loss": 8.800104141235352, "step": 178, "token_acc": 0.016324880312539435 }, { "epoch": 0.1049545587804163, "grad_norm": 9.010683810362792, "learning_rate": 5.2461899179366935e-05, "loss": 8.902030944824219, "step": 179, "token_acc": 0.016581966851631345 }, { "epoch": 0.10554089709762533, "grad_norm": 6.100096475661366, "learning_rate": 5.275498241500586e-05, "loss": 8.933889389038086, "step": 180, "token_acc": 0.015566585867296236 }, { "epoch": 0.10612723541483436, "grad_norm": 3.529948177834921, "learning_rate": 5.304806565064479e-05, "loss": 8.916126251220703, "step": 181, "token_acc": 0.01492761684280152 }, { "epoch": 0.10671357373204339, "grad_norm": 1.2590959787599525, "learning_rate": 5.33411488862837e-05, "loss": 8.729218482971191, "step": 182, "token_acc": 0.015315456723284795 }, { "epoch": 0.10729991204925242, "grad_norm": 3.1380519156783886, "learning_rate": 5.363423212192263e-05, "loss": 8.827041625976562, "step": 183, "token_acc": 0.0161524568000338 }, { "epoch": 0.10788625036646145, "grad_norm": 2.763974886897374, "learning_rate": 5.392731535756155e-05, "loss": 8.771095275878906, "step": 184, "token_acc": 0.016393485567288472 }, { "epoch": 0.10847258868367048, "grad_norm": 1.9384487957952075, "learning_rate": 5.422039859320047e-05, "loss": 8.778203964233398, "step": 185, "token_acc": 0.016288290889591273 }, { "epoch": 0.1090589270008795, "grad_norm": 4.471488914801324, "learning_rate": 5.451348182883939e-05, "loss": 8.763311386108398, "step": 186, "token_acc": 0.016823097406277413 }, { "epoch": 0.10964526531808853, "grad_norm": 4.93635206450147, "learning_rate": 5.480656506447831e-05, "loss": 8.744799613952637, "step": 187, "token_acc": 0.015184437426726559 }, { "epoch": 0.11023160363529756, "grad_norm": 4.522895153957665, "learning_rate": 5.509964830011723e-05, "loss": 8.70407772064209, "step": 188, "token_acc": 0.016358883997253115 }, { "epoch": 0.11081794195250659, "grad_norm": 3.7313033196719037, "learning_rate": 5.539273153575616e-05, "loss": 8.730113983154297, "step": 189, "token_acc": 0.016440375650633842 }, { "epoch": 0.11140428026971562, "grad_norm": 5.052261907074615, "learning_rate": 5.568581477139507e-05, "loss": 8.744132995605469, "step": 190, "token_acc": 0.01658494451496016 }, { "epoch": 0.11199061858692466, "grad_norm": 3.31067778798652, "learning_rate": 5.5978898007034e-05, "loss": 8.700679779052734, "step": 191, "token_acc": 0.01586239342257869 }, { "epoch": 0.11257695690413369, "grad_norm": 1.0260062681026476, "learning_rate": 5.627198124267292e-05, "loss": 8.780962944030762, "step": 192, "token_acc": 0.016017765772317602 }, { "epoch": 0.11316329522134272, "grad_norm": 7.458556736196419, "learning_rate": 5.656506447831184e-05, "loss": 8.744999885559082, "step": 193, "token_acc": 0.014912581848799301 }, { "epoch": 0.11374963353855175, "grad_norm": 10.079846221465687, "learning_rate": 5.685814771395077e-05, "loss": 8.803315162658691, "step": 194, "token_acc": 0.014887918854515932 }, { "epoch": 0.11433597185576078, "grad_norm": 3.477254786052486, "learning_rate": 5.715123094958968e-05, "loss": 8.727133750915527, "step": 195, "token_acc": 0.015802094702375955 }, { "epoch": 0.1149223101729698, "grad_norm": 5.0618517385679525, "learning_rate": 5.744431418522861e-05, "loss": 8.644700050354004, "step": 196, "token_acc": 0.016661720327876602 }, { "epoch": 0.11550864849017883, "grad_norm": 11.88288450557306, "learning_rate": 5.773739742086752e-05, "loss": 8.746366500854492, "step": 197, "token_acc": 0.016478598027581125 }, { "epoch": 0.11609498680738786, "grad_norm": 5.731286437963412, "learning_rate": 5.803048065650645e-05, "loss": 8.639501571655273, "step": 198, "token_acc": 0.01787763083632972 }, { "epoch": 0.11668132512459689, "grad_norm": 16.386161788718045, "learning_rate": 5.832356389214537e-05, "loss": 8.684814453125, "step": 199, "token_acc": 0.017497375004861104 }, { "epoch": 0.11726766344180592, "grad_norm": 16.38806388393325, "learning_rate": 5.861664712778429e-05, "loss": 8.64936637878418, "step": 200, "token_acc": 0.016449116853381306 }, { "epoch": 0.11785400175901495, "grad_norm": 5.212003127584709, "learning_rate": 5.890973036342322e-05, "loss": 8.724567413330078, "step": 201, "token_acc": 0.01695317162588456 }, { "epoch": 0.11844034007622398, "grad_norm": 9.156993737651222, "learning_rate": 5.920281359906213e-05, "loss": 8.577198028564453, "step": 202, "token_acc": 0.01718782633842265 }, { "epoch": 0.119026678393433, "grad_norm": 9.101926016859572, "learning_rate": 5.949589683470106e-05, "loss": 8.692757606506348, "step": 203, "token_acc": 0.016880875238676457 }, { "epoch": 0.11961301671064203, "grad_norm": 3.3144327424040925, "learning_rate": 5.978898007033998e-05, "loss": 8.529800415039062, "step": 204, "token_acc": 0.017974216518853565 }, { "epoch": 0.12019935502785108, "grad_norm": 19.142046024227387, "learning_rate": 6.00820633059789e-05, "loss": 8.707862854003906, "step": 205, "token_acc": 0.01619655515954679 }, { "epoch": 0.1207856933450601, "grad_norm": 17.117850232171193, "learning_rate": 6.037514654161782e-05, "loss": 8.636751174926758, "step": 206, "token_acc": 0.017083395959032056 }, { "epoch": 0.12137203166226913, "grad_norm": 2.5671023748067876, "learning_rate": 6.066822977725674e-05, "loss": 8.624223709106445, "step": 207, "token_acc": 0.016383109467759928 }, { "epoch": 0.12195836997947816, "grad_norm": 17.705045927553492, "learning_rate": 6.096131301289566e-05, "loss": 8.70902156829834, "step": 208, "token_acc": 0.016802597196927706 }, { "epoch": 0.12254470829668719, "grad_norm": 18.93270872158918, "learning_rate": 6.125439624853459e-05, "loss": 8.715855598449707, "step": 209, "token_acc": 0.016338521390424136 }, { "epoch": 0.12313104661389622, "grad_norm": 7.289969427086145, "learning_rate": 6.15474794841735e-05, "loss": 8.569097518920898, "step": 210, "token_acc": 0.01744045387717773 }, { "epoch": 0.12371738493110525, "grad_norm": 13.724303951548745, "learning_rate": 6.184056271981243e-05, "loss": 8.679459571838379, "step": 211, "token_acc": 0.017568939332338336 }, { "epoch": 0.12430372324831428, "grad_norm": 13.474857729860062, "learning_rate": 6.213364595545136e-05, "loss": 8.554481506347656, "step": 212, "token_acc": 0.017222620894002175 }, { "epoch": 0.1248900615655233, "grad_norm": 3.8323785919939715, "learning_rate": 6.242672919109027e-05, "loss": 8.478784561157227, "step": 213, "token_acc": 0.018962274757509785 }, { "epoch": 0.12547639988273235, "grad_norm": 3.043740645972869, "learning_rate": 6.27198124267292e-05, "loss": 8.605936050415039, "step": 214, "token_acc": 0.017647326139754706 }, { "epoch": 0.12606273819994138, "grad_norm": 23.383653643283452, "learning_rate": 6.301289566236812e-05, "loss": 8.636078834533691, "step": 215, "token_acc": 0.01640215832670891 }, { "epoch": 0.1266490765171504, "grad_norm": 21.752726012141736, "learning_rate": 6.330597889800702e-05, "loss": 8.577640533447266, "step": 216, "token_acc": 0.017832840456819418 }, { "epoch": 0.12723541483435943, "grad_norm": 4.094310847547717, "learning_rate": 6.359906213364595e-05, "loss": 8.488836288452148, "step": 217, "token_acc": 0.017452771819831712 }, { "epoch": 0.12782175315156846, "grad_norm": 3.9601952228084842, "learning_rate": 6.389214536928488e-05, "loss": 8.578311920166016, "step": 218, "token_acc": 0.016829314478811332 }, { "epoch": 0.1284080914687775, "grad_norm": 11.737225536193094, "learning_rate": 6.41852286049238e-05, "loss": 8.462087631225586, "step": 219, "token_acc": 0.017304168067840263 }, { "epoch": 0.12899442978598652, "grad_norm": 10.142299779472314, "learning_rate": 6.447831184056272e-05, "loss": 8.476770401000977, "step": 220, "token_acc": 0.017343309836713363 }, { "epoch": 0.12958076810319555, "grad_norm": 11.20711605649483, "learning_rate": 6.477139507620163e-05, "loss": 8.518610954284668, "step": 221, "token_acc": 0.01845935963564089 }, { "epoch": 0.13016710642040458, "grad_norm": 8.319735186212462, "learning_rate": 6.506447831184056e-05, "loss": 8.442079544067383, "step": 222, "token_acc": 0.018927895621336226 }, { "epoch": 0.1307534447376136, "grad_norm": 13.346414566879151, "learning_rate": 6.535756154747949e-05, "loss": 8.54383659362793, "step": 223, "token_acc": 0.017246862225140155 }, { "epoch": 0.13133978305482263, "grad_norm": 8.198788362666994, "learning_rate": 6.565064478311841e-05, "loss": 8.526226043701172, "step": 224, "token_acc": 0.017177822512137484 }, { "epoch": 0.13192612137203166, "grad_norm": 12.2174533699686, "learning_rate": 6.594372801875733e-05, "loss": 8.415727615356445, "step": 225, "token_acc": 0.018047791791998554 }, { "epoch": 0.1325124596892407, "grad_norm": 9.152416241421493, "learning_rate": 6.623681125439624e-05, "loss": 8.444255828857422, "step": 226, "token_acc": 0.017586869005900726 }, { "epoch": 0.13309879800644972, "grad_norm": 10.40590711692462, "learning_rate": 6.652989449003517e-05, "loss": 8.471542358398438, "step": 227, "token_acc": 0.016466763159575147 }, { "epoch": 0.13368513632365875, "grad_norm": 11.55576830229065, "learning_rate": 6.68229777256741e-05, "loss": 8.469377517700195, "step": 228, "token_acc": 0.016281522371486484 }, { "epoch": 0.13427147464086778, "grad_norm": 4.957683145602286, "learning_rate": 6.711606096131301e-05, "loss": 8.450397491455078, "step": 229, "token_acc": 0.017184085943190094 }, { "epoch": 0.1348578129580768, "grad_norm": 4.417961684691017, "learning_rate": 6.740914419695194e-05, "loss": 8.386655807495117, "step": 230, "token_acc": 0.01818479561630031 }, { "epoch": 0.13544415127528583, "grad_norm": 10.883855260440926, "learning_rate": 6.770222743259086e-05, "loss": 8.469613075256348, "step": 231, "token_acc": 0.016472601851611732 }, { "epoch": 0.13603048959249486, "grad_norm": 8.959714325000101, "learning_rate": 6.799531066822978e-05, "loss": 8.390556335449219, "step": 232, "token_acc": 0.016688635783613407 }, { "epoch": 0.1366168279097039, "grad_norm": 5.46317976744313, "learning_rate": 6.82883939038687e-05, "loss": 8.366771697998047, "step": 233, "token_acc": 0.01893216334795815 }, { "epoch": 0.13720316622691292, "grad_norm": 5.171782104398142, "learning_rate": 6.858147713950762e-05, "loss": 8.392815589904785, "step": 234, "token_acc": 0.018358918534072472 }, { "epoch": 0.13778950454412195, "grad_norm": 7.597631537523283, "learning_rate": 6.887456037514655e-05, "loss": 8.311612129211426, "step": 235, "token_acc": 0.018695744891112694 }, { "epoch": 0.13837584286133098, "grad_norm": 4.025975897904792, "learning_rate": 6.916764361078547e-05, "loss": 8.252484321594238, "step": 236, "token_acc": 0.017489714597891913 }, { "epoch": 0.13896218117854, "grad_norm": 12.63744324347212, "learning_rate": 6.946072684642439e-05, "loss": 8.398983001708984, "step": 237, "token_acc": 0.01723557211573884 }, { "epoch": 0.13954851949574903, "grad_norm": 12.001682900315135, "learning_rate": 6.97538100820633e-05, "loss": 8.364505767822266, "step": 238, "token_acc": 0.019183627514700537 }, { "epoch": 0.14013485781295806, "grad_norm": 3.0690319680586584, "learning_rate": 7.004689331770223e-05, "loss": 8.331502914428711, "step": 239, "token_acc": 0.018484823422380284 }, { "epoch": 0.14072119613016712, "grad_norm": 12.133847265694815, "learning_rate": 7.033997655334115e-05, "loss": 8.34527587890625, "step": 240, "token_acc": 0.017558171153323233 }, { "epoch": 0.14130753444737615, "grad_norm": 12.355574260271677, "learning_rate": 7.063305978898008e-05, "loss": 8.383485794067383, "step": 241, "token_acc": 0.01869788664355479 }, { "epoch": 0.14189387276458518, "grad_norm": 2.806155418914126, "learning_rate": 7.0926143024619e-05, "loss": 8.341289520263672, "step": 242, "token_acc": 0.01852517130434323 }, { "epoch": 0.1424802110817942, "grad_norm": 15.330216005338592, "learning_rate": 7.121922626025791e-05, "loss": 8.337993621826172, "step": 243, "token_acc": 0.018042335944819347 }, { "epoch": 0.14306654939900323, "grad_norm": 19.931118361452103, "learning_rate": 7.151230949589684e-05, "loss": 8.49669075012207, "step": 244, "token_acc": 0.017280365421119104 }, { "epoch": 0.14365288771621226, "grad_norm": 10.74510387049553, "learning_rate": 7.180539273153576e-05, "loss": 8.496963500976562, "step": 245, "token_acc": 0.017016856246551425 }, { "epoch": 0.1442392260334213, "grad_norm": 7.392613548328144, "learning_rate": 7.209847596717469e-05, "loss": 8.245037078857422, "step": 246, "token_acc": 0.018232677186264666 }, { "epoch": 0.14482556435063032, "grad_norm": 14.144941718304963, "learning_rate": 7.239155920281359e-05, "loss": 8.253929138183594, "step": 247, "token_acc": 0.01806724960295295 }, { "epoch": 0.14541190266783935, "grad_norm": 18.210836124188965, "learning_rate": 7.268464243845252e-05, "loss": 8.385154724121094, "step": 248, "token_acc": 0.017011478807361737 }, { "epoch": 0.14599824098504838, "grad_norm": 8.848093798836455, "learning_rate": 7.297772567409144e-05, "loss": 8.348281860351562, "step": 249, "token_acc": 0.01845634040755992 }, { "epoch": 0.1465845793022574, "grad_norm": 17.71281655108162, "learning_rate": 7.327080890973037e-05, "loss": 8.335196495056152, "step": 250, "token_acc": 0.01910326214137506 }, { "epoch": 0.14717091761946643, "grad_norm": 21.5179146789294, "learning_rate": 7.35638921453693e-05, "loss": 8.454710006713867, "step": 251, "token_acc": 0.016284138015118264 }, { "epoch": 0.14775725593667546, "grad_norm": 13.758264497854526, "learning_rate": 7.38569753810082e-05, "loss": 8.32026481628418, "step": 252, "token_acc": 0.019163544261402883 }, { "epoch": 0.1483435942538845, "grad_norm": 2.4753180663240912, "learning_rate": 7.415005861664713e-05, "loss": 8.14482307434082, "step": 253, "token_acc": 0.019855293699420757 }, { "epoch": 0.14892993257109352, "grad_norm": 10.851512709145455, "learning_rate": 7.444314185228605e-05, "loss": 8.293392181396484, "step": 254, "token_acc": 0.01991583835159254 }, { "epoch": 0.14951627088830255, "grad_norm": 12.717966175460814, "learning_rate": 7.473622508792498e-05, "loss": 8.399085998535156, "step": 255, "token_acc": 0.019179935154781818 }, { "epoch": 0.15010260920551158, "grad_norm": 7.183095352986484, "learning_rate": 7.50293083235639e-05, "loss": 8.30589485168457, "step": 256, "token_acc": 0.017908344007417372 }, { "epoch": 0.1506889475227206, "grad_norm": 5.023779912680333, "learning_rate": 7.532239155920281e-05, "loss": 8.213706970214844, "step": 257, "token_acc": 0.018848616466637234 }, { "epoch": 0.15127528583992964, "grad_norm": 3.621674151339373, "learning_rate": 7.561547479484174e-05, "loss": 8.140850067138672, "step": 258, "token_acc": 0.020262012018062013 }, { "epoch": 0.15186162415713866, "grad_norm": 3.6894373755328913, "learning_rate": 7.590855803048066e-05, "loss": 8.16871166229248, "step": 259, "token_acc": 0.020408534006005258 }, { "epoch": 0.1524479624743477, "grad_norm": 2.8075728014756134, "learning_rate": 7.620164126611958e-05, "loss": 8.118350982666016, "step": 260, "token_acc": 0.02136015078361003 }, { "epoch": 0.15303430079155672, "grad_norm": 7.877261405761018, "learning_rate": 7.64947245017585e-05, "loss": 8.198387145996094, "step": 261, "token_acc": 0.02049476271997793 }, { "epoch": 0.15362063910876575, "grad_norm": 3.3493707278793172, "learning_rate": 7.678780773739742e-05, "loss": 8.124204635620117, "step": 262, "token_acc": 0.021180212958395 }, { "epoch": 0.15420697742597478, "grad_norm": 11.11112658219854, "learning_rate": 7.708089097303634e-05, "loss": 8.171187400817871, "step": 263, "token_acc": 0.020974568045039797 }, { "epoch": 0.1547933157431838, "grad_norm": 10.659399873600039, "learning_rate": 7.737397420867527e-05, "loss": 8.115743637084961, "step": 264, "token_acc": 0.02047971949956172 }, { "epoch": 0.15537965406039284, "grad_norm": 6.822965896088382, "learning_rate": 7.766705744431418e-05, "loss": 8.126745223999023, "step": 265, "token_acc": 0.021024674916348633 }, { "epoch": 0.15596599237760186, "grad_norm": 8.330780930249595, "learning_rate": 7.796014067995311e-05, "loss": 8.091007232666016, "step": 266, "token_acc": 0.02025383767213807 }, { "epoch": 0.1565523306948109, "grad_norm": 8.047149462638302, "learning_rate": 7.825322391559203e-05, "loss": 8.087890625, "step": 267, "token_acc": 0.021566429280016775 }, { "epoch": 0.15713866901201995, "grad_norm": 7.575106694004534, "learning_rate": 7.854630715123095e-05, "loss": 8.138514518737793, "step": 268, "token_acc": 0.020341918429767968 }, { "epoch": 0.15772500732922898, "grad_norm": 7.071751041121472, "learning_rate": 7.883939038686987e-05, "loss": 7.99616813659668, "step": 269, "token_acc": 0.021258970001438397 }, { "epoch": 0.158311345646438, "grad_norm": 7.791014013059152, "learning_rate": 7.91324736225088e-05, "loss": 8.087705612182617, "step": 270, "token_acc": 0.020033058210134052 }, { "epoch": 0.15889768396364704, "grad_norm": 4.228416297373905, "learning_rate": 7.942555685814772e-05, "loss": 7.922218322753906, "step": 271, "token_acc": 0.023088780633423147 }, { "epoch": 0.15948402228085606, "grad_norm": 2.317191701919628, "learning_rate": 7.971864009378663e-05, "loss": 7.988491058349609, "step": 272, "token_acc": 0.022586674800372273 }, { "epoch": 0.1600703605980651, "grad_norm": 3.1131941773042953, "learning_rate": 8.001172332942556e-05, "loss": 7.979279041290283, "step": 273, "token_acc": 0.022202807978539518 }, { "epoch": 0.16065669891527412, "grad_norm": 5.1091131725230134, "learning_rate": 8.030480656506448e-05, "loss": 8.028542518615723, "step": 274, "token_acc": 0.022238672700770037 }, { "epoch": 0.16124303723248315, "grad_norm": 4.085072181013333, "learning_rate": 8.05978898007034e-05, "loss": 7.9908447265625, "step": 275, "token_acc": 0.02197045568352529 }, { "epoch": 0.16182937554969218, "grad_norm": 2.288307429583844, "learning_rate": 8.089097303634233e-05, "loss": 7.983156681060791, "step": 276, "token_acc": 0.02247299609471938 }, { "epoch": 0.1624157138669012, "grad_norm": 7.895985902368486, "learning_rate": 8.118405627198124e-05, "loss": 7.926133155822754, "step": 277, "token_acc": 0.02331120092378753 }, { "epoch": 0.16300205218411024, "grad_norm": 3.820383773726855, "learning_rate": 8.147713950762016e-05, "loss": 7.868941307067871, "step": 278, "token_acc": 0.024034693695067878 }, { "epoch": 0.16358839050131926, "grad_norm": 6.9392128157306265, "learning_rate": 8.177022274325908e-05, "loss": 7.8979411125183105, "step": 279, "token_acc": 0.02393419479226478 }, { "epoch": 0.1641747288185283, "grad_norm": 3.815135016073319, "learning_rate": 8.206330597889801e-05, "loss": 7.842163562774658, "step": 280, "token_acc": 0.024602287452519262 }, { "epoch": 0.16476106713573732, "grad_norm": 3.6512547425180597, "learning_rate": 8.235638921453694e-05, "loss": 7.815593719482422, "step": 281, "token_acc": 0.025859780940211397 }, { "epoch": 0.16534740545294635, "grad_norm": 7.268224507531726, "learning_rate": 8.264947245017585e-05, "loss": 7.821690559387207, "step": 282, "token_acc": 0.025709757477233232 }, { "epoch": 0.16593374377015538, "grad_norm": 3.272034002598221, "learning_rate": 8.294255568581477e-05, "loss": 7.8845014572143555, "step": 283, "token_acc": 0.02380989510235058 }, { "epoch": 0.1665200820873644, "grad_norm": 4.105038652483503, "learning_rate": 8.323563892145369e-05, "loss": 7.85194730758667, "step": 284, "token_acc": 0.02499056039968893 }, { "epoch": 0.16710642040457344, "grad_norm": 7.653802822717913, "learning_rate": 8.352872215709262e-05, "loss": 7.733232021331787, "step": 285, "token_acc": 0.026103846756814927 }, { "epoch": 0.16769275872178246, "grad_norm": 1.8435773358723115, "learning_rate": 8.382180539273155e-05, "loss": 7.811696529388428, "step": 286, "token_acc": 0.026082955468500982 }, { "epoch": 0.1682790970389915, "grad_norm": 14.044582027839947, "learning_rate": 8.411488862837045e-05, "loss": 7.8016557693481445, "step": 287, "token_acc": 0.026037299444329053 }, { "epoch": 0.16886543535620052, "grad_norm": 11.461243976306463, "learning_rate": 8.440797186400937e-05, "loss": 7.900874137878418, "step": 288, "token_acc": 0.022702794701528854 }, { "epoch": 0.16945177367340955, "grad_norm": 5.488517541389156, "learning_rate": 8.47010550996483e-05, "loss": 7.884481430053711, "step": 289, "token_acc": 0.023360143394362504 }, { "epoch": 0.17003811199061858, "grad_norm": 8.484756413355848, "learning_rate": 8.499413833528723e-05, "loss": 7.763012886047363, "step": 290, "token_acc": 0.02676448188501531 }, { "epoch": 0.1706244503078276, "grad_norm": 4.421983111333443, "learning_rate": 8.528722157092614e-05, "loss": 7.755222797393799, "step": 291, "token_acc": 0.025426050882831206 }, { "epoch": 0.17121078862503664, "grad_norm": 12.721238963219005, "learning_rate": 8.558030480656506e-05, "loss": 7.662722110748291, "step": 292, "token_acc": 0.026705650005930004 }, { "epoch": 0.17179712694224566, "grad_norm": 4.460130019396442, "learning_rate": 8.587338804220398e-05, "loss": 7.782173156738281, "step": 293, "token_acc": 0.02529513539407869 }, { "epoch": 0.1723834652594547, "grad_norm": 11.492135230545891, "learning_rate": 8.616647127784291e-05, "loss": 7.726858139038086, "step": 294, "token_acc": 0.025093573450926356 }, { "epoch": 0.17296980357666372, "grad_norm": 6.705542705901324, "learning_rate": 8.645955451348184e-05, "loss": 7.73845100402832, "step": 295, "token_acc": 0.026246969769304625 }, { "epoch": 0.17355614189387278, "grad_norm": 15.321151135680177, "learning_rate": 8.675263774912075e-05, "loss": 7.725770950317383, "step": 296, "token_acc": 0.026485684548575326 }, { "epoch": 0.1741424802110818, "grad_norm": 10.711760137487351, "learning_rate": 8.704572098475968e-05, "loss": 7.694210052490234, "step": 297, "token_acc": 0.026845782417523133 }, { "epoch": 0.17472881852829084, "grad_norm": 15.335026968597955, "learning_rate": 8.733880422039859e-05, "loss": 7.765105724334717, "step": 298, "token_acc": 0.026298105737273206 }, { "epoch": 0.17531515684549986, "grad_norm": 12.9802950210738, "learning_rate": 8.763188745603752e-05, "loss": 7.57515811920166, "step": 299, "token_acc": 0.029251924574345965 }, { "epoch": 0.1759014951627089, "grad_norm": 9.798442465078873, "learning_rate": 8.792497069167643e-05, "loss": 7.5946364402771, "step": 300, "token_acc": 0.027993413314514232 }, { "epoch": 0.17648783347991792, "grad_norm": 11.152273327422026, "learning_rate": 8.821805392731536e-05, "loss": 7.743631362915039, "step": 301, "token_acc": 0.027415174088690656 }, { "epoch": 0.17707417179712695, "grad_norm": 13.056494508565278, "learning_rate": 8.851113716295429e-05, "loss": 7.624645233154297, "step": 302, "token_acc": 0.029570342053916298 }, { "epoch": 0.17766051011433598, "grad_norm": 10.280070828201312, "learning_rate": 8.88042203985932e-05, "loss": 7.5809431076049805, "step": 303, "token_acc": 0.029406342668429397 }, { "epoch": 0.178246848431545, "grad_norm": 14.994484623702146, "learning_rate": 8.909730363423213e-05, "loss": 7.604696273803711, "step": 304, "token_acc": 0.028297901565079473 }, { "epoch": 0.17883318674875404, "grad_norm": 16.895591916059605, "learning_rate": 8.939038686987104e-05, "loss": 7.581826686859131, "step": 305, "token_acc": 0.028195779819885084 }, { "epoch": 0.17941952506596306, "grad_norm": 2.238952058149123, "learning_rate": 8.968347010550997e-05, "loss": 7.470364093780518, "step": 306, "token_acc": 0.03223189912907838 }, { "epoch": 0.1800058633831721, "grad_norm": 4.763228035828373, "learning_rate": 8.99765533411489e-05, "loss": 7.580644130706787, "step": 307, "token_acc": 0.030268167972149696 }, { "epoch": 0.18059220170038112, "grad_norm": 3.757399084390193, "learning_rate": 9.026963657678781e-05, "loss": 7.576322555541992, "step": 308, "token_acc": 0.029256197097563497 }, { "epoch": 0.18117854001759015, "grad_norm": 9.51844999336912, "learning_rate": 9.056271981242672e-05, "loss": 7.5692853927612305, "step": 309, "token_acc": 0.030617597597288722 }, { "epoch": 0.18176487833479918, "grad_norm": 3.427442184887727, "learning_rate": 9.085580304806565e-05, "loss": 7.4781341552734375, "step": 310, "token_acc": 0.03173392227244986 }, { "epoch": 0.1823512166520082, "grad_norm": 8.371967792200746, "learning_rate": 9.114888628370458e-05, "loss": 7.501359462738037, "step": 311, "token_acc": 0.03140233413607855 }, { "epoch": 0.18293755496921724, "grad_norm": 6.312168582950612, "learning_rate": 9.14419695193435e-05, "loss": 7.405187129974365, "step": 312, "token_acc": 0.03208052461153087 }, { "epoch": 0.18352389328642627, "grad_norm": 3.7641563791734614, "learning_rate": 9.173505275498242e-05, "loss": 7.448599815368652, "step": 313, "token_acc": 0.03280546749262446 }, { "epoch": 0.1841102316036353, "grad_norm": 9.448631141997817, "learning_rate": 9.202813599062133e-05, "loss": 7.440299987792969, "step": 314, "token_acc": 0.03276126768285996 }, { "epoch": 0.18469656992084432, "grad_norm": 3.90500189305802, "learning_rate": 9.232121922626026e-05, "loss": 7.399839878082275, "step": 315, "token_acc": 0.03215483883860879 }, { "epoch": 0.18528290823805335, "grad_norm": 7.098742387259987, "learning_rate": 9.261430246189919e-05, "loss": 7.342225074768066, "step": 316, "token_acc": 0.03405625994558801 }, { "epoch": 0.18586924655526238, "grad_norm": 6.579085040673552, "learning_rate": 9.290738569753811e-05, "loss": 7.373745918273926, "step": 317, "token_acc": 0.0345059090023882 }, { "epoch": 0.1864555848724714, "grad_norm": 2.9704252232035913, "learning_rate": 9.320046893317701e-05, "loss": 7.324457168579102, "step": 318, "token_acc": 0.03474796385485953 }, { "epoch": 0.18704192318968044, "grad_norm": 6.623089072181961, "learning_rate": 9.349355216881594e-05, "loss": 7.3864240646362305, "step": 319, "token_acc": 0.03265260121973398 }, { "epoch": 0.18762826150688947, "grad_norm": 5.704289522288452, "learning_rate": 9.378663540445487e-05, "loss": 7.358420372009277, "step": 320, "token_acc": 0.03454702320598669 }, { "epoch": 0.1882145998240985, "grad_norm": 3.4302450755989344, "learning_rate": 9.40797186400938e-05, "loss": 7.35070276260376, "step": 321, "token_acc": 0.03472312289401742 }, { "epoch": 0.18880093814130752, "grad_norm": 8.555791589944487, "learning_rate": 9.437280187573272e-05, "loss": 7.284658908843994, "step": 322, "token_acc": 0.03561939072511846 }, { "epoch": 0.18938727645851655, "grad_norm": 3.9378425757811066, "learning_rate": 9.466588511137162e-05, "loss": 7.287924289703369, "step": 323, "token_acc": 0.03677401988726328 }, { "epoch": 0.18997361477572558, "grad_norm": 6.8059336192624915, "learning_rate": 9.495896834701055e-05, "loss": 7.250253677368164, "step": 324, "token_acc": 0.03618896842636764 }, { "epoch": 0.19055995309293464, "grad_norm": 3.4663918608481024, "learning_rate": 9.525205158264948e-05, "loss": 7.141970157623291, "step": 325, "token_acc": 0.03980846498516945 }, { "epoch": 0.19114629141014366, "grad_norm": 6.494804356916958, "learning_rate": 9.55451348182884e-05, "loss": 7.144181251525879, "step": 326, "token_acc": 0.03971016146516803 }, { "epoch": 0.1917326297273527, "grad_norm": 7.4809562091856545, "learning_rate": 9.583821805392732e-05, "loss": 7.219866752624512, "step": 327, "token_acc": 0.03793140071339942 }, { "epoch": 0.19231896804456172, "grad_norm": 4.6179864476947525, "learning_rate": 9.613130128956623e-05, "loss": 7.127732753753662, "step": 328, "token_acc": 0.04009841262331149 }, { "epoch": 0.19290530636177075, "grad_norm": 3.4159232581083847, "learning_rate": 9.642438452520516e-05, "loss": 7.109224319458008, "step": 329, "token_acc": 0.039040996668115314 }, { "epoch": 0.19349164467897978, "grad_norm": 11.101823923649848, "learning_rate": 9.671746776084409e-05, "loss": 7.121427536010742, "step": 330, "token_acc": 0.04288325556784838 }, { "epoch": 0.1940779829961888, "grad_norm": 4.4398761624838174, "learning_rate": 9.7010550996483e-05, "loss": 7.084875106811523, "step": 331, "token_acc": 0.04085425697774236 }, { "epoch": 0.19466432131339784, "grad_norm": 10.534448841630402, "learning_rate": 9.730363423212193e-05, "loss": 7.209066390991211, "step": 332, "token_acc": 0.03748962545692137 }, { "epoch": 0.19525065963060687, "grad_norm": 7.457683337075591, "learning_rate": 9.759671746776084e-05, "loss": 7.057285308837891, "step": 333, "token_acc": 0.042235192153104534 }, { "epoch": 0.1958369979478159, "grad_norm": 9.769992005909325, "learning_rate": 9.788980070339977e-05, "loss": 7.133115768432617, "step": 334, "token_acc": 0.040370114596897703 }, { "epoch": 0.19642333626502492, "grad_norm": 8.514258762313188, "learning_rate": 9.81828839390387e-05, "loss": 7.023362159729004, "step": 335, "token_acc": 0.04553765713707722 }, { "epoch": 0.19700967458223395, "grad_norm": 10.687729322728085, "learning_rate": 9.847596717467761e-05, "loss": 7.071666240692139, "step": 336, "token_acc": 0.04284351406487803 }, { "epoch": 0.19759601289944298, "grad_norm": 8.975853867533473, "learning_rate": 9.876905041031654e-05, "loss": 6.967942237854004, "step": 337, "token_acc": 0.04838330530563111 }, { "epoch": 0.198182351216652, "grad_norm": 8.085078352833444, "learning_rate": 9.906213364595545e-05, "loss": 6.97743558883667, "step": 338, "token_acc": 0.046405126285870346 }, { "epoch": 0.19876868953386104, "grad_norm": 6.473983769929311, "learning_rate": 9.935521688159438e-05, "loss": 7.011883735656738, "step": 339, "token_acc": 0.04605891806198015 }, { "epoch": 0.19935502785107007, "grad_norm": 9.353614949786364, "learning_rate": 9.964830011723329e-05, "loss": 6.90988302230835, "step": 340, "token_acc": 0.049334454030279966 }, { "epoch": 0.1999413661682791, "grad_norm": 8.191913891200802, "learning_rate": 9.994138335287222e-05, "loss": 7.003641128540039, "step": 341, "token_acc": 0.04576521646864575 }, { "epoch": 0.20052770448548812, "grad_norm": 6.53629218369338, "learning_rate": 0.00010023446658851114, "loss": 6.881128787994385, "step": 342, "token_acc": 0.04841722337155768 }, { "epoch": 0.20111404280269715, "grad_norm": 5.102785860731727, "learning_rate": 0.00010052754982415006, "loss": 6.955008029937744, "step": 343, "token_acc": 0.049230626908014434 }, { "epoch": 0.20170038111990618, "grad_norm": 8.215374373674763, "learning_rate": 0.00010082063305978898, "loss": 6.877338409423828, "step": 344, "token_acc": 0.051758990749952376 }, { "epoch": 0.2022867194371152, "grad_norm": 6.619862417211111, "learning_rate": 0.0001011137162954279, "loss": 6.9279584884643555, "step": 345, "token_acc": 0.04782156210250414 }, { "epoch": 0.20287305775432424, "grad_norm": 6.871229992803177, "learning_rate": 0.00010140679953106683, "loss": 6.894254684448242, "step": 346, "token_acc": 0.05075188976926333 }, { "epoch": 0.20345939607153327, "grad_norm": 7.020359484608488, "learning_rate": 0.00010169988276670575, "loss": 6.808967113494873, "step": 347, "token_acc": 0.05406310677071067 }, { "epoch": 0.2040457343887423, "grad_norm": 5.743485111044436, "learning_rate": 0.00010199296600234467, "loss": 6.807111740112305, "step": 348, "token_acc": 0.05539717470796795 }, { "epoch": 0.20463207270595132, "grad_norm": 6.647518761433814, "learning_rate": 0.00010228604923798358, "loss": 6.821077346801758, "step": 349, "token_acc": 0.05351821521958795 }, { "epoch": 0.20521841102316035, "grad_norm": 12.054921021367194, "learning_rate": 0.00010257913247362251, "loss": 6.858293533325195, "step": 350, "token_acc": 0.05407264933218066 }, { "epoch": 0.20580474934036938, "grad_norm": 6.711263415199636, "learning_rate": 0.00010287221570926143, "loss": 6.742191314697266, "step": 351, "token_acc": 0.05734490517518483 }, { "epoch": 0.2063910876575784, "grad_norm": 8.223553120178638, "learning_rate": 0.00010316529894490036, "loss": 6.734842300415039, "step": 352, "token_acc": 0.0565855038670826 }, { "epoch": 0.20697742597478747, "grad_norm": 8.348030935749714, "learning_rate": 0.00010345838218053928, "loss": 6.833942413330078, "step": 353, "token_acc": 0.05438101608165877 }, { "epoch": 0.2075637642919965, "grad_norm": 6.625395594162651, "learning_rate": 0.00010375146541617819, "loss": 6.7424821853637695, "step": 354, "token_acc": 0.05685099120952248 }, { "epoch": 0.20815010260920552, "grad_norm": 10.177371253947637, "learning_rate": 0.00010404454865181712, "loss": 6.679440975189209, "step": 355, "token_acc": 0.060094043887147335 }, { "epoch": 0.20873644092641455, "grad_norm": 8.4730174808182, "learning_rate": 0.00010433763188745604, "loss": 6.7848052978515625, "step": 356, "token_acc": 0.056823985378563 }, { "epoch": 0.20932277924362358, "grad_norm": 7.646075916543815, "learning_rate": 0.00010463071512309497, "loss": 6.736171245574951, "step": 357, "token_acc": 0.05594884082743405 }, { "epoch": 0.2099091175608326, "grad_norm": 10.782082900980125, "learning_rate": 0.00010492379835873387, "loss": 6.678497791290283, "step": 358, "token_acc": 0.06489317333530427 }, { "epoch": 0.21049545587804164, "grad_norm": 5.034854056114249, "learning_rate": 0.0001052168815943728, "loss": 6.620523929595947, "step": 359, "token_acc": 0.06533077781094371 }, { "epoch": 0.21108179419525067, "grad_norm": 11.317390689825455, "learning_rate": 0.00010550996483001172, "loss": 6.652674674987793, "step": 360, "token_acc": 0.059921380688700926 }, { "epoch": 0.2116681325124597, "grad_norm": 9.08330521548402, "learning_rate": 0.00010580304806565065, "loss": 6.76340389251709, "step": 361, "token_acc": 0.05683870749766746 }, { "epoch": 0.21225447082966872, "grad_norm": 4.192956432983848, "learning_rate": 0.00010609613130128958, "loss": 6.675985336303711, "step": 362, "token_acc": 0.061936511821048 }, { "epoch": 0.21284080914687775, "grad_norm": 4.929739288408198, "learning_rate": 0.00010638921453692848, "loss": 6.55964469909668, "step": 363, "token_acc": 0.06749772520473157 }, { "epoch": 0.21342714746408678, "grad_norm": 5.661717824511602, "learning_rate": 0.0001066822977725674, "loss": 6.6542816162109375, "step": 364, "token_acc": 0.06293617594069043 }, { "epoch": 0.2140134857812958, "grad_norm": 9.191242956620586, "learning_rate": 0.00010697538100820633, "loss": 6.536946773529053, "step": 365, "token_acc": 0.07048085066340454 }, { "epoch": 0.21459982409850484, "grad_norm": 3.8615696252789444, "learning_rate": 0.00010726846424384526, "loss": 6.442357063293457, "step": 366, "token_acc": 0.07190959032798268 }, { "epoch": 0.21518616241571387, "grad_norm": 7.489777371921469, "learning_rate": 0.00010756154747948417, "loss": 6.4940104484558105, "step": 367, "token_acc": 0.071526452620929 }, { "epoch": 0.2157725007329229, "grad_norm": 5.125394613804369, "learning_rate": 0.0001078546307151231, "loss": 6.479306221008301, "step": 368, "token_acc": 0.06973716851955572 }, { "epoch": 0.21635883905013192, "grad_norm": 7.661570494600948, "learning_rate": 0.00010814771395076202, "loss": 6.42294454574585, "step": 369, "token_acc": 0.07784033575807714 }, { "epoch": 0.21694517736734095, "grad_norm": 3.941237775704587, "learning_rate": 0.00010844079718640094, "loss": 6.3986897468566895, "step": 370, "token_acc": 0.07585258816686935 }, { "epoch": 0.21753151568454998, "grad_norm": 10.062581230596951, "learning_rate": 0.00010873388042203986, "loss": 6.433681964874268, "step": 371, "token_acc": 0.0742224188653176 }, { "epoch": 0.218117854001759, "grad_norm": 6.612360630538697, "learning_rate": 0.00010902696365767878, "loss": 6.455165386199951, "step": 372, "token_acc": 0.07427043767122381 }, { "epoch": 0.21870419231896804, "grad_norm": 7.044091251652804, "learning_rate": 0.00010932004689331771, "loss": 6.425081729888916, "step": 373, "token_acc": 0.0744649742234545 }, { "epoch": 0.21929053063617707, "grad_norm": 5.932340105583805, "learning_rate": 0.00010961313012895662, "loss": 6.407991409301758, "step": 374, "token_acc": 0.07746613139274158 }, { "epoch": 0.2198768689533861, "grad_norm": 5.892266088269297, "learning_rate": 0.00010990621336459555, "loss": 6.425785064697266, "step": 375, "token_acc": 0.07493806663140087 }, { "epoch": 0.22046320727059512, "grad_norm": 4.389198671491138, "learning_rate": 0.00011019929660023446, "loss": 6.331266403198242, "step": 376, "token_acc": 0.08358848984821347 }, { "epoch": 0.22104954558780415, "grad_norm": 5.671827948944255, "learning_rate": 0.00011049237983587339, "loss": 6.38395881652832, "step": 377, "token_acc": 0.0792953962153578 }, { "epoch": 0.22163588390501318, "grad_norm": 8.012472306467243, "learning_rate": 0.00011078546307151232, "loss": 6.316666126251221, "step": 378, "token_acc": 0.08367280922988721 }, { "epoch": 0.2222222222222222, "grad_norm": 4.066955994948399, "learning_rate": 0.00011107854630715123, "loss": 6.273752689361572, "step": 379, "token_acc": 0.08612666064057206 }, { "epoch": 0.22280856053943124, "grad_norm": 9.76137199232559, "learning_rate": 0.00011137162954279015, "loss": 6.331414222717285, "step": 380, "token_acc": 0.08063501627975812 }, { "epoch": 0.2233948988566403, "grad_norm": 5.712229411662863, "learning_rate": 0.00011166471277842907, "loss": 6.2297797203063965, "step": 381, "token_acc": 0.08709685939935022 }, { "epoch": 0.22398123717384932, "grad_norm": 7.143148053425372, "learning_rate": 0.000111957796014068, "loss": 6.208853244781494, "step": 382, "token_acc": 0.08924345350364703 }, { "epoch": 0.22456757549105835, "grad_norm": 4.872547665438867, "learning_rate": 0.00011225087924970693, "loss": 6.216014862060547, "step": 383, "token_acc": 0.08970910293501103 }, { "epoch": 0.22515391380826738, "grad_norm": 3.830088671315554, "learning_rate": 0.00011254396248534584, "loss": 6.143113136291504, "step": 384, "token_acc": 0.09522872445099217 }, { "epoch": 0.2257402521254764, "grad_norm": 4.079606120494896, "learning_rate": 0.00011283704572098476, "loss": 6.1694207191467285, "step": 385, "token_acc": 0.09036978551213279 }, { "epoch": 0.22632659044268544, "grad_norm": 6.619930436486503, "learning_rate": 0.00011313012895662368, "loss": 6.212316513061523, "step": 386, "token_acc": 0.09043562015199764 }, { "epoch": 0.22691292875989447, "grad_norm": 3.821554462672562, "learning_rate": 0.00011342321219226261, "loss": 6.152814865112305, "step": 387, "token_acc": 0.08981522863043875 }, { "epoch": 0.2274992670771035, "grad_norm": 6.269206135361615, "learning_rate": 0.00011371629542790154, "loss": 6.163750648498535, "step": 388, "token_acc": 0.09439272740663972 }, { "epoch": 0.22808560539431252, "grad_norm": 4.347102457246547, "learning_rate": 0.00011400937866354044, "loss": 6.199976921081543, "step": 389, "token_acc": 0.09205683538266946 }, { "epoch": 0.22867194371152155, "grad_norm": 5.599273769887971, "learning_rate": 0.00011430246189917936, "loss": 6.06398868560791, "step": 390, "token_acc": 0.09922983679060975 }, { "epoch": 0.22925828202873058, "grad_norm": 3.6439645079874134, "learning_rate": 0.00011459554513481829, "loss": 6.026261329650879, "step": 391, "token_acc": 0.10002185377978236 }, { "epoch": 0.2298446203459396, "grad_norm": 4.434991454044366, "learning_rate": 0.00011488862837045722, "loss": 6.05257511138916, "step": 392, "token_acc": 0.0997451107489922 }, { "epoch": 0.23043095866314864, "grad_norm": 5.0655362788626075, "learning_rate": 0.00011518171160609615, "loss": 6.026375770568848, "step": 393, "token_acc": 0.10212818069036754 }, { "epoch": 0.23101729698035767, "grad_norm": 2.665448202349342, "learning_rate": 0.00011547479484173505, "loss": 5.91487455368042, "step": 394, "token_acc": 0.10893922910333187 }, { "epoch": 0.2316036352975667, "grad_norm": 9.06084480222526, "learning_rate": 0.00011576787807737397, "loss": 6.025001525878906, "step": 395, "token_acc": 0.09560397704138593 }, { "epoch": 0.23218997361477572, "grad_norm": 4.673902059041878, "learning_rate": 0.0001160609613130129, "loss": 5.972105979919434, "step": 396, "token_acc": 0.10496248382923674 }, { "epoch": 0.23277631193198475, "grad_norm": 5.941030418706131, "learning_rate": 0.00011635404454865183, "loss": 6.013332843780518, "step": 397, "token_acc": 0.10335180193018191 }, { "epoch": 0.23336265024919378, "grad_norm": 6.045884946008961, "learning_rate": 0.00011664712778429074, "loss": 6.084906578063965, "step": 398, "token_acc": 0.0951407001049074 }, { "epoch": 0.2339489885664028, "grad_norm": 4.883329485507978, "learning_rate": 0.00011694021101992965, "loss": 5.929563522338867, "step": 399, "token_acc": 0.10884320825369688 }, { "epoch": 0.23453532688361184, "grad_norm": 3.717426479172605, "learning_rate": 0.00011723329425556858, "loss": 5.903749465942383, "step": 400, "token_acc": 0.10962016385501569 }, { "epoch": 0.23512166520082087, "grad_norm": 5.287308426237174, "learning_rate": 0.00011752637749120751, "loss": 5.854625701904297, "step": 401, "token_acc": 0.11044980023881325 }, { "epoch": 0.2357080035180299, "grad_norm": 4.8190976688777925, "learning_rate": 0.00011781946072684644, "loss": 5.83117151260376, "step": 402, "token_acc": 0.11495696625648988 }, { "epoch": 0.23629434183523892, "grad_norm": 7.065135868946098, "learning_rate": 0.00011811254396248535, "loss": 5.819321632385254, "step": 403, "token_acc": 0.11133195636605188 }, { "epoch": 0.23688068015244795, "grad_norm": 3.253187254276713, "learning_rate": 0.00011840562719812426, "loss": 5.825146198272705, "step": 404, "token_acc": 0.11440163551033493 }, { "epoch": 0.23746701846965698, "grad_norm": 7.519045532066019, "learning_rate": 0.00011869871043376319, "loss": 5.731228351593018, "step": 405, "token_acc": 0.12204654047717918 }, { "epoch": 0.238053356786866, "grad_norm": 4.9885069960570725, "learning_rate": 0.00011899179366940212, "loss": 5.820338249206543, "step": 406, "token_acc": 0.11491831572603985 }, { "epoch": 0.23863969510407504, "grad_norm": 4.72737283734716, "learning_rate": 0.00011928487690504103, "loss": 5.732571601867676, "step": 407, "token_acc": 0.11993047359719426 }, { "epoch": 0.23922603342128407, "grad_norm": 5.431511393200514, "learning_rate": 0.00011957796014067996, "loss": 5.79659366607666, "step": 408, "token_acc": 0.11445042926524408 }, { "epoch": 0.23981237173849312, "grad_norm": 3.4479967425078395, "learning_rate": 0.00011987104337631887, "loss": 5.674437522888184, "step": 409, "token_acc": 0.12330751174325962 }, { "epoch": 0.24039871005570215, "grad_norm": 3.5530065378300013, "learning_rate": 0.0001201641266119578, "loss": 5.674881458282471, "step": 410, "token_acc": 0.12472383795608989 }, { "epoch": 0.24098504837291118, "grad_norm": 5.900044462285062, "learning_rate": 0.00012045720984759671, "loss": 5.541609764099121, "step": 411, "token_acc": 0.13444416534092496 }, { "epoch": 0.2415713866901202, "grad_norm": 3.1343076342691605, "learning_rate": 0.00012075029308323564, "loss": 5.634923934936523, "step": 412, "token_acc": 0.12509247986210442 }, { "epoch": 0.24215772500732924, "grad_norm": 5.272992968186741, "learning_rate": 0.00012104337631887457, "loss": 5.641106128692627, "step": 413, "token_acc": 0.124788313913987 }, { "epoch": 0.24274406332453827, "grad_norm": 5.1129186705206076, "learning_rate": 0.00012133645955451348, "loss": 5.602048873901367, "step": 414, "token_acc": 0.12926272099443858 }, { "epoch": 0.2433304016417473, "grad_norm": 5.17456541351753, "learning_rate": 0.00012162954279015241, "loss": 5.635060787200928, "step": 415, "token_acc": 0.12416406893501389 }, { "epoch": 0.24391673995895632, "grad_norm": 6.50594811222227, "learning_rate": 0.00012192262602579132, "loss": 5.699517726898193, "step": 416, "token_acc": 0.11834356482794695 }, { "epoch": 0.24450307827616535, "grad_norm": 3.001109057680427, "learning_rate": 0.00012221570926143025, "loss": 5.493686199188232, "step": 417, "token_acc": 0.13267444406217746 }, { "epoch": 0.24508941659337438, "grad_norm": 5.560878125283581, "learning_rate": 0.00012250879249706918, "loss": 5.516180992126465, "step": 418, "token_acc": 0.13398374313032077 }, { "epoch": 0.2456757549105834, "grad_norm": 3.538854348735928, "learning_rate": 0.0001228018757327081, "loss": 5.538155555725098, "step": 419, "token_acc": 0.12949818185604256 }, { "epoch": 0.24626209322779244, "grad_norm": 6.284893238784886, "learning_rate": 0.000123094958968347, "loss": 5.549795150756836, "step": 420, "token_acc": 0.1259016342565413 }, { "epoch": 0.24684843154500147, "grad_norm": 4.829788058367907, "learning_rate": 0.00012338804220398593, "loss": 5.550993919372559, "step": 421, "token_acc": 0.1300300552139439 }, { "epoch": 0.2474347698622105, "grad_norm": 4.630256565786252, "learning_rate": 0.00012368112543962486, "loss": 5.513519763946533, "step": 422, "token_acc": 0.13096768376717888 }, { "epoch": 0.24802110817941952, "grad_norm": 4.508957731839889, "learning_rate": 0.00012397420867526378, "loss": 5.520076751708984, "step": 423, "token_acc": 0.13224918754586434 }, { "epoch": 0.24860744649662855, "grad_norm": 3.723587986939637, "learning_rate": 0.0001242672919109027, "loss": 5.342349052429199, "step": 424, "token_acc": 0.14435009797517961 }, { "epoch": 0.24919378481383758, "grad_norm": 5.648678919123222, "learning_rate": 0.0001245603751465416, "loss": 5.36069393157959, "step": 425, "token_acc": 0.14558231572792513 }, { "epoch": 0.2497801231310466, "grad_norm": 3.801985637338194, "learning_rate": 0.00012485345838218054, "loss": 5.387795448303223, "step": 426, "token_acc": 0.14029441563859588 }, { "epoch": 0.25036646144825564, "grad_norm": 3.9776691412757303, "learning_rate": 0.00012514654161781947, "loss": 5.389639854431152, "step": 427, "token_acc": 0.1379084034812384 }, { "epoch": 0.2509527997654647, "grad_norm": 5.036749741166907, "learning_rate": 0.0001254396248534584, "loss": 5.404719829559326, "step": 428, "token_acc": 0.1399757166626547 }, { "epoch": 0.2515391380826737, "grad_norm": 3.005253859269899, "learning_rate": 0.00012573270808909732, "loss": 5.345599174499512, "step": 429, "token_acc": 0.14372790345304234 }, { "epoch": 0.25212547639988275, "grad_norm": 7.125839984359642, "learning_rate": 0.00012602579132473625, "loss": 5.4137797355651855, "step": 430, "token_acc": 0.13999409500926474 }, { "epoch": 0.25271181471709175, "grad_norm": 4.434683344779012, "learning_rate": 0.00012631887456037515, "loss": 5.269342422485352, "step": 431, "token_acc": 0.14908022973169852 }, { "epoch": 0.2532981530343008, "grad_norm": 7.551460242481809, "learning_rate": 0.00012661195779601405, "loss": 5.3578643798828125, "step": 432, "token_acc": 0.1418907453417722 }, { "epoch": 0.2538844913515098, "grad_norm": 3.6294283005438133, "learning_rate": 0.00012690504103165298, "loss": 5.3116960525512695, "step": 433, "token_acc": 0.14279131091511885 }, { "epoch": 0.25447082966871887, "grad_norm": 7.059565815092824, "learning_rate": 0.0001271981242672919, "loss": 5.324859142303467, "step": 434, "token_acc": 0.14112250264249973 }, { "epoch": 0.25505716798592787, "grad_norm": 4.587438686951819, "learning_rate": 0.00012749120750293083, "loss": 5.269500732421875, "step": 435, "token_acc": 0.14550385486596573 }, { "epoch": 0.2556435063031369, "grad_norm": 4.278257181215779, "learning_rate": 0.00012778429073856976, "loss": 5.255743026733398, "step": 436, "token_acc": 0.1474247030161913 }, { "epoch": 0.2562298446203459, "grad_norm": 5.488653213285053, "learning_rate": 0.00012807737397420868, "loss": 5.346251487731934, "step": 437, "token_acc": 0.14210230942724933 }, { "epoch": 0.256816182937555, "grad_norm": 4.0312762992681845, "learning_rate": 0.0001283704572098476, "loss": 5.282540321350098, "step": 438, "token_acc": 0.14412451198412093 }, { "epoch": 0.257402521254764, "grad_norm": 4.6300663375694135, "learning_rate": 0.00012866354044548654, "loss": 5.18112850189209, "step": 439, "token_acc": 0.15508663307693524 }, { "epoch": 0.25798885957197304, "grad_norm": 3.7038026719717814, "learning_rate": 0.00012895662368112544, "loss": 5.1809892654418945, "step": 440, "token_acc": 0.15596789965814592 }, { "epoch": 0.25857519788918204, "grad_norm": 4.25864334099672, "learning_rate": 0.00012924970691676437, "loss": 5.202960968017578, "step": 441, "token_acc": 0.15038723513588623 }, { "epoch": 0.2591615362063911, "grad_norm": 7.2267914422965225, "learning_rate": 0.00012954279015240327, "loss": 5.208362579345703, "step": 442, "token_acc": 0.1490746610182594 }, { "epoch": 0.2597478745236001, "grad_norm": 3.4195045685076977, "learning_rate": 0.0001298358733880422, "loss": 5.213512897491455, "step": 443, "token_acc": 0.14599194512346145 }, { "epoch": 0.26033421284080915, "grad_norm": 6.422194572580395, "learning_rate": 0.00013012895662368112, "loss": 5.196712017059326, "step": 444, "token_acc": 0.14994420899885275 }, { "epoch": 0.26092055115801815, "grad_norm": 3.3713794363129006, "learning_rate": 0.00013042203985932005, "loss": 5.15119743347168, "step": 445, "token_acc": 0.15283893677732643 }, { "epoch": 0.2615068894752272, "grad_norm": 6.796911619094949, "learning_rate": 0.00013071512309495897, "loss": 5.200589179992676, "step": 446, "token_acc": 0.14966023193871295 }, { "epoch": 0.2620932277924362, "grad_norm": 4.4739001822028035, "learning_rate": 0.0001310082063305979, "loss": 5.130384922027588, "step": 447, "token_acc": 0.1567592307631606 }, { "epoch": 0.26267956610964527, "grad_norm": 3.5174103629453457, "learning_rate": 0.00013130128956623683, "loss": 5.191955089569092, "step": 448, "token_acc": 0.14917900104740844 }, { "epoch": 0.26326590442685427, "grad_norm": 4.7302865891328505, "learning_rate": 0.00013159437280187573, "loss": 5.087374687194824, "step": 449, "token_acc": 0.15747861805378852 }, { "epoch": 0.2638522427440633, "grad_norm": 3.42149699764738, "learning_rate": 0.00013188745603751466, "loss": 5.169719696044922, "step": 450, "token_acc": 0.1484520241550834 }, { "epoch": 0.2644385810612723, "grad_norm": 4.3774170350702715, "learning_rate": 0.00013218053927315358, "loss": 5.158553123474121, "step": 451, "token_acc": 0.15196783623354493 }, { "epoch": 0.2650249193784814, "grad_norm": 5.697101553122713, "learning_rate": 0.00013247362250879248, "loss": 5.155243873596191, "step": 452, "token_acc": 0.152841854995793 }, { "epoch": 0.26561125769569044, "grad_norm": 4.18377075223577, "learning_rate": 0.0001327667057444314, "loss": 5.048179626464844, "step": 453, "token_acc": 0.15982089928738097 }, { "epoch": 0.26619759601289944, "grad_norm": 4.365075888953617, "learning_rate": 0.00013305978898007034, "loss": 4.967833995819092, "step": 454, "token_acc": 0.16580578245860272 }, { "epoch": 0.2667839343301085, "grad_norm": 3.6152317842258745, "learning_rate": 0.00013335287221570926, "loss": 5.094614505767822, "step": 455, "token_acc": 0.1549353852172373 }, { "epoch": 0.2673702726473175, "grad_norm": 6.368131028673277, "learning_rate": 0.0001336459554513482, "loss": 5.008109092712402, "step": 456, "token_acc": 0.16077867009315772 }, { "epoch": 0.26795661096452655, "grad_norm": 3.1043652406502407, "learning_rate": 0.00013393903868698712, "loss": 5.056596755981445, "step": 457, "token_acc": 0.159609351413082 }, { "epoch": 0.26854294928173555, "grad_norm": 5.99537880938208, "learning_rate": 0.00013423212192262602, "loss": 5.066808223724365, "step": 458, "token_acc": 0.15492473345855712 }, { "epoch": 0.2691292875989446, "grad_norm": 4.018389706152762, "learning_rate": 0.00013452520515826495, "loss": 5.08259916305542, "step": 459, "token_acc": 0.15455270631581955 }, { "epoch": 0.2697156259161536, "grad_norm": 5.212084138386232, "learning_rate": 0.00013481828839390387, "loss": 5.064886569976807, "step": 460, "token_acc": 0.15543550494113842 }, { "epoch": 0.27030196423336267, "grad_norm": 3.972706194092152, "learning_rate": 0.0001351113716295428, "loss": 5.083024024963379, "step": 461, "token_acc": 0.1531443466927338 }, { "epoch": 0.27088830255057167, "grad_norm": 3.6225981166084416, "learning_rate": 0.00013540445486518173, "loss": 4.947565078735352, "step": 462, "token_acc": 0.16832664583762866 }, { "epoch": 0.2714746408677807, "grad_norm": 4.149631590130578, "learning_rate": 0.00013569753810082063, "loss": 4.913524627685547, "step": 463, "token_acc": 0.1697766859721047 }, { "epoch": 0.2720609791849897, "grad_norm": 3.171155110106739, "learning_rate": 0.00013599062133645955, "loss": 4.927159309387207, "step": 464, "token_acc": 0.16713602620664147 }, { "epoch": 0.2726473175021988, "grad_norm": 4.074171411318694, "learning_rate": 0.00013628370457209848, "loss": 5.02904748916626, "step": 465, "token_acc": 0.15604169678279445 }, { "epoch": 0.2732336558194078, "grad_norm": 4.747758175097898, "learning_rate": 0.0001365767878077374, "loss": 4.936007022857666, "step": 466, "token_acc": 0.1657910727883883 }, { "epoch": 0.27381999413661684, "grad_norm": 4.091176120075038, "learning_rate": 0.0001368698710433763, "loss": 4.963770866394043, "step": 467, "token_acc": 0.16243046835206842 }, { "epoch": 0.27440633245382584, "grad_norm": 4.007489318793903, "learning_rate": 0.00013716295427901524, "loss": 4.950592517852783, "step": 468, "token_acc": 0.1655808786833166 }, { "epoch": 0.2749926707710349, "grad_norm": 2.914476230383308, "learning_rate": 0.00013745603751465416, "loss": 4.875804424285889, "step": 469, "token_acc": 0.16606682310164045 }, { "epoch": 0.2755790090882439, "grad_norm": 3.9232989048126585, "learning_rate": 0.0001377491207502931, "loss": 4.926511764526367, "step": 470, "token_acc": 0.16421727409512665 }, { "epoch": 0.27616534740545295, "grad_norm": 3.6144053806786967, "learning_rate": 0.00013804220398593202, "loss": 4.87507438659668, "step": 471, "token_acc": 0.1707926773812153 }, { "epoch": 0.27675168572266196, "grad_norm": 5.396727216600319, "learning_rate": 0.00013833528722157095, "loss": 4.926799297332764, "step": 472, "token_acc": 0.163388599276186 }, { "epoch": 0.277338024039871, "grad_norm": 2.987639672318001, "learning_rate": 0.00013862837045720985, "loss": 4.893725872039795, "step": 473, "token_acc": 0.1669549369529036 }, { "epoch": 0.27792436235708, "grad_norm": 4.989929850230553, "learning_rate": 0.00013892145369284877, "loss": 4.840160846710205, "step": 474, "token_acc": 0.17184066359218794 }, { "epoch": 0.27851070067428907, "grad_norm": 3.6110403848522625, "learning_rate": 0.0001392145369284877, "loss": 4.768781661987305, "step": 475, "token_acc": 0.17762787195163693 }, { "epoch": 0.27909703899149807, "grad_norm": 4.650105290437723, "learning_rate": 0.0001395076201641266, "loss": 4.8848748207092285, "step": 476, "token_acc": 0.1663902528915049 }, { "epoch": 0.2796833773087071, "grad_norm": 3.3139178413774797, "learning_rate": 0.00013980070339976553, "loss": 4.829568862915039, "step": 477, "token_acc": 0.169737405414885 }, { "epoch": 0.2802697156259161, "grad_norm": 3.909762394566097, "learning_rate": 0.00014009378663540445, "loss": 4.835160255432129, "step": 478, "token_acc": 0.17160266965770152 }, { "epoch": 0.2808560539431252, "grad_norm": 5.21933032998255, "learning_rate": 0.00014038686987104338, "loss": 4.901421070098877, "step": 479, "token_acc": 0.16438791649370235 }, { "epoch": 0.28144239226033424, "grad_norm": 2.863127004079764, "learning_rate": 0.0001406799531066823, "loss": 4.756311416625977, "step": 480, "token_acc": 0.17576272293721826 }, { "epoch": 0.28202873057754324, "grad_norm": 6.043684580459418, "learning_rate": 0.00014097303634232124, "loss": 4.799533843994141, "step": 481, "token_acc": 0.171276678914862 }, { "epoch": 0.2826150688947523, "grad_norm": 3.4298587559569516, "learning_rate": 0.00014126611957796016, "loss": 4.817670822143555, "step": 482, "token_acc": 0.16861697080869098 }, { "epoch": 0.2832014072119613, "grad_norm": 4.7766752965713914, "learning_rate": 0.00014155920281359906, "loss": 4.818942070007324, "step": 483, "token_acc": 0.17058975480918767 }, { "epoch": 0.28378774552917035, "grad_norm": 2.9046544792376454, "learning_rate": 0.000141852286049238, "loss": 4.799318790435791, "step": 484, "token_acc": 0.17104853684090415 }, { "epoch": 0.28437408384637936, "grad_norm": 4.580249712240795, "learning_rate": 0.0001421453692848769, "loss": 4.807289123535156, "step": 485, "token_acc": 0.17053504809736608 }, { "epoch": 0.2849604221635884, "grad_norm": 3.44091745195971, "learning_rate": 0.00014243845252051582, "loss": 4.7312235832214355, "step": 486, "token_acc": 0.17689699428010078 }, { "epoch": 0.2855467604807974, "grad_norm": 4.388778649894042, "learning_rate": 0.00014273153575615474, "loss": 4.7546892166137695, "step": 487, "token_acc": 0.17446729876108408 }, { "epoch": 0.28613309879800647, "grad_norm": 3.31251169387694, "learning_rate": 0.00014302461899179367, "loss": 4.791693687438965, "step": 488, "token_acc": 0.17254942921705546 }, { "epoch": 0.28671943711521547, "grad_norm": 4.17733589463874, "learning_rate": 0.0001433177022274326, "loss": 4.675629615783691, "step": 489, "token_acc": 0.18031281183909964 }, { "epoch": 0.2873057754324245, "grad_norm": 2.374452580098422, "learning_rate": 0.00014361078546307153, "loss": 4.753551483154297, "step": 490, "token_acc": 0.1734588895392462 }, { "epoch": 0.2878921137496335, "grad_norm": 4.126681562736818, "learning_rate": 0.00014390386869871045, "loss": 4.665050029754639, "step": 491, "token_acc": 0.1825853501330672 }, { "epoch": 0.2884784520668426, "grad_norm": 3.2581736524192833, "learning_rate": 0.00014419695193434938, "loss": 4.734525203704834, "step": 492, "token_acc": 0.17628407539496407 }, { "epoch": 0.2890647903840516, "grad_norm": 4.195978878494247, "learning_rate": 0.00014449003516998828, "loss": 4.720024585723877, "step": 493, "token_acc": 0.17701848379319582 }, { "epoch": 0.28965112870126064, "grad_norm": 3.6177234625347356, "learning_rate": 0.00014478311840562718, "loss": 4.724521636962891, "step": 494, "token_acc": 0.1753375245325339 }, { "epoch": 0.29023746701846964, "grad_norm": 5.118608365216558, "learning_rate": 0.0001450762016412661, "loss": 4.729095458984375, "step": 495, "token_acc": 0.17579916941202026 }, { "epoch": 0.2908238053356787, "grad_norm": 3.2562283272992527, "learning_rate": 0.00014536928487690504, "loss": 4.689156532287598, "step": 496, "token_acc": 0.17751681708030417 }, { "epoch": 0.2914101436528877, "grad_norm": 3.780850825032303, "learning_rate": 0.00014566236811254396, "loss": 4.734624862670898, "step": 497, "token_acc": 0.17406583503173662 }, { "epoch": 0.29199648197009676, "grad_norm": 2.826405194074065, "learning_rate": 0.0001459554513481829, "loss": 4.635134696960449, "step": 498, "token_acc": 0.1823283880751012 }, { "epoch": 0.29258282028730576, "grad_norm": 5.399876723900705, "learning_rate": 0.00014624853458382182, "loss": 4.663265705108643, "step": 499, "token_acc": 0.18205944086874704 }, { "epoch": 0.2931691586045148, "grad_norm": 2.2487711399775905, "learning_rate": 0.00014654161781946074, "loss": 4.7593278884887695, "step": 500, "token_acc": 0.17025784507648223 }, { "epoch": 0.2937554969217238, "grad_norm": 6.120777372840855, "learning_rate": 0.00014683470105509967, "loss": 4.659954071044922, "step": 501, "token_acc": 0.18130114849434723 }, { "epoch": 0.29434183523893287, "grad_norm": 3.507488648517917, "learning_rate": 0.0001471277842907386, "loss": 4.660583972930908, "step": 502, "token_acc": 0.1801478194306521 }, { "epoch": 0.29492817355614187, "grad_norm": 4.311213813931897, "learning_rate": 0.00014742086752637747, "loss": 4.589162826538086, "step": 503, "token_acc": 0.18823028781911766 }, { "epoch": 0.2955145118733509, "grad_norm": 3.6903797707518136, "learning_rate": 0.0001477139507620164, "loss": 4.702823638916016, "step": 504, "token_acc": 0.17542404941931272 }, { "epoch": 0.2961008501905599, "grad_norm": 3.999987914497177, "learning_rate": 0.00014800703399765533, "loss": 4.7104949951171875, "step": 505, "token_acc": 0.17259399415633728 }, { "epoch": 0.296687188507769, "grad_norm": 3.5066109164601493, "learning_rate": 0.00014830011723329425, "loss": 4.665492057800293, "step": 506, "token_acc": 0.17730312938611967 }, { "epoch": 0.297273526824978, "grad_norm": 4.236748264188352, "learning_rate": 0.00014859320046893318, "loss": 4.674029350280762, "step": 507, "token_acc": 0.17897982681661173 }, { "epoch": 0.29785986514218704, "grad_norm": 2.9556318702165614, "learning_rate": 0.0001488862837045721, "loss": 4.589700222015381, "step": 508, "token_acc": 0.18660020687325846 }, { "epoch": 0.2984462034593961, "grad_norm": 4.6849527569842255, "learning_rate": 0.00014917936694021103, "loss": 4.662505149841309, "step": 509, "token_acc": 0.17884270052942447 }, { "epoch": 0.2990325417766051, "grad_norm": 2.8180560272428927, "learning_rate": 0.00014947245017584996, "loss": 4.572309494018555, "step": 510, "token_acc": 0.1850498285811631 }, { "epoch": 0.29961888009381415, "grad_norm": 5.387123123783376, "learning_rate": 0.00014976553341148886, "loss": 4.636224746704102, "step": 511, "token_acc": 0.18315400590684713 }, { "epoch": 0.30020521841102316, "grad_norm": 2.9832438924506883, "learning_rate": 0.0001500586166471278, "loss": 4.695883750915527, "step": 512, "token_acc": 0.173301160852899 }, { "epoch": 0.3007915567282322, "grad_norm": 3.8761121383422603, "learning_rate": 0.0001503516998827667, "loss": 4.663463115692139, "step": 513, "token_acc": 0.17561182598916533 }, { "epoch": 0.3013778950454412, "grad_norm": 3.128858579933917, "learning_rate": 0.00015064478311840562, "loss": 4.602659225463867, "step": 514, "token_acc": 0.1811925587177552 }, { "epoch": 0.30196423336265027, "grad_norm": 4.0941899353636, "learning_rate": 0.00015093786635404454, "loss": 4.607230186462402, "step": 515, "token_acc": 0.18291015931873172 }, { "epoch": 0.30255057167985927, "grad_norm": 2.796167830470198, "learning_rate": 0.00015123094958968347, "loss": 4.67079496383667, "step": 516, "token_acc": 0.17750250983016816 }, { "epoch": 0.3031369099970683, "grad_norm": 2.9856667127973027, "learning_rate": 0.0001515240328253224, "loss": 4.563554763793945, "step": 517, "token_acc": 0.18527267098680805 }, { "epoch": 0.3037232483142773, "grad_norm": 3.5670505137011355, "learning_rate": 0.00015181711606096132, "loss": 4.562885284423828, "step": 518, "token_acc": 0.1843352118555625 }, { "epoch": 0.3043095866314864, "grad_norm": 4.594787663944211, "learning_rate": 0.00015211019929660025, "loss": 4.633196830749512, "step": 519, "token_acc": 0.17984546399342943 }, { "epoch": 0.3048959249486954, "grad_norm": 2.9036673221380154, "learning_rate": 0.00015240328253223915, "loss": 4.570888996124268, "step": 520, "token_acc": 0.1806706297843537 }, { "epoch": 0.30548226326590444, "grad_norm": 3.9279495400431843, "learning_rate": 0.00015269636576787808, "loss": 4.625429153442383, "step": 521, "token_acc": 0.17875706272781958 }, { "epoch": 0.30606860158311344, "grad_norm": 2.677651938949762, "learning_rate": 0.000152989449003517, "loss": 4.521848678588867, "step": 522, "token_acc": 0.18934240957410545 }, { "epoch": 0.3066549399003225, "grad_norm": 2.916817592188344, "learning_rate": 0.0001532825322391559, "loss": 4.53693962097168, "step": 523, "token_acc": 0.18639868637110016 }, { "epoch": 0.3072412782175315, "grad_norm": 3.6160515179985886, "learning_rate": 0.00015357561547479483, "loss": 4.605867385864258, "step": 524, "token_acc": 0.17898636395385836 }, { "epoch": 0.30782761653474056, "grad_norm": 2.3900981644929695, "learning_rate": 0.00015386869871043376, "loss": 4.502622604370117, "step": 525, "token_acc": 0.18960608354056294 }, { "epoch": 0.30841395485194956, "grad_norm": 4.384180131112934, "learning_rate": 0.0001541617819460727, "loss": 4.515637397766113, "step": 526, "token_acc": 0.18843714218207594 }, { "epoch": 0.3090002931691586, "grad_norm": 2.223020506544334, "learning_rate": 0.00015445486518171161, "loss": 4.538787841796875, "step": 527, "token_acc": 0.18645295901879946 }, { "epoch": 0.3095866314863676, "grad_norm": 3.0828032954448212, "learning_rate": 0.00015474794841735054, "loss": 4.515827178955078, "step": 528, "token_acc": 0.18863375736725604 }, { "epoch": 0.31017296980357667, "grad_norm": 3.5244506426862072, "learning_rate": 0.00015504103165298944, "loss": 4.562204360961914, "step": 529, "token_acc": 0.18436511640640685 }, { "epoch": 0.31075930812078567, "grad_norm": 3.480444321801198, "learning_rate": 0.00015533411488862837, "loss": 4.504839897155762, "step": 530, "token_acc": 0.1886315175002635 }, { "epoch": 0.3113456464379947, "grad_norm": 3.373181923178309, "learning_rate": 0.0001556271981242673, "loss": 4.509557247161865, "step": 531, "token_acc": 0.1863521138277864 }, { "epoch": 0.31193198475520373, "grad_norm": 3.4565189514168075, "learning_rate": 0.00015592028135990622, "loss": 4.46580696105957, "step": 532, "token_acc": 0.1925276481780916 }, { "epoch": 0.3125183230724128, "grad_norm": 3.6008447696143775, "learning_rate": 0.00015621336459554515, "loss": 4.435978889465332, "step": 533, "token_acc": 0.19529056900029182 }, { "epoch": 0.3131046613896218, "grad_norm": 2.6080844785138124, "learning_rate": 0.00015650644783118405, "loss": 4.429786205291748, "step": 534, "token_acc": 0.195933408321611 }, { "epoch": 0.31369099970683084, "grad_norm": 3.9659555466611365, "learning_rate": 0.00015679953106682298, "loss": 4.470489978790283, "step": 535, "token_acc": 0.19054969739230412 }, { "epoch": 0.3142773380240399, "grad_norm": 3.190122712935279, "learning_rate": 0.0001570926143024619, "loss": 4.506174087524414, "step": 536, "token_acc": 0.1851689226060097 }, { "epoch": 0.3148636763412489, "grad_norm": 3.6036123067173675, "learning_rate": 0.00015738569753810083, "loss": 4.515082359313965, "step": 537, "token_acc": 0.18538086563940442 }, { "epoch": 0.31545001465845796, "grad_norm": 3.6821928414589697, "learning_rate": 0.00015767878077373973, "loss": 4.4421234130859375, "step": 538, "token_acc": 0.19192258497952222 }, { "epoch": 0.31603635297566696, "grad_norm": 4.579216954552315, "learning_rate": 0.00015797186400937866, "loss": 4.4645490646362305, "step": 539, "token_acc": 0.19084683586215595 }, { "epoch": 0.316622691292876, "grad_norm": 2.157258419917712, "learning_rate": 0.0001582649472450176, "loss": 4.472905158996582, "step": 540, "token_acc": 0.19049191918931263 }, { "epoch": 0.317209029610085, "grad_norm": 5.125895666636859, "learning_rate": 0.00015855803048065651, "loss": 4.477203369140625, "step": 541, "token_acc": 0.1892764893158076 }, { "epoch": 0.31779536792729407, "grad_norm": 2.4641923245822013, "learning_rate": 0.00015885111371629544, "loss": 4.522824287414551, "step": 542, "token_acc": 0.1858015348256725 }, { "epoch": 0.31838170624450307, "grad_norm": 4.113866472139571, "learning_rate": 0.00015914419695193437, "loss": 4.524702072143555, "step": 543, "token_acc": 0.18356269733107675 }, { "epoch": 0.3189680445617121, "grad_norm": 3.2474792562328254, "learning_rate": 0.00015943728018757327, "loss": 4.578799247741699, "step": 544, "token_acc": 0.17872985772194935 }, { "epoch": 0.31955438287892113, "grad_norm": 3.0974908436309, "learning_rate": 0.0001597303634232122, "loss": 4.3994879722595215, "step": 545, "token_acc": 0.19697764501283002 }, { "epoch": 0.3201407211961302, "grad_norm": 3.0481663775573913, "learning_rate": 0.00016002344665885112, "loss": 4.469366550445557, "step": 546, "token_acc": 0.1881582026679148 }, { "epoch": 0.3207270595133392, "grad_norm": 2.4321962734241316, "learning_rate": 0.00016031652989449002, "loss": 4.466395854949951, "step": 547, "token_acc": 0.1915929492623904 }, { "epoch": 0.32131339783054824, "grad_norm": 3.7179095396605657, "learning_rate": 0.00016060961313012895, "loss": 4.5207414627075195, "step": 548, "token_acc": 0.18468413558425256 }, { "epoch": 0.32189973614775724, "grad_norm": 2.833110587710605, "learning_rate": 0.00016090269636576788, "loss": 4.469472885131836, "step": 549, "token_acc": 0.18806375661174515 }, { "epoch": 0.3224860744649663, "grad_norm": 3.7109472767311766, "learning_rate": 0.0001611957796014068, "loss": 4.402301788330078, "step": 550, "token_acc": 0.19425649739490036 }, { "epoch": 0.3230724127821753, "grad_norm": 2.6285673973131027, "learning_rate": 0.00016148886283704573, "loss": 4.452491760253906, "step": 551, "token_acc": 0.18925668332399154 }, { "epoch": 0.32365875109938436, "grad_norm": 3.959979555121441, "learning_rate": 0.00016178194607268466, "loss": 4.421535491943359, "step": 552, "token_acc": 0.19194938166322772 }, { "epoch": 0.32424508941659336, "grad_norm": 3.0536149646835398, "learning_rate": 0.00016207502930832359, "loss": 4.468191623687744, "step": 553, "token_acc": 0.1887056466466493 }, { "epoch": 0.3248314277338024, "grad_norm": 1.9387140243529846, "learning_rate": 0.00016236811254396249, "loss": 4.35836124420166, "step": 554, "token_acc": 0.20117667843532056 }, { "epoch": 0.3254177660510114, "grad_norm": 3.4994199637137653, "learning_rate": 0.0001626611957796014, "loss": 4.385231971740723, "step": 555, "token_acc": 0.19869304672345237 }, { "epoch": 0.32600410436822047, "grad_norm": 3.312716764496414, "learning_rate": 0.0001629542790152403, "loss": 4.452750205993652, "step": 556, "token_acc": 0.1890747709967253 }, { "epoch": 0.32659044268542947, "grad_norm": 3.42770904470029, "learning_rate": 0.00016324736225087924, "loss": 4.490422248840332, "step": 557, "token_acc": 0.1838231117582968 }, { "epoch": 0.32717678100263853, "grad_norm": 2.7378232095594153, "learning_rate": 0.00016354044548651817, "loss": 4.38887882232666, "step": 558, "token_acc": 0.1926649779893164 }, { "epoch": 0.32776311931984753, "grad_norm": 3.151619286716409, "learning_rate": 0.0001638335287221571, "loss": 4.399745464324951, "step": 559, "token_acc": 0.1940473683399508 }, { "epoch": 0.3283494576370566, "grad_norm": 2.323076613373229, "learning_rate": 0.00016412661195779602, "loss": 4.426732063293457, "step": 560, "token_acc": 0.19175233375274034 }, { "epoch": 0.3289357959542656, "grad_norm": 3.015180056428602, "learning_rate": 0.00016441969519343495, "loss": 4.401934623718262, "step": 561, "token_acc": 0.19396697702772409 }, { "epoch": 0.32952213427147464, "grad_norm": 2.9430845511558554, "learning_rate": 0.00016471277842907388, "loss": 4.353907585144043, "step": 562, "token_acc": 0.19816314268547394 }, { "epoch": 0.33010847258868364, "grad_norm": 3.1828030565555006, "learning_rate": 0.0001650058616647128, "loss": 4.3354268074035645, "step": 563, "token_acc": 0.20195604316287127 }, { "epoch": 0.3306948109058927, "grad_norm": 2.888642548657088, "learning_rate": 0.0001652989449003517, "loss": 4.373227119445801, "step": 564, "token_acc": 0.19680482395342389 }, { "epoch": 0.33128114922310176, "grad_norm": 3.727612892595648, "learning_rate": 0.0001655920281359906, "loss": 4.3680419921875, "step": 565, "token_acc": 0.1959753619077607 }, { "epoch": 0.33186748754031076, "grad_norm": 2.1111682287067963, "learning_rate": 0.00016588511137162953, "loss": 4.369876861572266, "step": 566, "token_acc": 0.19430340072089014 }, { "epoch": 0.3324538258575198, "grad_norm": 4.386398150910521, "learning_rate": 0.00016617819460726846, "loss": 4.440284252166748, "step": 567, "token_acc": 0.18975706525690428 }, { "epoch": 0.3330401641747288, "grad_norm": 2.1936625491596784, "learning_rate": 0.00016647127784290739, "loss": 4.39591121673584, "step": 568, "token_acc": 0.19313192965279477 }, { "epoch": 0.33362650249193787, "grad_norm": 3.2652331006577655, "learning_rate": 0.0001667643610785463, "loss": 4.384404182434082, "step": 569, "token_acc": 0.19393213633751824 }, { "epoch": 0.33421284080914687, "grad_norm": 2.9407242805369176, "learning_rate": 0.00016705744431418524, "loss": 4.441839218139648, "step": 570, "token_acc": 0.187965974278624 }, { "epoch": 0.33479917912635593, "grad_norm": 2.020857634519983, "learning_rate": 0.00016735052754982417, "loss": 4.312097549438477, "step": 571, "token_acc": 0.20122706531290696 }, { "epoch": 0.33538551744356493, "grad_norm": 3.835885045193653, "learning_rate": 0.0001676436107854631, "loss": 4.369815826416016, "step": 572, "token_acc": 0.19539235749128023 }, { "epoch": 0.335971855760774, "grad_norm": 2.347547928381657, "learning_rate": 0.00016793669402110202, "loss": 4.357950210571289, "step": 573, "token_acc": 0.1969721098618764 }, { "epoch": 0.336558194077983, "grad_norm": 3.178535706394947, "learning_rate": 0.0001682297772567409, "loss": 4.401962757110596, "step": 574, "token_acc": 0.19254838608488348 }, { "epoch": 0.33714453239519204, "grad_norm": 2.9710820442098504, "learning_rate": 0.00016852286049237982, "loss": 4.346654891967773, "step": 575, "token_acc": 0.19889154413195606 }, { "epoch": 0.33773087071240104, "grad_norm": 2.453938641919291, "learning_rate": 0.00016881594372801875, "loss": 4.352444648742676, "step": 576, "token_acc": 0.1979722464027856 }, { "epoch": 0.3383172090296101, "grad_norm": 3.4726701501672452, "learning_rate": 0.00016910902696365768, "loss": 4.365447044372559, "step": 577, "token_acc": 0.19561060986647677 }, { "epoch": 0.3389035473468191, "grad_norm": 2.857005802336948, "learning_rate": 0.0001694021101992966, "loss": 4.408263206481934, "step": 578, "token_acc": 0.19032571101545198 }, { "epoch": 0.33948988566402816, "grad_norm": 2.8735270224763916, "learning_rate": 0.00016969519343493553, "loss": 4.34467077255249, "step": 579, "token_acc": 0.1974039596171496 }, { "epoch": 0.34007622398123716, "grad_norm": 2.478860741700152, "learning_rate": 0.00016998827667057446, "loss": 4.283778190612793, "step": 580, "token_acc": 0.20183136785016081 }, { "epoch": 0.3406625622984462, "grad_norm": 3.2839133701779444, "learning_rate": 0.00017028135990621338, "loss": 4.350898742675781, "step": 581, "token_acc": 0.19648146771847494 }, { "epoch": 0.3412489006156552, "grad_norm": 2.3226703308053547, "learning_rate": 0.00017057444314185228, "loss": 4.3489179611206055, "step": 582, "token_acc": 0.1970700570292471 }, { "epoch": 0.34183523893286427, "grad_norm": 3.370819504708132, "learning_rate": 0.0001708675263774912, "loss": 4.374782562255859, "step": 583, "token_acc": 0.19299956099124407 }, { "epoch": 0.3424215772500733, "grad_norm": 2.1288858852401185, "learning_rate": 0.0001711606096131301, "loss": 4.364096641540527, "step": 584, "token_acc": 0.19417908127096736 }, { "epoch": 0.34300791556728233, "grad_norm": 4.065434395585224, "learning_rate": 0.00017145369284876904, "loss": 4.389575958251953, "step": 585, "token_acc": 0.19208265079562506 }, { "epoch": 0.34359425388449133, "grad_norm": 2.359015143636616, "learning_rate": 0.00017174677608440797, "loss": 4.342007160186768, "step": 586, "token_acc": 0.19773779136999545 }, { "epoch": 0.3441805922017004, "grad_norm": 3.283454040141777, "learning_rate": 0.0001720398593200469, "loss": 4.32462215423584, "step": 587, "token_acc": 0.19869578104930344 }, { "epoch": 0.3447669305189094, "grad_norm": 2.6925374365327404, "learning_rate": 0.00017233294255568582, "loss": 4.30461311340332, "step": 588, "token_acc": 0.19965218236661814 }, { "epoch": 0.34535326883611844, "grad_norm": 3.203168425369097, "learning_rate": 0.00017262602579132475, "loss": 4.323164463043213, "step": 589, "token_acc": 0.1986490328422959 }, { "epoch": 0.34593960715332744, "grad_norm": 2.1656274852807917, "learning_rate": 0.00017291910902696367, "loss": 4.297807693481445, "step": 590, "token_acc": 0.19995629763878714 }, { "epoch": 0.3465259454705365, "grad_norm": 4.069325046705623, "learning_rate": 0.00017321219226260257, "loss": 4.383184909820557, "step": 591, "token_acc": 0.19115910507504608 }, { "epoch": 0.34711228378774556, "grad_norm": 2.2855486485486205, "learning_rate": 0.0001735052754982415, "loss": 4.2431535720825195, "step": 592, "token_acc": 0.20432408538963573 }, { "epoch": 0.34769862210495456, "grad_norm": 3.1184100370119503, "learning_rate": 0.00017379835873388043, "loss": 4.280085563659668, "step": 593, "token_acc": 0.20095056735844813 }, { "epoch": 0.3482849604221636, "grad_norm": 2.4848425502508524, "learning_rate": 0.00017409144196951936, "loss": 4.318235397338867, "step": 594, "token_acc": 0.19635401074574188 }, { "epoch": 0.3488712987393726, "grad_norm": 1.8658468156999546, "learning_rate": 0.00017438452520515826, "loss": 4.249464511871338, "step": 595, "token_acc": 0.20389160030824557 }, { "epoch": 0.34945763705658167, "grad_norm": 3.8716497641936853, "learning_rate": 0.00017467760844079718, "loss": 4.287879943847656, "step": 596, "token_acc": 0.20160418120694523 }, { "epoch": 0.3500439753737907, "grad_norm": 2.0078757419361355, "learning_rate": 0.0001749706916764361, "loss": 4.318107604980469, "step": 597, "token_acc": 0.19689672905707276 }, { "epoch": 0.35063031369099973, "grad_norm": 2.884315560846559, "learning_rate": 0.00017526377491207504, "loss": 4.311124801635742, "step": 598, "token_acc": 0.19751258977310998 }, { "epoch": 0.35121665200820873, "grad_norm": 2.7484929202368944, "learning_rate": 0.00017555685814771397, "loss": 4.3280205726623535, "step": 599, "token_acc": 0.19439567476013922 }, { "epoch": 0.3518029903254178, "grad_norm": 2.2047357723896215, "learning_rate": 0.00017584994138335287, "loss": 4.284904479980469, "step": 600, "token_acc": 0.2003533531799161 }, { "epoch": 0.3523893286426268, "grad_norm": 3.1107430335760817, "learning_rate": 0.0001761430246189918, "loss": 4.276244640350342, "step": 601, "token_acc": 0.20092996600544433 }, { "epoch": 0.35297566695983584, "grad_norm": 2.0833190258734384, "learning_rate": 0.00017643610785463072, "loss": 4.340874671936035, "step": 602, "token_acc": 0.1932120958569206 }, { "epoch": 0.35356200527704484, "grad_norm": 2.514109913489446, "learning_rate": 0.00017672919109026965, "loss": 4.243865013122559, "step": 603, "token_acc": 0.2062694576927058 }, { "epoch": 0.3541483435942539, "grad_norm": 2.103358985280864, "learning_rate": 0.00017702227432590857, "loss": 4.242303848266602, "step": 604, "token_acc": 0.20248158859914006 }, { "epoch": 0.3547346819114629, "grad_norm": 2.7013474943179427, "learning_rate": 0.00017731535756154747, "loss": 4.238546371459961, "step": 605, "token_acc": 0.2064178454785494 }, { "epoch": 0.35532102022867196, "grad_norm": 2.559143552036905, "learning_rate": 0.0001776084407971864, "loss": 4.238079071044922, "step": 606, "token_acc": 0.2062076735148546 }, { "epoch": 0.35590735854588096, "grad_norm": 3.261877485645797, "learning_rate": 0.00017790152403282533, "loss": 4.278648376464844, "step": 607, "token_acc": 0.20108211677410812 }, { "epoch": 0.35649369686309, "grad_norm": 2.317306909803825, "learning_rate": 0.00017819460726846426, "loss": 4.323906421661377, "step": 608, "token_acc": 0.19419433440189857 }, { "epoch": 0.357080035180299, "grad_norm": 2.779401267702493, "learning_rate": 0.00017848769050410316, "loss": 4.259009838104248, "step": 609, "token_acc": 0.20273544964205653 }, { "epoch": 0.35766637349750807, "grad_norm": 2.425582147671826, "learning_rate": 0.00017878077373974208, "loss": 4.300567626953125, "step": 610, "token_acc": 0.19641605305305818 }, { "epoch": 0.3582527118147171, "grad_norm": 3.840767861503941, "learning_rate": 0.000179073856975381, "loss": 4.302745819091797, "step": 611, "token_acc": 0.19933901918976546 }, { "epoch": 0.35883905013192613, "grad_norm": 2.0539525690315923, "learning_rate": 0.00017936694021101994, "loss": 4.270225524902344, "step": 612, "token_acc": 0.20239351806149336 }, { "epoch": 0.35942538844913513, "grad_norm": 2.757055231102099, "learning_rate": 0.00017966002344665886, "loss": 4.273060321807861, "step": 613, "token_acc": 0.20284586436350519 }, { "epoch": 0.3600117267663442, "grad_norm": 2.4590772409139188, "learning_rate": 0.0001799531066822978, "loss": 4.348200798034668, "step": 614, "token_acc": 0.19452398829712866 }, { "epoch": 0.3605980650835532, "grad_norm": 2.351065205116467, "learning_rate": 0.0001802461899179367, "loss": 4.231278896331787, "step": 615, "token_acc": 0.20566704885304 }, { "epoch": 0.36118440340076224, "grad_norm": 2.6967747274489073, "learning_rate": 0.00018053927315357562, "loss": 4.202358245849609, "step": 616, "token_acc": 0.20790187042558958 }, { "epoch": 0.36177074171797124, "grad_norm": 2.457169452104043, "learning_rate": 0.00018083235638921455, "loss": 4.189783096313477, "step": 617, "token_acc": 0.2092393323817407 }, { "epoch": 0.3623570800351803, "grad_norm": 2.6406543537554494, "learning_rate": 0.00018112543962485345, "loss": 4.248398780822754, "step": 618, "token_acc": 0.20415585384439128 }, { "epoch": 0.3629434183523893, "grad_norm": 2.636979858364093, "learning_rate": 0.00018141852286049237, "loss": 4.2956647872924805, "step": 619, "token_acc": 0.19798279583544948 }, { "epoch": 0.36352975666959836, "grad_norm": 2.1239145706551477, "learning_rate": 0.0001817116060961313, "loss": 4.238051414489746, "step": 620, "token_acc": 0.20078229568697775 }, { "epoch": 0.3641160949868074, "grad_norm": 3.3368681844812147, "learning_rate": 0.00018200468933177023, "loss": 4.244442939758301, "step": 621, "token_acc": 0.20395911836060496 }, { "epoch": 0.3647024333040164, "grad_norm": 1.6621377139225848, "learning_rate": 0.00018229777256740915, "loss": 4.1781325340271, "step": 622, "token_acc": 0.20918979862408785 }, { "epoch": 0.36528877162122547, "grad_norm": 4.310592781243259, "learning_rate": 0.00018259085580304808, "loss": 4.3004279136657715, "step": 623, "token_acc": 0.20038858582515773 }, { "epoch": 0.3658751099384345, "grad_norm": 2.093247029693665, "learning_rate": 0.000182883939038687, "loss": 4.260926723480225, "step": 624, "token_acc": 0.19990207701121143 }, { "epoch": 0.36646144825564353, "grad_norm": 2.875707055580328, "learning_rate": 0.0001831770222743259, "loss": 4.207454681396484, "step": 625, "token_acc": 0.20758294094486066 }, { "epoch": 0.36704778657285253, "grad_norm": 2.6211863582176185, "learning_rate": 0.00018347010550996484, "loss": 4.265021324157715, "step": 626, "token_acc": 0.20177751420488696 }, { "epoch": 0.3676341248900616, "grad_norm": 1.9973754906759469, "learning_rate": 0.00018376318874560374, "loss": 4.224093437194824, "step": 627, "token_acc": 0.2052583200410933 }, { "epoch": 0.3682204632072706, "grad_norm": 3.055874514068887, "learning_rate": 0.00018405627198124266, "loss": 4.233010292053223, "step": 628, "token_acc": 0.20451630232201198 }, { "epoch": 0.36880680152447964, "grad_norm": 2.4864402857444987, "learning_rate": 0.0001843493552168816, "loss": 4.277772426605225, "step": 629, "token_acc": 0.1984967330453631 }, { "epoch": 0.36939313984168864, "grad_norm": 2.8483866732175764, "learning_rate": 0.00018464243845252052, "loss": 4.214406967163086, "step": 630, "token_acc": 0.2045053015060296 }, { "epoch": 0.3699794781588977, "grad_norm": 2.3936202913166955, "learning_rate": 0.00018493552168815945, "loss": 4.237961292266846, "step": 631, "token_acc": 0.2022509319896603 }, { "epoch": 0.3705658164761067, "grad_norm": 3.009788283392778, "learning_rate": 0.00018522860492379837, "loss": 4.20882511138916, "step": 632, "token_acc": 0.20298626039816806 }, { "epoch": 0.37115215479331576, "grad_norm": 2.4938104821910785, "learning_rate": 0.0001855216881594373, "loss": 4.223635673522949, "step": 633, "token_acc": 0.20166257528877618 }, { "epoch": 0.37173849311052476, "grad_norm": 2.5118341234503103, "learning_rate": 0.00018581477139507623, "loss": 4.199278831481934, "step": 634, "token_acc": 0.20652520926610862 }, { "epoch": 0.3723248314277338, "grad_norm": 2.6770372173940804, "learning_rate": 0.00018610785463071513, "loss": 4.208103179931641, "step": 635, "token_acc": 0.20533533771131682 }, { "epoch": 0.3729111697449428, "grad_norm": 1.8882324339634855, "learning_rate": 0.00018640093786635403, "loss": 4.210339546203613, "step": 636, "token_acc": 0.20622000492226916 }, { "epoch": 0.3734975080621519, "grad_norm": 2.3713912485881776, "learning_rate": 0.00018669402110199295, "loss": 4.169193267822266, "step": 637, "token_acc": 0.2085651312236603 }, { "epoch": 0.3740838463793609, "grad_norm": 2.603907089459109, "learning_rate": 0.00018698710433763188, "loss": 4.230314254760742, "step": 638, "token_acc": 0.20190862220638509 }, { "epoch": 0.37467018469656993, "grad_norm": 1.5405340251070139, "learning_rate": 0.0001872801875732708, "loss": 4.1755571365356445, "step": 639, "token_acc": 0.20599684323248932 }, { "epoch": 0.37525652301377893, "grad_norm": 3.2916341186210762, "learning_rate": 0.00018757327080890974, "loss": 4.271075248718262, "step": 640, "token_acc": 0.19831048402698068 }, { "epoch": 0.375842861330988, "grad_norm": 1.811396346941996, "learning_rate": 0.00018786635404454866, "loss": 4.204448699951172, "step": 641, "token_acc": 0.2036325982459875 }, { "epoch": 0.376429199648197, "grad_norm": 3.0144909423445023, "learning_rate": 0.0001881594372801876, "loss": 4.224557399749756, "step": 642, "token_acc": 0.20237541943952156 }, { "epoch": 0.37701553796540604, "grad_norm": 1.8952182270429252, "learning_rate": 0.00018845252051582652, "loss": 4.130260467529297, "step": 643, "token_acc": 0.21324102862728378 }, { "epoch": 0.37760187628261505, "grad_norm": 3.1569638666645696, "learning_rate": 0.00018874560375146544, "loss": 4.187472343444824, "step": 644, "token_acc": 0.20633202372931822 }, { "epoch": 0.3781882145998241, "grad_norm": 2.0198294319321546, "learning_rate": 0.00018903868698710432, "loss": 4.226292610168457, "step": 645, "token_acc": 0.20234970628671417 }, { "epoch": 0.3787745529170331, "grad_norm": 2.2042089620698837, "learning_rate": 0.00018933177022274324, "loss": 4.2882232666015625, "step": 646, "token_acc": 0.19653220827330725 }, { "epoch": 0.37936089123424216, "grad_norm": 2.2112993057427555, "learning_rate": 0.00018962485345838217, "loss": 4.209702491760254, "step": 647, "token_acc": 0.2061230998192835 }, { "epoch": 0.37994722955145116, "grad_norm": 2.0707954712151477, "learning_rate": 0.0001899179366940211, "loss": 4.175907135009766, "step": 648, "token_acc": 0.20961082661613126 }, { "epoch": 0.3805335678686602, "grad_norm": 2.5413261692069367, "learning_rate": 0.00019021101992966003, "loss": 4.276764869689941, "step": 649, "token_acc": 0.19810358943200368 }, { "epoch": 0.3811199061858693, "grad_norm": 1.9520696277506953, "learning_rate": 0.00019050410316529895, "loss": 4.206478118896484, "step": 650, "token_acc": 0.20120135590426244 }, { "epoch": 0.3817062445030783, "grad_norm": 2.592106622393218, "learning_rate": 0.00019079718640093788, "loss": 4.13508415222168, "step": 651, "token_acc": 0.21329034596661586 }, { "epoch": 0.38229258282028733, "grad_norm": 2.1197731947154654, "learning_rate": 0.0001910902696365768, "loss": 4.264552593231201, "step": 652, "token_acc": 0.19786214849640735 }, { "epoch": 0.38287892113749633, "grad_norm": 2.529222559482183, "learning_rate": 0.00019138335287221573, "loss": 4.22831392288208, "step": 653, "token_acc": 0.20204209385635905 }, { "epoch": 0.3834652594547054, "grad_norm": 1.9404252012608736, "learning_rate": 0.00019167643610785463, "loss": 4.157746315002441, "step": 654, "token_acc": 0.20975458975344685 }, { "epoch": 0.3840515977719144, "grad_norm": 2.512600821555176, "learning_rate": 0.00019196951934349353, "loss": 4.170214653015137, "step": 655, "token_acc": 0.20631282485127453 }, { "epoch": 0.38463793608912344, "grad_norm": 2.701434050883918, "learning_rate": 0.00019226260257913246, "loss": 4.170098304748535, "step": 656, "token_acc": 0.20622947980064404 }, { "epoch": 0.38522427440633245, "grad_norm": 2.4431766897358385, "learning_rate": 0.0001925556858147714, "loss": 4.179371356964111, "step": 657, "token_acc": 0.205891707492241 }, { "epoch": 0.3858106127235415, "grad_norm": 2.0730537353964187, "learning_rate": 0.00019284876905041032, "loss": 4.178879737854004, "step": 658, "token_acc": 0.2054048947415381 }, { "epoch": 0.3863969510407505, "grad_norm": 2.6491191267137, "learning_rate": 0.00019314185228604924, "loss": 4.181436538696289, "step": 659, "token_acc": 0.20544109301439537 }, { "epoch": 0.38698328935795956, "grad_norm": 1.8885377245486985, "learning_rate": 0.00019343493552168817, "loss": 4.13986873626709, "step": 660, "token_acc": 0.20963559195035902 }, { "epoch": 0.38756962767516856, "grad_norm": 2.2991514894392853, "learning_rate": 0.0001937280187573271, "loss": 4.190619468688965, "step": 661, "token_acc": 0.20524464103072138 }, { "epoch": 0.3881559659923776, "grad_norm": 2.362925115009617, "learning_rate": 0.000194021101992966, "loss": 4.182242393493652, "step": 662, "token_acc": 0.20488680661232828 }, { "epoch": 0.3887423043095866, "grad_norm": 2.640894521065949, "learning_rate": 0.00019431418522860493, "loss": 4.087883949279785, "step": 663, "token_acc": 0.2144021990511306 }, { "epoch": 0.3893286426267957, "grad_norm": 1.6606458079428112, "learning_rate": 0.00019460726846424385, "loss": 4.110154151916504, "step": 664, "token_acc": 0.212002778562323 }, { "epoch": 0.3899149809440047, "grad_norm": 2.1844881931565054, "learning_rate": 0.00019490035169988278, "loss": 4.138330459594727, "step": 665, "token_acc": 0.21075519561346842 }, { "epoch": 0.39050131926121373, "grad_norm": 2.2601177318952486, "learning_rate": 0.00019519343493552168, "loss": 4.164782524108887, "step": 666, "token_acc": 0.20874460868147818 }, { "epoch": 0.39108765757842273, "grad_norm": 2.497526862000262, "learning_rate": 0.0001954865181711606, "loss": 4.187143325805664, "step": 667, "token_acc": 0.20432454563823338 }, { "epoch": 0.3916739958956318, "grad_norm": 2.557495163505733, "learning_rate": 0.00019577960140679953, "loss": 4.090910911560059, "step": 668, "token_acc": 0.21460647173811437 }, { "epoch": 0.3922603342128408, "grad_norm": 2.0140974221078505, "learning_rate": 0.00019607268464243846, "loss": 4.16569185256958, "step": 669, "token_acc": 0.20671116569120165 }, { "epoch": 0.39284667253004985, "grad_norm": 2.9724759185931, "learning_rate": 0.0001963657678780774, "loss": 4.216405868530273, "step": 670, "token_acc": 0.2014534242441915 }, { "epoch": 0.39343301084725885, "grad_norm": 1.6546005712160807, "learning_rate": 0.0001966588511137163, "loss": 4.114252090454102, "step": 671, "token_acc": 0.2104150753874002 }, { "epoch": 0.3940193491644679, "grad_norm": 2.8122543091915144, "learning_rate": 0.00019695193434935522, "loss": 4.128007888793945, "step": 672, "token_acc": 0.21066403190755523 }, { "epoch": 0.3946056874816769, "grad_norm": 1.5690590844479788, "learning_rate": 0.00019724501758499414, "loss": 4.1066789627075195, "step": 673, "token_acc": 0.21355748085737752 }, { "epoch": 0.39519202579888596, "grad_norm": 2.3403342855611395, "learning_rate": 0.00019753810082063307, "loss": 4.1145219802856445, "step": 674, "token_acc": 0.21464247988419558 }, { "epoch": 0.39577836411609496, "grad_norm": 2.202178188154737, "learning_rate": 0.000197831184056272, "loss": 4.1572265625, "step": 675, "token_acc": 0.2079692853238772 }, { "epoch": 0.396364702433304, "grad_norm": 1.5476916958439135, "learning_rate": 0.0001981242672919109, "loss": 4.0672760009765625, "step": 676, "token_acc": 0.2172768555814968 }, { "epoch": 0.3969510407505131, "grad_norm": 2.347658204195443, "learning_rate": 0.00019841735052754982, "loss": 4.104122161865234, "step": 677, "token_acc": 0.21262940536317215 }, { "epoch": 0.3975373790677221, "grad_norm": 2.217887236862945, "learning_rate": 0.00019871043376318875, "loss": 4.072694778442383, "step": 678, "token_acc": 0.21492755265842337 }, { "epoch": 0.39812371738493113, "grad_norm": 2.5696013306792467, "learning_rate": 0.00019900351699882768, "loss": 4.1688995361328125, "step": 679, "token_acc": 0.20589306705592084 }, { "epoch": 0.39871005570214013, "grad_norm": 2.7365388251139335, "learning_rate": 0.00019929660023446658, "loss": 4.068920612335205, "step": 680, "token_acc": 0.21451251669284382 }, { "epoch": 0.3992963940193492, "grad_norm": 1.8692370350576855, "learning_rate": 0.0001995896834701055, "loss": 4.217334747314453, "step": 681, "token_acc": 0.20007284423389154 }, { "epoch": 0.3998827323365582, "grad_norm": 3.320321895015174, "learning_rate": 0.00019988276670574443, "loss": 4.143980979919434, "step": 682, "token_acc": 0.20586730825088617 }, { "epoch": 0.40046907065376725, "grad_norm": 1.7168458465318832, "learning_rate": 0.00020017584994138336, "loss": 4.147307395935059, "step": 683, "token_acc": 0.20697672622220162 }, { "epoch": 0.40105540897097625, "grad_norm": 1.9489371333865713, "learning_rate": 0.0002004689331770223, "loss": 4.114063262939453, "step": 684, "token_acc": 0.21234300050067564 }, { "epoch": 0.4016417472881853, "grad_norm": 2.113578416771434, "learning_rate": 0.00020076201641266121, "loss": 4.083517074584961, "step": 685, "token_acc": 0.21295801050374152 }, { "epoch": 0.4022280856053943, "grad_norm": 1.5800534078436324, "learning_rate": 0.00020105509964830011, "loss": 4.111965179443359, "step": 686, "token_acc": 0.2108251211817903 }, { "epoch": 0.40281442392260336, "grad_norm": 1.8988496429416992, "learning_rate": 0.00020134818288393904, "loss": 4.109745979309082, "step": 687, "token_acc": 0.2105494365060982 }, { "epoch": 0.40340076223981236, "grad_norm": 2.404042301757026, "learning_rate": 0.00020164126611957797, "loss": 4.127511501312256, "step": 688, "token_acc": 0.2111118657882225 }, { "epoch": 0.4039871005570214, "grad_norm": 2.216419345224772, "learning_rate": 0.00020193434935521687, "loss": 4.121830940246582, "step": 689, "token_acc": 0.21161733556519877 }, { "epoch": 0.4045734388742304, "grad_norm": 2.032865459940097, "learning_rate": 0.0002022274325908558, "loss": 4.079171657562256, "step": 690, "token_acc": 0.21245708353035506 }, { "epoch": 0.4051597771914395, "grad_norm": 2.439975321293924, "learning_rate": 0.00020252051582649472, "loss": 4.1306328773498535, "step": 691, "token_acc": 0.2077358432999756 }, { "epoch": 0.4057461155086485, "grad_norm": 1.4006861575532985, "learning_rate": 0.00020281359906213365, "loss": 4.150385856628418, "step": 692, "token_acc": 0.204880960103795 }, { "epoch": 0.40633245382585753, "grad_norm": 2.5333265490794483, "learning_rate": 0.00020310668229777258, "loss": 4.1458282470703125, "step": 693, "token_acc": 0.20784267167283782 }, { "epoch": 0.40691879214306653, "grad_norm": 1.935447343788751, "learning_rate": 0.0002033997655334115, "loss": 4.165396213531494, "step": 694, "token_acc": 0.2063111892190331 }, { "epoch": 0.4075051304602756, "grad_norm": 2.277939455747646, "learning_rate": 0.00020369284876905043, "loss": 4.051935195922852, "step": 695, "token_acc": 0.21691328870691245 }, { "epoch": 0.4080914687774846, "grad_norm": 1.9818369549169748, "learning_rate": 0.00020398593200468933, "loss": 4.093563079833984, "step": 696, "token_acc": 0.2116513357303584 }, { "epoch": 0.40867780709469365, "grad_norm": 1.75615010635649, "learning_rate": 0.00020427901524032826, "loss": 4.061014652252197, "step": 697, "token_acc": 0.2163832424849383 }, { "epoch": 0.40926414541190265, "grad_norm": 2.0761384136039487, "learning_rate": 0.00020457209847596716, "loss": 4.1219072341918945, "step": 698, "token_acc": 0.2075109321380693 }, { "epoch": 0.4098504837291117, "grad_norm": 2.084426291121808, "learning_rate": 0.0002048651817116061, "loss": 4.014046669006348, "step": 699, "token_acc": 0.219447500207435 }, { "epoch": 0.4104368220463207, "grad_norm": 2.0836725884319014, "learning_rate": 0.00020515826494724501, "loss": 4.119354248046875, "step": 700, "token_acc": 0.20690766883869174 }, { "epoch": 0.41102316036352976, "grad_norm": 2.361919740078177, "learning_rate": 0.00020545134818288394, "loss": 4.129726409912109, "step": 701, "token_acc": 0.20816018930121397 }, { "epoch": 0.41160949868073876, "grad_norm": 1.7317770889794688, "learning_rate": 0.00020574443141852287, "loss": 4.121204376220703, "step": 702, "token_acc": 0.21044066880173024 }, { "epoch": 0.4121958369979478, "grad_norm": 2.2494175244386403, "learning_rate": 0.0002060375146541618, "loss": 4.0933942794799805, "step": 703, "token_acc": 0.2119768174373189 }, { "epoch": 0.4127821753151568, "grad_norm": 2.1567664120244676, "learning_rate": 0.00020633059788980072, "loss": 4.113862991333008, "step": 704, "token_acc": 0.20816850067688936 }, { "epoch": 0.4133685136323659, "grad_norm": 1.9985908835457784, "learning_rate": 0.00020662368112543965, "loss": 4.14725399017334, "step": 705, "token_acc": 0.20447180140171065 }, { "epoch": 0.41395485194957493, "grad_norm": 2.571306310754256, "learning_rate": 0.00020691676436107855, "loss": 4.0604705810546875, "step": 706, "token_acc": 0.21314476825532225 }, { "epoch": 0.41454119026678393, "grad_norm": 1.7564910695849216, "learning_rate": 0.00020720984759671745, "loss": 4.050766944885254, "step": 707, "token_acc": 0.2154999615512778 }, { "epoch": 0.415127528583993, "grad_norm": 2.731482760416182, "learning_rate": 0.00020750293083235638, "loss": 4.046940326690674, "step": 708, "token_acc": 0.21555105350520207 }, { "epoch": 0.415713866901202, "grad_norm": 1.9118279706971355, "learning_rate": 0.0002077960140679953, "loss": 4.081516265869141, "step": 709, "token_acc": 0.21448180268806766 }, { "epoch": 0.41630020521841105, "grad_norm": 1.8197667369067003, "learning_rate": 0.00020808909730363423, "loss": 4.071291446685791, "step": 710, "token_acc": 0.2139945038297782 }, { "epoch": 0.41688654353562005, "grad_norm": 2.0462060621382965, "learning_rate": 0.00020838218053927316, "loss": 4.06553840637207, "step": 711, "token_acc": 0.21349837574240058 }, { "epoch": 0.4174728818528291, "grad_norm": 2.1720148383857003, "learning_rate": 0.00020867526377491209, "loss": 4.117403984069824, "step": 712, "token_acc": 0.20813482943056724 }, { "epoch": 0.4180592201700381, "grad_norm": 2.1483037378863634, "learning_rate": 0.000208968347010551, "loss": 4.112849712371826, "step": 713, "token_acc": 0.21048822886268712 }, { "epoch": 0.41864555848724716, "grad_norm": 1.8091475844278968, "learning_rate": 0.00020926143024618994, "loss": 4.067354202270508, "step": 714, "token_acc": 0.21204839866497155 }, { "epoch": 0.41923189680445616, "grad_norm": 1.9319472139802623, "learning_rate": 0.00020955451348182887, "loss": 4.059874534606934, "step": 715, "token_acc": 0.21449142188318154 }, { "epoch": 0.4198182351216652, "grad_norm": 2.4975280097721315, "learning_rate": 0.00020984759671746774, "loss": 4.029237270355225, "step": 716, "token_acc": 0.21732570020118605 }, { "epoch": 0.4204045734388742, "grad_norm": 1.9164801344004896, "learning_rate": 0.00021014067995310667, "loss": 4.080502510070801, "step": 717, "token_acc": 0.21237672406253696 }, { "epoch": 0.4209909117560833, "grad_norm": 2.1400704047185934, "learning_rate": 0.0002104337631887456, "loss": 4.098213195800781, "step": 718, "token_acc": 0.2090646176153848 }, { "epoch": 0.4215772500732923, "grad_norm": 2.1614696940446176, "learning_rate": 0.00021072684642438452, "loss": 4.03939151763916, "step": 719, "token_acc": 0.2161053818673158 }, { "epoch": 0.42216358839050133, "grad_norm": 1.8201702413374024, "learning_rate": 0.00021101992966002345, "loss": 4.116702556610107, "step": 720, "token_acc": 0.20861004726205998 }, { "epoch": 0.42274992670771033, "grad_norm": 2.133483432451525, "learning_rate": 0.00021131301289566238, "loss": 3.985278606414795, "step": 721, "token_acc": 0.22097369563868727 }, { "epoch": 0.4233362650249194, "grad_norm": 1.9164653947577601, "learning_rate": 0.0002116060961313013, "loss": 4.088165283203125, "step": 722, "token_acc": 0.20935368847412422 }, { "epoch": 0.4239226033421284, "grad_norm": 2.4035328307719053, "learning_rate": 0.00021189917936694023, "loss": 4.082188129425049, "step": 723, "token_acc": 0.2121592905888603 }, { "epoch": 0.42450894165933745, "grad_norm": 1.7137457048751554, "learning_rate": 0.00021219226260257916, "loss": 3.997396230697632, "step": 724, "token_acc": 0.21972841037383326 }, { "epoch": 0.42509527997654645, "grad_norm": 2.2470061122133567, "learning_rate": 0.00021248534583821806, "loss": 4.1175689697265625, "step": 725, "token_acc": 0.2067081742268474 }, { "epoch": 0.4256816182937555, "grad_norm": 2.3229221372335567, "learning_rate": 0.00021277842907385696, "loss": 4.1208415031433105, "step": 726, "token_acc": 0.20549858835910434 }, { "epoch": 0.4262679566109645, "grad_norm": 1.8310992649134992, "learning_rate": 0.00021307151230949589, "loss": 4.040855884552002, "step": 727, "token_acc": 0.21452379133083374 }, { "epoch": 0.42685429492817356, "grad_norm": 1.7348197747209888, "learning_rate": 0.0002133645955451348, "loss": 4.058550834655762, "step": 728, "token_acc": 0.21420323472808148 }, { "epoch": 0.42744063324538256, "grad_norm": 2.0396949777502584, "learning_rate": 0.00021365767878077374, "loss": 4.041525363922119, "step": 729, "token_acc": 0.21365123140071832 }, { "epoch": 0.4280269715625916, "grad_norm": 1.9526226295230678, "learning_rate": 0.00021395076201641267, "loss": 4.05921745300293, "step": 730, "token_acc": 0.21601703895110597 }, { "epoch": 0.4286133098798006, "grad_norm": 1.8738785478939408, "learning_rate": 0.0002142438452520516, "loss": 4.072822570800781, "step": 731, "token_acc": 0.2113496143958869 }, { "epoch": 0.4291996481970097, "grad_norm": 1.6557007612296912, "learning_rate": 0.00021453692848769052, "loss": 4.027461051940918, "step": 732, "token_acc": 0.21646072492207008 }, { "epoch": 0.42978598651421873, "grad_norm": 2.3622858426424944, "learning_rate": 0.00021483001172332945, "loss": 4.029733657836914, "step": 733, "token_acc": 0.2170796281378818 }, { "epoch": 0.43037232483142773, "grad_norm": 1.5324235551213214, "learning_rate": 0.00021512309495896835, "loss": 4.010272979736328, "step": 734, "token_acc": 0.21741305979484613 }, { "epoch": 0.4309586631486368, "grad_norm": 2.3386073862547914, "learning_rate": 0.00021541617819460728, "loss": 4.052709579467773, "step": 735, "token_acc": 0.2140196321216496 }, { "epoch": 0.4315450014658458, "grad_norm": 1.4599089111534616, "learning_rate": 0.0002157092614302462, "loss": 3.989351511001587, "step": 736, "token_acc": 0.2223892787838745 }, { "epoch": 0.43213133978305485, "grad_norm": 1.9077388670200224, "learning_rate": 0.0002160023446658851, "loss": 4.063753128051758, "step": 737, "token_acc": 0.2143941338770098 }, { "epoch": 0.43271767810026385, "grad_norm": 1.6787276252209509, "learning_rate": 0.00021629542790152403, "loss": 4.056057453155518, "step": 738, "token_acc": 0.2125789944081055 }, { "epoch": 0.4333040164174729, "grad_norm": 2.19216933069188, "learning_rate": 0.00021658851113716296, "loss": 4.056268692016602, "step": 739, "token_acc": 0.21214584271107967 }, { "epoch": 0.4338903547346819, "grad_norm": 1.4126912322007483, "learning_rate": 0.00021688159437280188, "loss": 4.005537033081055, "step": 740, "token_acc": 0.21921170577379384 }, { "epoch": 0.43447669305189096, "grad_norm": 2.5008048589762844, "learning_rate": 0.0002171746776084408, "loss": 4.111293792724609, "step": 741, "token_acc": 0.20807779685783462 }, { "epoch": 0.43506303136909996, "grad_norm": 1.679513752408952, "learning_rate": 0.0002174677608440797, "loss": 4.003476142883301, "step": 742, "token_acc": 0.21912503568009417 }, { "epoch": 0.435649369686309, "grad_norm": 1.9614447718753012, "learning_rate": 0.00021776084407971864, "loss": 4.041287422180176, "step": 743, "token_acc": 0.21623992270693307 }, { "epoch": 0.436235708003518, "grad_norm": 1.704727283799899, "learning_rate": 0.00021805392731535757, "loss": 4.006258964538574, "step": 744, "token_acc": 0.2168786741505862 }, { "epoch": 0.4368220463207271, "grad_norm": 1.777120123403354, "learning_rate": 0.0002183470105509965, "loss": 4.042520523071289, "step": 745, "token_acc": 0.21516022325248732 }, { "epoch": 0.4374083846379361, "grad_norm": 1.7565510746502324, "learning_rate": 0.00021864009378663542, "loss": 4.07253360748291, "step": 746, "token_acc": 0.21072772067738174 }, { "epoch": 0.43799472295514513, "grad_norm": 1.900008782040298, "learning_rate": 0.00021893317702227432, "loss": 4.014519691467285, "step": 747, "token_acc": 0.21650056101907464 }, { "epoch": 0.43858106127235413, "grad_norm": 1.6277034347422095, "learning_rate": 0.00021922626025791325, "loss": 4.0982561111450195, "step": 748, "token_acc": 0.2068687987495277 }, { "epoch": 0.4391673995895632, "grad_norm": 1.774838004056762, "learning_rate": 0.00021951934349355217, "loss": 4.032931327819824, "step": 749, "token_acc": 0.2158822078242466 }, { "epoch": 0.4397537379067722, "grad_norm": 2.3355340328229817, "learning_rate": 0.0002198124267291911, "loss": 4.0619354248046875, "step": 750, "token_acc": 0.21238170097382386 }, { "epoch": 0.44034007622398125, "grad_norm": 1.7957200090688348, "learning_rate": 0.00022010550996483, "loss": 4.074429035186768, "step": 751, "token_acc": 0.20913684027135032 }, { "epoch": 0.44092641454119025, "grad_norm": 1.8291746297036837, "learning_rate": 0.00022039859320046893, "loss": 4.036952972412109, "step": 752, "token_acc": 0.21266663261657898 }, { "epoch": 0.4415127528583993, "grad_norm": 1.7351075902549933, "learning_rate": 0.00022069167643610786, "loss": 4.024649620056152, "step": 753, "token_acc": 0.2182853040713773 }, { "epoch": 0.4420990911756083, "grad_norm": 1.68521167471349, "learning_rate": 0.00022098475967174678, "loss": 3.986966133117676, "step": 754, "token_acc": 0.2194137588479565 }, { "epoch": 0.44268542949281736, "grad_norm": 1.7226967559828836, "learning_rate": 0.0002212778429073857, "loss": 4.0455827713012695, "step": 755, "token_acc": 0.21069507928671158 }, { "epoch": 0.44327176781002636, "grad_norm": 1.717343244747412, "learning_rate": 0.00022157092614302464, "loss": 4.025557518005371, "step": 756, "token_acc": 0.21723849839890502 }, { "epoch": 0.4438581061272354, "grad_norm": 2.006092086695301, "learning_rate": 0.00022186400937866354, "loss": 4.047937870025635, "step": 757, "token_acc": 0.21306945846076414 }, { "epoch": 0.4444444444444444, "grad_norm": 1.4330332716195198, "learning_rate": 0.00022215709261430247, "loss": 4.046126365661621, "step": 758, "token_acc": 0.211544689119171 }, { "epoch": 0.4450307827616535, "grad_norm": 2.4784736645670744, "learning_rate": 0.0002224501758499414, "loss": 4.0084381103515625, "step": 759, "token_acc": 0.21681487845085792 }, { "epoch": 0.4456171210788625, "grad_norm": 1.5606073263465277, "learning_rate": 0.0002227432590855803, "loss": 4.0421671867370605, "step": 760, "token_acc": 0.21304208308964717 }, { "epoch": 0.44620345939607153, "grad_norm": 1.9342978153963148, "learning_rate": 0.00022303634232121922, "loss": 4.01188850402832, "step": 761, "token_acc": 0.21594870238676395 }, { "epoch": 0.4467897977132806, "grad_norm": 1.8247554518599955, "learning_rate": 0.00022332942555685815, "loss": 4.003640651702881, "step": 762, "token_acc": 0.21659994232522856 }, { "epoch": 0.4473761360304896, "grad_norm": 1.4198825641000967, "learning_rate": 0.00022362250879249707, "loss": 3.9982781410217285, "step": 763, "token_acc": 0.21839969279488272 }, { "epoch": 0.44796247434769865, "grad_norm": 2.2293971828528427, "learning_rate": 0.000223915592028136, "loss": 3.9771862030029297, "step": 764, "token_acc": 0.22031586030372197 }, { "epoch": 0.44854881266490765, "grad_norm": 1.3034303537416057, "learning_rate": 0.00022420867526377493, "loss": 4.025883197784424, "step": 765, "token_acc": 0.21496806316009734 }, { "epoch": 0.4491351509821167, "grad_norm": 2.2050447448546726, "learning_rate": 0.00022450175849941386, "loss": 3.986133098602295, "step": 766, "token_acc": 0.22015319052101326 }, { "epoch": 0.4497214892993257, "grad_norm": 1.3875255518866465, "learning_rate": 0.00022479484173505276, "loss": 4.023779392242432, "step": 767, "token_acc": 0.2153660157407185 }, { "epoch": 0.45030782761653476, "grad_norm": 1.831923129459966, "learning_rate": 0.00022508792497069168, "loss": 4.017499923706055, "step": 768, "token_acc": 0.21423496601380657 }, { "epoch": 0.45089416593374376, "grad_norm": 1.5963945653154585, "learning_rate": 0.00022538100820633058, "loss": 4.066218376159668, "step": 769, "token_acc": 0.20941322234455975 }, { "epoch": 0.4514805042509528, "grad_norm": 1.9272891640295549, "learning_rate": 0.0002256740914419695, "loss": 3.9980664253234863, "step": 770, "token_acc": 0.2180391748009331 }, { "epoch": 0.4520668425681618, "grad_norm": 1.612232335848192, "learning_rate": 0.00022596717467760844, "loss": 3.987384557723999, "step": 771, "token_acc": 0.21802794300306277 }, { "epoch": 0.4526531808853709, "grad_norm": 1.5710388494163687, "learning_rate": 0.00022626025791324736, "loss": 3.9475903511047363, "step": 772, "token_acc": 0.22294018774578206 }, { "epoch": 0.4532395192025799, "grad_norm": 1.9439372962106505, "learning_rate": 0.0002265533411488863, "loss": 4.011725902557373, "step": 773, "token_acc": 0.2174230031999703 }, { "epoch": 0.45382585751978893, "grad_norm": 1.490390064617704, "learning_rate": 0.00022684642438452522, "loss": 4.037602424621582, "step": 774, "token_acc": 0.21332604707443695 }, { "epoch": 0.45441219583699793, "grad_norm": 1.5222723737978927, "learning_rate": 0.00022713950762016415, "loss": 4.036128044128418, "step": 775, "token_acc": 0.21326276709190725 }, { "epoch": 0.454998534154207, "grad_norm": 1.901559354248794, "learning_rate": 0.00022743259085580307, "loss": 3.994436502456665, "step": 776, "token_acc": 0.21889277748371924 }, { "epoch": 0.455584872471416, "grad_norm": 1.599926779973861, "learning_rate": 0.00022772567409144197, "loss": 3.9802608489990234, "step": 777, "token_acc": 0.21836872811511085 }, { "epoch": 0.45617121078862505, "grad_norm": 1.7743235548118104, "learning_rate": 0.00022801875732708087, "loss": 3.98239803314209, "step": 778, "token_acc": 0.21905449393850912 }, { "epoch": 0.45675754910583405, "grad_norm": 1.8566778758906217, "learning_rate": 0.0002283118405627198, "loss": 3.983452558517456, "step": 779, "token_acc": 0.21680983706370532 }, { "epoch": 0.4573438874230431, "grad_norm": 1.6718892396047285, "learning_rate": 0.00022860492379835873, "loss": 3.9802134037017822, "step": 780, "token_acc": 0.2166123987447817 }, { "epoch": 0.4579302257402521, "grad_norm": 1.76356672238489, "learning_rate": 0.00022889800703399765, "loss": 3.9591665267944336, "step": 781, "token_acc": 0.22130066367825002 }, { "epoch": 0.45851656405746116, "grad_norm": 1.5392791281243248, "learning_rate": 0.00022919109026963658, "loss": 3.9620537757873535, "step": 782, "token_acc": 0.21998108722876786 }, { "epoch": 0.45910290237467016, "grad_norm": 2.0898358779682837, "learning_rate": 0.0002294841735052755, "loss": 3.993950128555298, "step": 783, "token_acc": 0.21714388040206564 }, { "epoch": 0.4596892406918792, "grad_norm": 1.5344549833791856, "learning_rate": 0.00022977725674091444, "loss": 3.9980597496032715, "step": 784, "token_acc": 0.21616525084778657 }, { "epoch": 0.4602755790090882, "grad_norm": 2.2583607519702027, "learning_rate": 0.00023007033997655336, "loss": 3.998781204223633, "step": 785, "token_acc": 0.217110075847996 }, { "epoch": 0.4608619173262973, "grad_norm": 1.9138605630757042, "learning_rate": 0.0002303634232121923, "loss": 3.9461002349853516, "step": 786, "token_acc": 0.22020585189694916 }, { "epoch": 0.4614482556435063, "grad_norm": 1.8192137921741556, "learning_rate": 0.00023065650644783116, "loss": 4.0256547927856445, "step": 787, "token_acc": 0.21172509765751066 }, { "epoch": 0.46203459396071533, "grad_norm": 1.7106583109607434, "learning_rate": 0.0002309495896834701, "loss": 3.969985008239746, "step": 788, "token_acc": 0.21994648976431475 }, { "epoch": 0.46262093227792433, "grad_norm": 1.6847870238014038, "learning_rate": 0.00023124267291910902, "loss": 3.9602386951446533, "step": 789, "token_acc": 0.219969565847498 }, { "epoch": 0.4632072705951334, "grad_norm": 1.902475063581863, "learning_rate": 0.00023153575615474795, "loss": 3.966522216796875, "step": 790, "token_acc": 0.21975405667791595 }, { "epoch": 0.46379360891234245, "grad_norm": 1.5687939762527154, "learning_rate": 0.00023182883939038687, "loss": 3.9939188957214355, "step": 791, "token_acc": 0.21603237381806753 }, { "epoch": 0.46437994722955145, "grad_norm": 1.7354660867276823, "learning_rate": 0.0002321219226260258, "loss": 3.9329535961151123, "step": 792, "token_acc": 0.2234515050167224 }, { "epoch": 0.4649662855467605, "grad_norm": 1.7651701514741607, "learning_rate": 0.00023241500586166473, "loss": 3.980254650115967, "step": 793, "token_acc": 0.21843814153295277 }, { "epoch": 0.4655526238639695, "grad_norm": 1.6692683821353422, "learning_rate": 0.00023270808909730365, "loss": 3.9503610134124756, "step": 794, "token_acc": 0.2194911304946864 }, { "epoch": 0.46613896218117856, "grad_norm": 2.1222261682151147, "learning_rate": 0.00023300117233294258, "loss": 3.946131706237793, "step": 795, "token_acc": 0.22192083007888044 }, { "epoch": 0.46672530049838756, "grad_norm": 1.3760324566699202, "learning_rate": 0.00023329425556858148, "loss": 3.954542875289917, "step": 796, "token_acc": 0.22213777783986924 }, { "epoch": 0.4673116388155966, "grad_norm": 1.9735904335451047, "learning_rate": 0.0002335873388042204, "loss": 3.9499893188476562, "step": 797, "token_acc": 0.22057887172770108 }, { "epoch": 0.4678979771328056, "grad_norm": 1.7351168327461521, "learning_rate": 0.0002338804220398593, "loss": 3.87284517288208, "step": 798, "token_acc": 0.22793353027300806 }, { "epoch": 0.4684843154500147, "grad_norm": 1.7946512194609563, "learning_rate": 0.00023417350527549824, "loss": 3.956754684448242, "step": 799, "token_acc": 0.21745916082619007 }, { "epoch": 0.4690706537672237, "grad_norm": 1.6625602665169559, "learning_rate": 0.00023446658851113716, "loss": 3.9647860527038574, "step": 800, "token_acc": 0.21907062507725905 }, { "epoch": 0.46965699208443273, "grad_norm": 1.551781501356406, "learning_rate": 0.0002347596717467761, "loss": 3.965974807739258, "step": 801, "token_acc": 0.21881128516581275 }, { "epoch": 0.47024333040164173, "grad_norm": 1.5915130492622156, "learning_rate": 0.00023505275498241502, "loss": 3.9539031982421875, "step": 802, "token_acc": 0.2180815172321433 }, { "epoch": 0.4708296687188508, "grad_norm": 2.0533221002606616, "learning_rate": 0.00023534583821805394, "loss": 3.974269151687622, "step": 803, "token_acc": 0.2189483978327563 }, { "epoch": 0.4714160070360598, "grad_norm": 1.4006372625876125, "learning_rate": 0.00023563892145369287, "loss": 3.9977452754974365, "step": 804, "token_acc": 0.21441884660729177 }, { "epoch": 0.47200234535326885, "grad_norm": 1.8883413465404273, "learning_rate": 0.00023593200468933177, "loss": 3.9808130264282227, "step": 805, "token_acc": 0.21612854984348975 }, { "epoch": 0.47258868367047785, "grad_norm": 1.2265562261984315, "learning_rate": 0.0002362250879249707, "loss": 3.887144088745117, "step": 806, "token_acc": 0.22653741712375927 }, { "epoch": 0.4731750219876869, "grad_norm": 2.158122965318434, "learning_rate": 0.00023651817116060963, "loss": 3.995875597000122, "step": 807, "token_acc": 0.21453683908816715 }, { "epoch": 0.4737613603048959, "grad_norm": 1.47491822181735, "learning_rate": 0.00023681125439624853, "loss": 3.9565086364746094, "step": 808, "token_acc": 0.21757540663226793 }, { "epoch": 0.47434769862210496, "grad_norm": 1.8730675623460245, "learning_rate": 0.00023710433763188745, "loss": 3.9426002502441406, "step": 809, "token_acc": 0.22001132613018065 }, { "epoch": 0.47493403693931396, "grad_norm": 1.3421298458653779, "learning_rate": 0.00023739742086752638, "loss": 3.9120216369628906, "step": 810, "token_acc": 0.22239072462580084 }, { "epoch": 0.475520375256523, "grad_norm": 1.8665485111656386, "learning_rate": 0.0002376905041031653, "loss": 3.9659433364868164, "step": 811, "token_acc": 0.21813315041919312 }, { "epoch": 0.476106713573732, "grad_norm": 1.6895152632784898, "learning_rate": 0.00023798358733880423, "loss": 3.9220194816589355, "step": 812, "token_acc": 0.22356798395696392 }, { "epoch": 0.4766930518909411, "grad_norm": 1.4356603748541694, "learning_rate": 0.00023827667057444316, "loss": 3.895996332168579, "step": 813, "token_acc": 0.22298496449163932 }, { "epoch": 0.4772793902081501, "grad_norm": 1.416091288295409, "learning_rate": 0.00023856975381008206, "loss": 3.922710418701172, "step": 814, "token_acc": 0.22113981734748728 }, { "epoch": 0.47786572852535913, "grad_norm": 1.5985097604893062, "learning_rate": 0.000238862837045721, "loss": 3.9446113109588623, "step": 815, "token_acc": 0.21899810989139165 }, { "epoch": 0.47845206684256814, "grad_norm": 1.3560245718360042, "learning_rate": 0.00023915592028135992, "loss": 3.944176197052002, "step": 816, "token_acc": 0.21776866388977892 }, { "epoch": 0.4790384051597772, "grad_norm": 1.9297407873813672, "learning_rate": 0.00023944900351699884, "loss": 3.918590545654297, "step": 817, "token_acc": 0.22297865869113398 }, { "epoch": 0.47962474347698625, "grad_norm": 1.8250733812697824, "learning_rate": 0.00023974208675263774, "loss": 3.912874937057495, "step": 818, "token_acc": 0.22163575675182112 }, { "epoch": 0.48021108179419525, "grad_norm": 1.445252455662415, "learning_rate": 0.00024003516998827667, "loss": 3.8873467445373535, "step": 819, "token_acc": 0.22527255594569412 }, { "epoch": 0.4807974201114043, "grad_norm": 1.7365774074738167, "learning_rate": 0.0002403282532239156, "loss": 3.89288592338562, "step": 820, "token_acc": 0.22349862639470816 }, { "epoch": 0.4813837584286133, "grad_norm": 1.4258480441439452, "learning_rate": 0.00024062133645955453, "loss": 3.9203014373779297, "step": 821, "token_acc": 0.22084931774179467 }, { "epoch": 0.48197009674582236, "grad_norm": 2.0842359087282265, "learning_rate": 0.00024091441969519343, "loss": 3.91632342338562, "step": 822, "token_acc": 0.22314360307824171 }, { "epoch": 0.48255643506303136, "grad_norm": 1.3275376084312145, "learning_rate": 0.00024120750293083235, "loss": 3.857473134994507, "step": 823, "token_acc": 0.22700043894755867 }, { "epoch": 0.4831427733802404, "grad_norm": 1.5158122755988426, "learning_rate": 0.00024150058616647128, "loss": 3.886094570159912, "step": 824, "token_acc": 0.22330989837846926 }, { "epoch": 0.4837291116974494, "grad_norm": 1.7691343421846069, "learning_rate": 0.0002417936694021102, "loss": 3.9427223205566406, "step": 825, "token_acc": 0.21862984653970963 }, { "epoch": 0.4843154500146585, "grad_norm": 1.9605147878845215, "learning_rate": 0.00024208675263774913, "loss": 3.846086025238037, "step": 826, "token_acc": 0.23053430145353646 }, { "epoch": 0.4849017883318675, "grad_norm": 1.596179867223669, "learning_rate": 0.00024237983587338806, "loss": 3.8211421966552734, "step": 827, "token_acc": 0.23085348986982274 }, { "epoch": 0.48548812664907653, "grad_norm": 1.4227708355316044, "learning_rate": 0.00024267291910902696, "loss": 3.9240598678588867, "step": 828, "token_acc": 0.21963006295038515 }, { "epoch": 0.48607446496628554, "grad_norm": 2.304916818681686, "learning_rate": 0.0002429660023446659, "loss": 3.882449150085449, "step": 829, "token_acc": 0.22366936808673749 }, { "epoch": 0.4866608032834946, "grad_norm": 1.508748373161343, "learning_rate": 0.00024325908558030482, "loss": 3.860668182373047, "step": 830, "token_acc": 0.22611897418455504 }, { "epoch": 0.4872471416007036, "grad_norm": 1.9435105690103587, "learning_rate": 0.00024355216881594372, "loss": 3.8318357467651367, "step": 831, "token_acc": 0.22927656509113284 }, { "epoch": 0.48783347991791265, "grad_norm": 1.3717838639501128, "learning_rate": 0.00024384525205158264, "loss": 3.9031076431274414, "step": 832, "token_acc": 0.2213116243571805 }, { "epoch": 0.48841981823512165, "grad_norm": 2.3623056127812823, "learning_rate": 0.00024413833528722157, "loss": 3.8353021144866943, "step": 833, "token_acc": 0.22938385142725162 }, { "epoch": 0.4890061565523307, "grad_norm": 1.2101893519203448, "learning_rate": 0.0002444314185228605, "loss": 3.825613498687744, "step": 834, "token_acc": 0.22759554228708115 }, { "epoch": 0.4895924948695397, "grad_norm": 1.7127815553304444, "learning_rate": 0.0002447245017584994, "loss": 3.840912342071533, "step": 835, "token_acc": 0.22741469209953138 }, { "epoch": 0.49017883318674876, "grad_norm": 1.7684884931962814, "learning_rate": 0.00024501758499413835, "loss": 3.800657033920288, "step": 836, "token_acc": 0.2280093758783257 }, { "epoch": 0.49076517150395776, "grad_norm": 1.601524331679, "learning_rate": 0.0002453106682297773, "loss": 3.8888421058654785, "step": 837, "token_acc": 0.22171272451092972 }, { "epoch": 0.4913515098211668, "grad_norm": 1.7350966009248292, "learning_rate": 0.0002456037514654162, "loss": 3.8140242099761963, "step": 838, "token_acc": 0.2305299539170507 }, { "epoch": 0.4919378481383758, "grad_norm": 1.53146327976739, "learning_rate": 0.00024589683470105513, "loss": 3.8475823402404785, "step": 839, "token_acc": 0.22582238700472188 }, { "epoch": 0.4925241864555849, "grad_norm": 1.5708135632039935, "learning_rate": 0.000246189917936694, "loss": 3.800248861312866, "step": 840, "token_acc": 0.23169208328811924 }, { "epoch": 0.4931105247727939, "grad_norm": 1.5902371303967509, "learning_rate": 0.00024648300117233293, "loss": 3.8462116718292236, "step": 841, "token_acc": 0.22476964881434236 }, { "epoch": 0.49369686309000294, "grad_norm": 1.8709222520264444, "learning_rate": 0.00024677608440797186, "loss": 3.811887741088867, "step": 842, "token_acc": 0.22873590316872675 }, { "epoch": 0.49428320140721194, "grad_norm": 1.9788407246933075, "learning_rate": 0.0002470691676436108, "loss": 3.852499485015869, "step": 843, "token_acc": 0.2244373753807716 }, { "epoch": 0.494869539724421, "grad_norm": 1.347167371088646, "learning_rate": 0.0002473622508792497, "loss": 3.7867794036865234, "step": 844, "token_acc": 0.2342614632924079 }, { "epoch": 0.49545587804163, "grad_norm": 1.8687765602264133, "learning_rate": 0.00024765533411488864, "loss": 3.760441303253174, "step": 845, "token_acc": 0.23309817149673995 }, { "epoch": 0.49604221635883905, "grad_norm": 1.6938706310063778, "learning_rate": 0.00024794841735052757, "loss": 3.78448224067688, "step": 846, "token_acc": 0.22955764840907303 }, { "epoch": 0.4966285546760481, "grad_norm": 1.8302013611347514, "learning_rate": 0.0002482415005861665, "loss": 3.832221269607544, "step": 847, "token_acc": 0.2283948759617603 }, { "epoch": 0.4972148929932571, "grad_norm": 1.7567761809256568, "learning_rate": 0.0002485345838218054, "loss": 3.822584629058838, "step": 848, "token_acc": 0.22859576240415216 }, { "epoch": 0.49780123131046616, "grad_norm": 1.55317348636794, "learning_rate": 0.0002488276670574443, "loss": 3.8465898036956787, "step": 849, "token_acc": 0.22345761820214374 }, { "epoch": 0.49838756962767516, "grad_norm": 1.876278955854456, "learning_rate": 0.0002491207502930832, "loss": 3.8260867595672607, "step": 850, "token_acc": 0.22560403620414268 }, { "epoch": 0.4989739079448842, "grad_norm": 1.703511239070497, "learning_rate": 0.00024941383352872215, "loss": 3.7570641040802, "step": 851, "token_acc": 0.23538907273974283 }, { "epoch": 0.4995602462620932, "grad_norm": 1.744215941748723, "learning_rate": 0.0002497069167643611, "loss": 3.81729793548584, "step": 852, "token_acc": 0.2290196278488439 }, { "epoch": 0.5001465845793023, "grad_norm": 1.3282473726456636, "learning_rate": 0.00025, "loss": 3.78011417388916, "step": 853, "token_acc": 0.23217940344001742 }, { "epoch": 0.5007329228965113, "grad_norm": 2.0818591381285407, "learning_rate": 0.00025029308323563893, "loss": 3.7965335845947266, "step": 854, "token_acc": 0.22880796281741916 }, { "epoch": 0.5013192612137203, "grad_norm": 1.3255993760528444, "learning_rate": 0.00025058616647127786, "loss": 3.796429395675659, "step": 855, "token_acc": 0.2300318642577323 }, { "epoch": 0.5019055995309294, "grad_norm": 1.9816862652096225, "learning_rate": 0.0002508792497069168, "loss": 3.7942514419555664, "step": 856, "token_acc": 0.2281402858147639 }, { "epoch": 0.5024919378481384, "grad_norm": 1.483853262794814, "learning_rate": 0.0002511723329425557, "loss": 3.7643816471099854, "step": 857, "token_acc": 0.23288964975361512 }, { "epoch": 0.5030782761653474, "grad_norm": 1.4459776318361894, "learning_rate": 0.00025146541617819464, "loss": 3.771902561187744, "step": 858, "token_acc": 0.2300484127587409 }, { "epoch": 0.5036646144825564, "grad_norm": 1.9185294373549824, "learning_rate": 0.00025175849941383357, "loss": 3.8050169944763184, "step": 859, "token_acc": 0.22899281416895148 }, { "epoch": 0.5042509527997655, "grad_norm": 1.6138231773015839, "learning_rate": 0.0002520515826494725, "loss": 3.7790579795837402, "step": 860, "token_acc": 0.2292011251461742 }, { "epoch": 0.5048372911169745, "grad_norm": 1.674408509483965, "learning_rate": 0.0002523446658851114, "loss": 3.7430124282836914, "step": 861, "token_acc": 0.23371507881739023 }, { "epoch": 0.5054236294341835, "grad_norm": 1.4529751246816234, "learning_rate": 0.0002526377491207503, "loss": 3.7706427574157715, "step": 862, "token_acc": 0.2313683163497455 }, { "epoch": 0.5060099677513925, "grad_norm": 1.581861246698084, "learning_rate": 0.00025293083235638917, "loss": 3.7948055267333984, "step": 863, "token_acc": 0.22711628395775313 }, { "epoch": 0.5065963060686016, "grad_norm": 1.555470453769317, "learning_rate": 0.0002532239155920281, "loss": 3.814864158630371, "step": 864, "token_acc": 0.22548447593248594 }, { "epoch": 0.5071826443858106, "grad_norm": 1.452257538138237, "learning_rate": 0.000253516998827667, "loss": 3.745765209197998, "step": 865, "token_acc": 0.2342552713981896 }, { "epoch": 0.5077689827030196, "grad_norm": 1.4935048903499673, "learning_rate": 0.00025381008206330595, "loss": 3.7806010246276855, "step": 866, "token_acc": 0.23029535710455343 }, { "epoch": 0.5083553210202286, "grad_norm": 1.6269784268180973, "learning_rate": 0.0002541031652989449, "loss": 3.7190489768981934, "step": 867, "token_acc": 0.23618227994683721 }, { "epoch": 0.5089416593374377, "grad_norm": 1.7379111019279003, "learning_rate": 0.0002543962485345838, "loss": 3.6765201091766357, "step": 868, "token_acc": 0.24139835188174327 }, { "epoch": 0.5095279976546467, "grad_norm": 1.6690603724861948, "learning_rate": 0.00025468933177022273, "loss": 3.788986921310425, "step": 869, "token_acc": 0.2293322703228632 }, { "epoch": 0.5101143359718557, "grad_norm": 1.4586683935680724, "learning_rate": 0.00025498241500586166, "loss": 3.7371885776519775, "step": 870, "token_acc": 0.2350118615310204 }, { "epoch": 0.5107006742890647, "grad_norm": 1.703814884868689, "learning_rate": 0.0002552754982415006, "loss": 3.7847187519073486, "step": 871, "token_acc": 0.22848820905709494 }, { "epoch": 0.5112870126062738, "grad_norm": 1.5102894058540888, "learning_rate": 0.0002555685814771395, "loss": 3.671969413757324, "step": 872, "token_acc": 0.24356745967937476 }, { "epoch": 0.5118733509234829, "grad_norm": 1.4421153486857088, "learning_rate": 0.00025586166471277844, "loss": 3.783766269683838, "step": 873, "token_acc": 0.228521708494698 }, { "epoch": 0.5124596892406919, "grad_norm": 1.7147928289594974, "learning_rate": 0.00025615474794841737, "loss": 3.7101402282714844, "step": 874, "token_acc": 0.23715260361820079 }, { "epoch": 0.513046027557901, "grad_norm": 1.6473349422670045, "learning_rate": 0.0002564478311840563, "loss": 3.6850099563598633, "step": 875, "token_acc": 0.23835259512577417 }, { "epoch": 0.51363236587511, "grad_norm": 1.435763796706261, "learning_rate": 0.0002567409144196952, "loss": 3.693279266357422, "step": 876, "token_acc": 0.2382124459953473 }, { "epoch": 0.514218704192319, "grad_norm": 1.2554811401319679, "learning_rate": 0.00025703399765533415, "loss": 3.714064598083496, "step": 877, "token_acc": 0.23659434687011577 }, { "epoch": 0.514805042509528, "grad_norm": 1.846739856148871, "learning_rate": 0.0002573270808909731, "loss": 3.7165207862854004, "step": 878, "token_acc": 0.23597578492118504 }, { "epoch": 0.5153913808267371, "grad_norm": 1.7343840701434636, "learning_rate": 0.000257620164126612, "loss": 3.7642359733581543, "step": 879, "token_acc": 0.23114824560483913 }, { "epoch": 0.5159777191439461, "grad_norm": 1.5355658305727153, "learning_rate": 0.0002579132473622509, "loss": 3.677816867828369, "step": 880, "token_acc": 0.2423380585903915 }, { "epoch": 0.5165640574611551, "grad_norm": 1.6734766948831334, "learning_rate": 0.0002582063305978898, "loss": 3.7375717163085938, "step": 881, "token_acc": 0.23248602364693166 }, { "epoch": 0.5171503957783641, "grad_norm": 1.3877352433001913, "learning_rate": 0.00025849941383352873, "loss": 3.6727499961853027, "step": 882, "token_acc": 0.2387079859200266 }, { "epoch": 0.5177367340955732, "grad_norm": 1.7168686981707963, "learning_rate": 0.00025879249706916766, "loss": 3.7357640266418457, "step": 883, "token_acc": 0.23206524505063872 }, { "epoch": 0.5183230724127822, "grad_norm": 1.3815512820815554, "learning_rate": 0.00025908558030480653, "loss": 3.677196502685547, "step": 884, "token_acc": 0.2419070071900044 }, { "epoch": 0.5189094107299912, "grad_norm": 1.8921751982261423, "learning_rate": 0.00025937866354044546, "loss": 3.728128433227539, "step": 885, "token_acc": 0.23318721618903462 }, { "epoch": 0.5194957490472002, "grad_norm": 1.0594416564256517, "learning_rate": 0.0002596717467760844, "loss": 3.7119522094726562, "step": 886, "token_acc": 0.23479180756245246 }, { "epoch": 0.5200820873644093, "grad_norm": 1.9636985293337739, "learning_rate": 0.0002599648300117233, "loss": 3.738323450088501, "step": 887, "token_acc": 0.2331964429729615 }, { "epoch": 0.5206684256816183, "grad_norm": 1.13755919877721, "learning_rate": 0.00026025791324736224, "loss": 3.650294303894043, "step": 888, "token_acc": 0.2440574718483676 }, { "epoch": 0.5212547639988273, "grad_norm": 1.7955209398286704, "learning_rate": 0.00026055099648300117, "loss": 3.6420676708221436, "step": 889, "token_acc": 0.24424107048336896 }, { "epoch": 0.5218411023160363, "grad_norm": 1.649808238434703, "learning_rate": 0.0002608440797186401, "loss": 3.843682289123535, "step": 890, "token_acc": 0.2229359111333766 }, { "epoch": 0.5224274406332454, "grad_norm": 1.7562781272965513, "learning_rate": 0.000261137162954279, "loss": 3.748120069503784, "step": 891, "token_acc": 0.23115108186285035 }, { "epoch": 0.5230137789504544, "grad_norm": 1.315032986081824, "learning_rate": 0.00026143024618991795, "loss": 3.6962013244628906, "step": 892, "token_acc": 0.23633885874278351 }, { "epoch": 0.5236001172676634, "grad_norm": 1.4538060588276225, "learning_rate": 0.0002617233294255569, "loss": 3.750308036804199, "step": 893, "token_acc": 0.23088465815458406 }, { "epoch": 0.5241864555848724, "grad_norm": 1.4433750899747162, "learning_rate": 0.0002620164126611958, "loss": 3.770920753479004, "step": 894, "token_acc": 0.22776220795825852 }, { "epoch": 0.5247727939020815, "grad_norm": 1.5994260113138028, "learning_rate": 0.00026230949589683473, "loss": 3.672922134399414, "step": 895, "token_acc": 0.23874237670715165 }, { "epoch": 0.5253591322192905, "grad_norm": 1.9710930933372963, "learning_rate": 0.00026260257913247366, "loss": 3.6778249740600586, "step": 896, "token_acc": 0.240377017576823 }, { "epoch": 0.5259454705364995, "grad_norm": 1.447868446174383, "learning_rate": 0.0002628956623681126, "loss": 3.7156810760498047, "step": 897, "token_acc": 0.2359344003489785 }, { "epoch": 0.5265318088537085, "grad_norm": 1.7703622428736265, "learning_rate": 0.00026318874560375146, "loss": 3.7524983882904053, "step": 898, "token_acc": 0.23044549694042538 }, { "epoch": 0.5271181471709177, "grad_norm": 1.2780049250273275, "learning_rate": 0.0002634818288393904, "loss": 3.697524070739746, "step": 899, "token_acc": 0.23730320662988993 }, { "epoch": 0.5277044854881267, "grad_norm": 1.7118367502194758, "learning_rate": 0.0002637749120750293, "loss": 3.673832416534424, "step": 900, "token_acc": 0.24014515073940343 }, { "epoch": 0.5282908238053357, "grad_norm": 1.2900152989045572, "learning_rate": 0.00026406799531066824, "loss": 3.6508431434631348, "step": 901, "token_acc": 0.24179517180949583 }, { "epoch": 0.5288771621225447, "grad_norm": 1.4960768995430467, "learning_rate": 0.00026436107854630717, "loss": 3.70963716506958, "step": 902, "token_acc": 0.2366144450345194 }, { "epoch": 0.5294635004397538, "grad_norm": 1.407016462770081, "learning_rate": 0.0002646541617819461, "loss": 3.7448062896728516, "step": 903, "token_acc": 0.23096928969644098 }, { "epoch": 0.5300498387569628, "grad_norm": 1.312805594654422, "learning_rate": 0.00026494724501758497, "loss": 3.7442402839660645, "step": 904, "token_acc": 0.23011229131929434 }, { "epoch": 0.5306361770741718, "grad_norm": 1.6507352724637836, "learning_rate": 0.0002652403282532239, "loss": 3.7229902744293213, "step": 905, "token_acc": 0.23319511328598064 }, { "epoch": 0.5312225153913809, "grad_norm": 1.5986603900172769, "learning_rate": 0.0002655334114888628, "loss": 3.7134289741516113, "step": 906, "token_acc": 0.23611300525581444 }, { "epoch": 0.5318088537085899, "grad_norm": 1.5587459312244494, "learning_rate": 0.00026582649472450175, "loss": 3.6575586795806885, "step": 907, "token_acc": 0.2424574160640324 }, { "epoch": 0.5323951920257989, "grad_norm": 1.2896327052242644, "learning_rate": 0.0002661195779601407, "loss": 3.682987928390503, "step": 908, "token_acc": 0.23803892891039505 }, { "epoch": 0.5329815303430079, "grad_norm": 1.5910701578817552, "learning_rate": 0.0002664126611957796, "loss": 3.6545238494873047, "step": 909, "token_acc": 0.24120787162125207 }, { "epoch": 0.533567868660217, "grad_norm": 1.322561032431526, "learning_rate": 0.00026670574443141853, "loss": 3.709458351135254, "step": 910, "token_acc": 0.23508372747027995 }, { "epoch": 0.534154206977426, "grad_norm": 1.6789275610902028, "learning_rate": 0.00026699882766705746, "loss": 3.7739665508270264, "step": 911, "token_acc": 0.22829186440329005 }, { "epoch": 0.534740545294635, "grad_norm": 1.4693994047753915, "learning_rate": 0.0002672919109026964, "loss": 3.7147202491760254, "step": 912, "token_acc": 0.23320617369758645 }, { "epoch": 0.535326883611844, "grad_norm": 1.7096437235989137, "learning_rate": 0.0002675849941383353, "loss": 3.700660228729248, "step": 913, "token_acc": 0.23683841041006037 }, { "epoch": 0.5359132219290531, "grad_norm": 1.3740634061333472, "learning_rate": 0.00026787807737397424, "loss": 3.664520263671875, "step": 914, "token_acc": 0.23984174095248884 }, { "epoch": 0.5364995602462621, "grad_norm": 1.5306148097802394, "learning_rate": 0.00026817116060961317, "loss": 3.6581850051879883, "step": 915, "token_acc": 0.2397526761866706 }, { "epoch": 0.5370858985634711, "grad_norm": 1.4313735092079882, "learning_rate": 0.00026846424384525204, "loss": 3.647153615951538, "step": 916, "token_acc": 0.24048748839161907 }, { "epoch": 0.5376722368806801, "grad_norm": 1.7056871215337246, "learning_rate": 0.00026875732708089097, "loss": 3.672374725341797, "step": 917, "token_acc": 0.23911971464420934 }, { "epoch": 0.5382585751978892, "grad_norm": 1.5769228393630546, "learning_rate": 0.0002690504103165299, "loss": 3.637816905975342, "step": 918, "token_acc": 0.24200835525297848 }, { "epoch": 0.5388449135150982, "grad_norm": 1.6065656587295334, "learning_rate": 0.0002693434935521688, "loss": 3.709822177886963, "step": 919, "token_acc": 0.235122625250856 }, { "epoch": 0.5394312518323072, "grad_norm": 1.3937238481058214, "learning_rate": 0.00026963657678780775, "loss": 3.666848659515381, "step": 920, "token_acc": 0.23990388988266684 }, { "epoch": 0.5400175901495162, "grad_norm": 1.5515486711662791, "learning_rate": 0.0002699296600234467, "loss": 3.7059433460235596, "step": 921, "token_acc": 0.23402069638649073 }, { "epoch": 0.5406039284667253, "grad_norm": 1.280275575463355, "learning_rate": 0.0002702227432590856, "loss": 3.6682474613189697, "step": 922, "token_acc": 0.2378662725654388 }, { "epoch": 0.5411902667839343, "grad_norm": 1.7040837876578285, "learning_rate": 0.00027051582649472453, "loss": 3.67686128616333, "step": 923, "token_acc": 0.23788330022863857 }, { "epoch": 0.5417766051011433, "grad_norm": 1.2242242493961457, "learning_rate": 0.00027080890973036346, "loss": 3.7185275554656982, "step": 924, "token_acc": 0.23372108136210035 }, { "epoch": 0.5423629434183523, "grad_norm": 1.5593007597611654, "learning_rate": 0.00027110199296600233, "loss": 3.680393695831299, "step": 925, "token_acc": 0.23830905479225295 }, { "epoch": 0.5429492817355615, "grad_norm": 1.566231047067355, "learning_rate": 0.00027139507620164126, "loss": 3.6662802696228027, "step": 926, "token_acc": 0.23795519659334297 }, { "epoch": 0.5435356200527705, "grad_norm": 1.424326972095286, "learning_rate": 0.0002716881594372802, "loss": 3.6961522102355957, "step": 927, "token_acc": 0.23542440697342099 }, { "epoch": 0.5441219583699795, "grad_norm": 1.75356765742314, "learning_rate": 0.0002719812426729191, "loss": 3.708113670349121, "step": 928, "token_acc": 0.23260580716264792 }, { "epoch": 0.5447082966871885, "grad_norm": 1.2510373264903487, "learning_rate": 0.00027227432590855804, "loss": 3.6477930545806885, "step": 929, "token_acc": 0.24326106690183644 }, { "epoch": 0.5452946350043976, "grad_norm": 1.4548803766196858, "learning_rate": 0.00027256740914419696, "loss": 3.632298231124878, "step": 930, "token_acc": 0.24162094216807142 }, { "epoch": 0.5458809733216066, "grad_norm": 1.418298393172357, "learning_rate": 0.0002728604923798359, "loss": 3.699294328689575, "step": 931, "token_acc": 0.23485171363527688 }, { "epoch": 0.5464673116388156, "grad_norm": 1.1949458175856844, "learning_rate": 0.0002731535756154748, "loss": 3.630730152130127, "step": 932, "token_acc": 0.24417622305312223 }, { "epoch": 0.5470536499560247, "grad_norm": 1.663725947025361, "learning_rate": 0.0002734466588511137, "loss": 3.6516690254211426, "step": 933, "token_acc": 0.24071523505327017 }, { "epoch": 0.5476399882732337, "grad_norm": 1.473832392460202, "learning_rate": 0.0002737397420867526, "loss": 3.6903762817382812, "step": 934, "token_acc": 0.23811021688928444 }, { "epoch": 0.5482263265904427, "grad_norm": 1.5020590984664082, "learning_rate": 0.00027403282532239155, "loss": 3.593172788619995, "step": 935, "token_acc": 0.24806205590138178 }, { "epoch": 0.5488126649076517, "grad_norm": 1.410606778961064, "learning_rate": 0.0002743259085580305, "loss": 3.674931526184082, "step": 936, "token_acc": 0.2378261254551042 }, { "epoch": 0.5493990032248608, "grad_norm": 1.6590628344169205, "learning_rate": 0.0002746189917936694, "loss": 3.6757731437683105, "step": 937, "token_acc": 0.2382436082008386 }, { "epoch": 0.5499853415420698, "grad_norm": 1.430446721186781, "learning_rate": 0.00027491207502930833, "loss": 3.668175220489502, "step": 938, "token_acc": 0.23955169994520437 }, { "epoch": 0.5505716798592788, "grad_norm": 1.48518844546317, "learning_rate": 0.00027520515826494725, "loss": 3.6634273529052734, "step": 939, "token_acc": 0.23976885603546919 }, { "epoch": 0.5511580181764878, "grad_norm": 1.3094292865272106, "learning_rate": 0.0002754982415005862, "loss": 3.704200506210327, "step": 940, "token_acc": 0.232224367161714 }, { "epoch": 0.5517443564936969, "grad_norm": 1.4434376016126482, "learning_rate": 0.0002757913247362251, "loss": 3.637700319290161, "step": 941, "token_acc": 0.2427770989102637 }, { "epoch": 0.5523306948109059, "grad_norm": 1.4333197053663898, "learning_rate": 0.00027608440797186404, "loss": 3.693472385406494, "step": 942, "token_acc": 0.23614983477901694 }, { "epoch": 0.5529170331281149, "grad_norm": 1.336124291058086, "learning_rate": 0.00027637749120750296, "loss": 3.6455936431884766, "step": 943, "token_acc": 0.24077342725687537 }, { "epoch": 0.5535033714453239, "grad_norm": 1.4236844191835567, "learning_rate": 0.0002766705744431419, "loss": 3.6451268196105957, "step": 944, "token_acc": 0.23986033415081134 }, { "epoch": 0.554089709762533, "grad_norm": 1.5151071706187003, "learning_rate": 0.00027696365767878076, "loss": 3.671522617340088, "step": 945, "token_acc": 0.23706088334592965 }, { "epoch": 0.554676048079742, "grad_norm": 1.4474331432616678, "learning_rate": 0.0002772567409144197, "loss": 3.653430938720703, "step": 946, "token_acc": 0.23992236921559681 }, { "epoch": 0.555262386396951, "grad_norm": 1.3880910943751068, "learning_rate": 0.0002775498241500586, "loss": 3.6660728454589844, "step": 947, "token_acc": 0.23714635963752614 }, { "epoch": 0.55584872471416, "grad_norm": 1.282567661556668, "learning_rate": 0.00027784290738569755, "loss": 3.675222873687744, "step": 948, "token_acc": 0.23885116046013283 }, { "epoch": 0.5564350630313691, "grad_norm": 1.5024143213464343, "learning_rate": 0.00027813599062133647, "loss": 3.6648471355438232, "step": 949, "token_acc": 0.23879919635013444 }, { "epoch": 0.5570214013485781, "grad_norm": 1.3100281801484799, "learning_rate": 0.0002784290738569754, "loss": 3.688194751739502, "step": 950, "token_acc": 0.2354916704306932 }, { "epoch": 0.5576077396657871, "grad_norm": 1.2605884090101194, "learning_rate": 0.00027872215709261427, "loss": 3.662505626678467, "step": 951, "token_acc": 0.23758602096001794 }, { "epoch": 0.5581940779829961, "grad_norm": 1.5417644358255755, "learning_rate": 0.0002790152403282532, "loss": 3.6443071365356445, "step": 952, "token_acc": 0.24169462607079295 }, { "epoch": 0.5587804163002053, "grad_norm": 1.378770453888231, "learning_rate": 0.0002793083235638921, "loss": 3.699918270111084, "step": 953, "token_acc": 0.23420569438205788 }, { "epoch": 0.5593667546174143, "grad_norm": 1.3985727205672815, "learning_rate": 0.00027960140679953105, "loss": 3.6232728958129883, "step": 954, "token_acc": 0.24246628371303938 }, { "epoch": 0.5599530929346233, "grad_norm": 1.17551988270458, "learning_rate": 0.00027989449003517, "loss": 3.6305136680603027, "step": 955, "token_acc": 0.24157700707518454 }, { "epoch": 0.5605394312518323, "grad_norm": 1.464389733669569, "learning_rate": 0.0002801875732708089, "loss": 3.5816683769226074, "step": 956, "token_acc": 0.24643953445489353 }, { "epoch": 0.5611257695690414, "grad_norm": 1.5202380201888184, "learning_rate": 0.00028048065650644784, "loss": 3.6169071197509766, "step": 957, "token_acc": 0.24440837359098228 }, { "epoch": 0.5617121078862504, "grad_norm": 1.553775257267738, "learning_rate": 0.00028077373974208676, "loss": 3.608323574066162, "step": 958, "token_acc": 0.24402427292237105 }, { "epoch": 0.5622984462034594, "grad_norm": 1.0718120225455532, "learning_rate": 0.0002810668229777257, "loss": 3.6268668174743652, "step": 959, "token_acc": 0.24203052317852286 }, { "epoch": 0.5628847845206685, "grad_norm": 1.567008922012089, "learning_rate": 0.0002813599062133646, "loss": 3.6444454193115234, "step": 960, "token_acc": 0.239551687011117 }, { "epoch": 0.5634711228378775, "grad_norm": 1.260261647985568, "learning_rate": 0.00028165298944900354, "loss": 3.624262809753418, "step": 961, "token_acc": 0.24357578631944296 }, { "epoch": 0.5640574611550865, "grad_norm": 1.301844245572885, "learning_rate": 0.00028194607268464247, "loss": 3.635347843170166, "step": 962, "token_acc": 0.24149458839974922 }, { "epoch": 0.5646437994722955, "grad_norm": 1.7375882907047981, "learning_rate": 0.0002822391559202814, "loss": 3.6010549068450928, "step": 963, "token_acc": 0.24749788435865844 }, { "epoch": 0.5652301377895046, "grad_norm": 1.5143098884869093, "learning_rate": 0.0002825322391559203, "loss": 3.5999910831451416, "step": 964, "token_acc": 0.24292920407429094 }, { "epoch": 0.5658164761067136, "grad_norm": 1.295096613319315, "learning_rate": 0.0002828253223915592, "loss": 3.627565860748291, "step": 965, "token_acc": 0.24175109307249915 }, { "epoch": 0.5664028144239226, "grad_norm": 1.3590009676842254, "learning_rate": 0.0002831184056271981, "loss": 3.6357483863830566, "step": 966, "token_acc": 0.24200606940371194 }, { "epoch": 0.5669891527411316, "grad_norm": 1.5019272884970047, "learning_rate": 0.00028341148886283705, "loss": 3.6971030235290527, "step": 967, "token_acc": 0.23295798743184884 }, { "epoch": 0.5675754910583407, "grad_norm": 1.4407196964135756, "learning_rate": 0.000283704572098476, "loss": 3.6472368240356445, "step": 968, "token_acc": 0.24040210601558706 }, { "epoch": 0.5681618293755497, "grad_norm": 1.102711073391483, "learning_rate": 0.00028399765533411485, "loss": 3.6212730407714844, "step": 969, "token_acc": 0.2446720908191644 }, { "epoch": 0.5687481676927587, "grad_norm": 1.5965234085692896, "learning_rate": 0.0002842907385697538, "loss": 3.677675247192383, "step": 970, "token_acc": 0.2334926658302352 }, { "epoch": 0.5693345060099677, "grad_norm": 1.4657220461830711, "learning_rate": 0.0002845838218053927, "loss": 3.610607862472534, "step": 971, "token_acc": 0.24342608969382665 }, { "epoch": 0.5699208443271768, "grad_norm": 1.5361650544965009, "learning_rate": 0.00028487690504103163, "loss": 3.604387044906616, "step": 972, "token_acc": 0.2446919024348877 }, { "epoch": 0.5705071826443858, "grad_norm": 1.241086038496879, "learning_rate": 0.00028516998827667056, "loss": 3.6427831649780273, "step": 973, "token_acc": 0.23833738550493097 }, { "epoch": 0.5710935209615948, "grad_norm": 1.4342048818985964, "learning_rate": 0.0002854630715123095, "loss": 3.6039347648620605, "step": 974, "token_acc": 0.2419299107061669 }, { "epoch": 0.5716798592788038, "grad_norm": 1.3527565134234767, "learning_rate": 0.0002857561547479484, "loss": 3.618162155151367, "step": 975, "token_acc": 0.2438772973084972 }, { "epoch": 0.5722661975960129, "grad_norm": 1.2848849752898708, "learning_rate": 0.00028604923798358734, "loss": 3.5735626220703125, "step": 976, "token_acc": 0.24879073269314594 }, { "epoch": 0.5728525359132219, "grad_norm": 1.5040951386383123, "learning_rate": 0.00028634232121922627, "loss": 3.585871696472168, "step": 977, "token_acc": 0.24762311690126462 }, { "epoch": 0.5734388742304309, "grad_norm": 1.1852199729733248, "learning_rate": 0.0002866354044548652, "loss": 3.607529640197754, "step": 978, "token_acc": 0.2434763883259019 }, { "epoch": 0.5740252125476399, "grad_norm": 1.6717518342340965, "learning_rate": 0.0002869284876905041, "loss": 3.673367977142334, "step": 979, "token_acc": 0.23545167774700376 }, { "epoch": 0.574611550864849, "grad_norm": 1.289435702282436, "learning_rate": 0.00028722157092614305, "loss": 3.6096255779266357, "step": 980, "token_acc": 0.2433342050209205 }, { "epoch": 0.575197889182058, "grad_norm": 1.485572914756259, "learning_rate": 0.000287514654161782, "loss": 3.6879472732543945, "step": 981, "token_acc": 0.23460435601747376 }, { "epoch": 0.575784227499267, "grad_norm": 1.5299409048590933, "learning_rate": 0.0002878077373974209, "loss": 3.6253550052642822, "step": 982, "token_acc": 0.2424067395094425 }, { "epoch": 0.576370565816476, "grad_norm": 1.107850913781708, "learning_rate": 0.00028810082063305983, "loss": 3.6505818367004395, "step": 983, "token_acc": 0.23898196207410452 }, { "epoch": 0.5769569041336852, "grad_norm": 1.4180933459067868, "learning_rate": 0.00028839390386869876, "loss": 3.57523250579834, "step": 984, "token_acc": 0.24687898621944335 }, { "epoch": 0.5775432424508942, "grad_norm": 1.328159485655566, "learning_rate": 0.0002886869871043377, "loss": 3.5903732776641846, "step": 985, "token_acc": 0.24551621031945187 }, { "epoch": 0.5781295807681032, "grad_norm": 1.3772252299142345, "learning_rate": 0.00028898007033997656, "loss": 3.5875444412231445, "step": 986, "token_acc": 0.24669940046571906 }, { "epoch": 0.5787159190853123, "grad_norm": 1.4681394726941328, "learning_rate": 0.00028927315357561543, "loss": 3.664885997772217, "step": 987, "token_acc": 0.23568614704543978 }, { "epoch": 0.5793022574025213, "grad_norm": 1.3501584209081006, "learning_rate": 0.00028956623681125436, "loss": 3.636493682861328, "step": 988, "token_acc": 0.2403348935192476 }, { "epoch": 0.5798885957197303, "grad_norm": 1.3162626910672084, "learning_rate": 0.0002898593200468933, "loss": 3.5964999198913574, "step": 989, "token_acc": 0.24443516081328026 }, { "epoch": 0.5804749340369393, "grad_norm": 1.548417274062198, "learning_rate": 0.0002901524032825322, "loss": 3.646665573120117, "step": 990, "token_acc": 0.23912856608314878 }, { "epoch": 0.5810612723541484, "grad_norm": 1.3329807138893148, "learning_rate": 0.00029044548651817114, "loss": 3.604050636291504, "step": 991, "token_acc": 0.24390972794723825 }, { "epoch": 0.5816476106713574, "grad_norm": 1.8329805774599375, "learning_rate": 0.00029073856975381007, "loss": 3.6503264904022217, "step": 992, "token_acc": 0.23781426640291864 }, { "epoch": 0.5822339489885664, "grad_norm": 1.0386239214358395, "learning_rate": 0.000291031652989449, "loss": 3.671471118927002, "step": 993, "token_acc": 0.23513708251906298 }, { "epoch": 0.5828202873057754, "grad_norm": 1.3632384025221655, "learning_rate": 0.0002913247362250879, "loss": 3.6130118370056152, "step": 994, "token_acc": 0.24241149915641103 }, { "epoch": 0.5834066256229845, "grad_norm": 1.175448756516594, "learning_rate": 0.00029161781946072685, "loss": 3.623880386352539, "step": 995, "token_acc": 0.24121099575430469 }, { "epoch": 0.5839929639401935, "grad_norm": 1.6118297069228227, "learning_rate": 0.0002919109026963658, "loss": 3.6532297134399414, "step": 996, "token_acc": 0.23697712004469987 }, { "epoch": 0.5845793022574025, "grad_norm": 1.4563665658704563, "learning_rate": 0.0002922039859320047, "loss": 3.6464314460754395, "step": 997, "token_acc": 0.23914868288513783 }, { "epoch": 0.5851656405746115, "grad_norm": 1.268823400552579, "learning_rate": 0.00029249706916764363, "loss": 3.6455702781677246, "step": 998, "token_acc": 0.23681017530647253 }, { "epoch": 0.5857519788918206, "grad_norm": 1.3353034791218754, "learning_rate": 0.00029279015240328256, "loss": 3.631204128265381, "step": 999, "token_acc": 0.24132621422490358 }, { "epoch": 0.5863383172090296, "grad_norm": 1.1626901276860175, "learning_rate": 0.0002930832356389215, "loss": 3.6033883094787598, "step": 1000, "token_acc": 0.2434216214658328 }, { "epoch": 0.5869246555262386, "grad_norm": 1.5052685338613823, "learning_rate": 0.0002933763188745604, "loss": 3.6411616802215576, "step": 1001, "token_acc": 0.23954394649533942 }, { "epoch": 0.5875109938434476, "grad_norm": 1.2259943633896444, "learning_rate": 0.00029366940211019934, "loss": 3.672987461090088, "step": 1002, "token_acc": 0.2349683564484737 }, { "epoch": 0.5880973321606567, "grad_norm": 1.3537330602649946, "learning_rate": 0.00029396248534583827, "loss": 3.655560255050659, "step": 1003, "token_acc": 0.23706484605595948 }, { "epoch": 0.5886836704778657, "grad_norm": 1.23563878812983, "learning_rate": 0.0002942555685814772, "loss": 3.6128153800964355, "step": 1004, "token_acc": 0.2416935006812226 }, { "epoch": 0.5892700087950747, "grad_norm": 1.5330607884715752, "learning_rate": 0.00029454865181711607, "loss": 3.631728410720825, "step": 1005, "token_acc": 0.23961668672118777 }, { "epoch": 0.5898563471122837, "grad_norm": 1.4332882088741332, "learning_rate": 0.00029484173505275494, "loss": 3.6275744438171387, "step": 1006, "token_acc": 0.23903324113443747 }, { "epoch": 0.5904426854294929, "grad_norm": 1.2481874169431866, "learning_rate": 0.00029513481828839387, "loss": 3.638216972351074, "step": 1007, "token_acc": 0.24026861684156908 }, { "epoch": 0.5910290237467019, "grad_norm": 1.2322664946771773, "learning_rate": 0.0002954279015240328, "loss": 3.552248954772949, "step": 1008, "token_acc": 0.2494416131873663 }, { "epoch": 0.5916153620639109, "grad_norm": 1.2124431570188956, "learning_rate": 0.0002957209847596717, "loss": 3.5769190788269043, "step": 1009, "token_acc": 0.24815042255168582 }, { "epoch": 0.5922017003811199, "grad_norm": 1.3448800910646759, "learning_rate": 0.00029601406799531065, "loss": 3.6008925437927246, "step": 1010, "token_acc": 0.24129498862818036 }, { "epoch": 0.592788038698329, "grad_norm": 1.2132735883613457, "learning_rate": 0.0002963071512309496, "loss": 3.638364315032959, "step": 1011, "token_acc": 0.23899496705235304 }, { "epoch": 0.593374377015538, "grad_norm": 1.15419606938618, "learning_rate": 0.0002966002344665885, "loss": 3.5545496940612793, "step": 1012, "token_acc": 0.24795645118342452 }, { "epoch": 0.593960715332747, "grad_norm": 1.49230703988686, "learning_rate": 0.00029689331770222743, "loss": 3.59454607963562, "step": 1013, "token_acc": 0.2436325937764915 }, { "epoch": 0.594547053649956, "grad_norm": 1.3133019455039763, "learning_rate": 0.00029718640093786636, "loss": 3.631288528442383, "step": 1014, "token_acc": 0.24004319396530466 }, { "epoch": 0.5951333919671651, "grad_norm": 1.4435558317562465, "learning_rate": 0.0002974794841735053, "loss": 3.5768966674804688, "step": 1015, "token_acc": 0.24592537317354507 }, { "epoch": 0.5957197302843741, "grad_norm": 1.244061776445077, "learning_rate": 0.0002977725674091442, "loss": 3.611121654510498, "step": 1016, "token_acc": 0.24127637227609697 }, { "epoch": 0.5963060686015831, "grad_norm": 1.1682934186019207, "learning_rate": 0.00029806565064478314, "loss": 3.6214759349823, "step": 1017, "token_acc": 0.23975738695364865 }, { "epoch": 0.5968924069187922, "grad_norm": 1.2933443579843602, "learning_rate": 0.00029835873388042207, "loss": 3.6063427925109863, "step": 1018, "token_acc": 0.24503447758956678 }, { "epoch": 0.5974787452360012, "grad_norm": 1.426534387630876, "learning_rate": 0.000298651817116061, "loss": 3.57485294342041, "step": 1019, "token_acc": 0.24551953414727806 }, { "epoch": 0.5980650835532102, "grad_norm": 1.319177337623613, "learning_rate": 0.0002989449003516999, "loss": 3.580430746078491, "step": 1020, "token_acc": 0.24619251384600047 }, { "epoch": 0.5986514218704192, "grad_norm": 1.229497624194284, "learning_rate": 0.00029923798358733885, "loss": 3.6006674766540527, "step": 1021, "token_acc": 0.2422664194763761 }, { "epoch": 0.5992377601876283, "grad_norm": 1.2176666824970224, "learning_rate": 0.0002995310668229777, "loss": 3.566042900085449, "step": 1022, "token_acc": 0.24857474139833916 }, { "epoch": 0.5998240985048373, "grad_norm": 1.3094918895241563, "learning_rate": 0.00029982415005861665, "loss": 3.5706920623779297, "step": 1023, "token_acc": 0.2467744899834589 }, { "epoch": 0.6004104368220463, "grad_norm": 1.248359085046116, "learning_rate": 0.0003001172332942556, "loss": 3.5613551139831543, "step": 1024, "token_acc": 0.24592954107369888 }, { "epoch": 0.6009967751392553, "grad_norm": 1.2605542190605061, "learning_rate": 0.0003004103165298945, "loss": 3.55257248878479, "step": 1025, "token_acc": 0.24668932748729588 }, { "epoch": 0.6015831134564644, "grad_norm": 1.618452542926587, "learning_rate": 0.0003007033997655334, "loss": 3.5784997940063477, "step": 1026, "token_acc": 0.24533656817193472 }, { "epoch": 0.6021694517736734, "grad_norm": 1.4087004271954864, "learning_rate": 0.0003009964830011723, "loss": 3.5917067527770996, "step": 1027, "token_acc": 0.2432783162334239 }, { "epoch": 0.6027557900908824, "grad_norm": 1.3518881039942305, "learning_rate": 0.00030128956623681123, "loss": 3.5749573707580566, "step": 1028, "token_acc": 0.24643084430725787 }, { "epoch": 0.6033421284080914, "grad_norm": 1.2800365906757063, "learning_rate": 0.00030158264947245016, "loss": 3.6161842346191406, "step": 1029, "token_acc": 0.2406215310420285 }, { "epoch": 0.6039284667253005, "grad_norm": 1.1862867108440904, "learning_rate": 0.0003018757327080891, "loss": 3.5706331729888916, "step": 1030, "token_acc": 0.24555600950341286 }, { "epoch": 0.6045148050425095, "grad_norm": 1.5320263128554652, "learning_rate": 0.000302168815943728, "loss": 3.64438796043396, "step": 1031, "token_acc": 0.2397131655243398 }, { "epoch": 0.6051011433597185, "grad_norm": 1.0996446651716916, "learning_rate": 0.00030246189917936694, "loss": 3.57479190826416, "step": 1032, "token_acc": 0.2466358914257987 }, { "epoch": 0.6056874816769275, "grad_norm": 1.4535221697194116, "learning_rate": 0.00030275498241500587, "loss": 3.603222370147705, "step": 1033, "token_acc": 0.24240436696005416 }, { "epoch": 0.6062738199941367, "grad_norm": 1.0934213206078802, "learning_rate": 0.0003030480656506448, "loss": 3.628729820251465, "step": 1034, "token_acc": 0.24039238315060588 }, { "epoch": 0.6068601583113457, "grad_norm": 1.4052113790723477, "learning_rate": 0.0003033411488862837, "loss": 3.557115077972412, "step": 1035, "token_acc": 0.2477239581875148 }, { "epoch": 0.6074464966285547, "grad_norm": 1.1027017557717609, "learning_rate": 0.00030363423212192265, "loss": 3.584075450897217, "step": 1036, "token_acc": 0.24446978095039984 }, { "epoch": 0.6080328349457637, "grad_norm": 1.2720714744698813, "learning_rate": 0.0003039273153575616, "loss": 3.596144676208496, "step": 1037, "token_acc": 0.24484441749064262 }, { "epoch": 0.6086191732629728, "grad_norm": 1.0908590856950309, "learning_rate": 0.0003042203985932005, "loss": 3.5665292739868164, "step": 1038, "token_acc": 0.24901609830643304 }, { "epoch": 0.6092055115801818, "grad_norm": 1.3540384838745612, "learning_rate": 0.00030451348182883943, "loss": 3.5630507469177246, "step": 1039, "token_acc": 0.24783621165417077 }, { "epoch": 0.6097918498973908, "grad_norm": 1.3358213164083288, "learning_rate": 0.0003048065650644783, "loss": 3.559636116027832, "step": 1040, "token_acc": 0.24888909163115697 }, { "epoch": 0.6103781882145998, "grad_norm": 1.321331455081741, "learning_rate": 0.00030509964830011723, "loss": 3.58921480178833, "step": 1041, "token_acc": 0.246624477205616 }, { "epoch": 0.6109645265318089, "grad_norm": 1.288596534247709, "learning_rate": 0.00030539273153575616, "loss": 3.559378147125244, "step": 1042, "token_acc": 0.24742083393036252 }, { "epoch": 0.6115508648490179, "grad_norm": 1.1938219310801197, "learning_rate": 0.0003056858147713951, "loss": 3.587851047515869, "step": 1043, "token_acc": 0.24487342887044464 }, { "epoch": 0.6121372031662269, "grad_norm": 1.2576937115774205, "learning_rate": 0.000305978898007034, "loss": 3.512676239013672, "step": 1044, "token_acc": 0.2536537277933675 }, { "epoch": 0.612723541483436, "grad_norm": 1.2264604190519774, "learning_rate": 0.00030627198124267294, "loss": 3.5607190132141113, "step": 1045, "token_acc": 0.24816403080845864 }, { "epoch": 0.613309879800645, "grad_norm": 1.3621433500744178, "learning_rate": 0.0003065650644783118, "loss": 3.553762197494507, "step": 1046, "token_acc": 0.2483781865764765 }, { "epoch": 0.613896218117854, "grad_norm": 1.2562294334755622, "learning_rate": 0.00030685814771395074, "loss": 3.584376096725464, "step": 1047, "token_acc": 0.24362378351095432 }, { "epoch": 0.614482556435063, "grad_norm": 1.0916962712069853, "learning_rate": 0.00030715123094958967, "loss": 3.518728256225586, "step": 1048, "token_acc": 0.25117837440044777 }, { "epoch": 0.6150688947522721, "grad_norm": 1.2879650968830731, "learning_rate": 0.0003074443141852286, "loss": 3.5597729682922363, "step": 1049, "token_acc": 0.24674009172991093 }, { "epoch": 0.6156552330694811, "grad_norm": 1.306969513994692, "learning_rate": 0.0003077373974208675, "loss": 3.6184351444244385, "step": 1050, "token_acc": 0.2398520965389995 }, { "epoch": 0.6162415713866901, "grad_norm": 1.0379125959396636, "learning_rate": 0.00030803048065650645, "loss": 3.5165929794311523, "step": 1051, "token_acc": 0.251242698251751 }, { "epoch": 0.6168279097038991, "grad_norm": 1.2578212169091842, "learning_rate": 0.0003083235638921454, "loss": 3.611384630203247, "step": 1052, "token_acc": 0.24111120393901744 }, { "epoch": 0.6174142480211082, "grad_norm": 1.1708280867492544, "learning_rate": 0.0003086166471277843, "loss": 3.5429625511169434, "step": 1053, "token_acc": 0.24778100628575972 }, { "epoch": 0.6180005863383172, "grad_norm": 1.2800884252386273, "learning_rate": 0.00030890973036342323, "loss": 3.5960659980773926, "step": 1054, "token_acc": 0.24389746994951972 }, { "epoch": 0.6185869246555262, "grad_norm": 1.2309028839167542, "learning_rate": 0.00030920281359906216, "loss": 3.5429959297180176, "step": 1055, "token_acc": 0.24793755337290654 }, { "epoch": 0.6191732629727352, "grad_norm": 1.2592940658145133, "learning_rate": 0.0003094958968347011, "loss": 3.577817916870117, "step": 1056, "token_acc": 0.245884408491202 }, { "epoch": 0.6197596012899443, "grad_norm": 1.3332887358166907, "learning_rate": 0.00030978898007034, "loss": 3.568650960922241, "step": 1057, "token_acc": 0.24795525384537268 }, { "epoch": 0.6203459396071533, "grad_norm": 1.2369373345427892, "learning_rate": 0.0003100820633059789, "loss": 3.558310031890869, "step": 1058, "token_acc": 0.248153233703751 }, { "epoch": 0.6209322779243623, "grad_norm": 1.2807449244278, "learning_rate": 0.0003103751465416178, "loss": 3.57058048248291, "step": 1059, "token_acc": 0.2465741393764388 }, { "epoch": 0.6215186162415713, "grad_norm": 1.7017458386965152, "learning_rate": 0.00031066822977725674, "loss": 3.581871509552002, "step": 1060, "token_acc": 0.24369723233250434 }, { "epoch": 0.6221049545587805, "grad_norm": 1.008323281954998, "learning_rate": 0.00031096131301289567, "loss": 3.576396942138672, "step": 1061, "token_acc": 0.24595551379233238 }, { "epoch": 0.6226912928759895, "grad_norm": 1.6023932079371275, "learning_rate": 0.0003112543962485346, "loss": 3.582634449005127, "step": 1062, "token_acc": 0.24497768890370122 }, { "epoch": 0.6232776311931985, "grad_norm": 1.034828430848396, "learning_rate": 0.0003115474794841735, "loss": 3.5482912063598633, "step": 1063, "token_acc": 0.24882604664078267 }, { "epoch": 0.6238639695104075, "grad_norm": 1.3511534247738706, "learning_rate": 0.00031184056271981245, "loss": 3.564587354660034, "step": 1064, "token_acc": 0.2474659930453532 }, { "epoch": 0.6244503078276166, "grad_norm": 1.1506197615092917, "learning_rate": 0.0003121336459554514, "loss": 3.621370792388916, "step": 1065, "token_acc": 0.24299295756183764 }, { "epoch": 0.6250366461448256, "grad_norm": 1.2004968585508264, "learning_rate": 0.0003124267291910903, "loss": 3.62357759475708, "step": 1066, "token_acc": 0.23938016145495036 }, { "epoch": 0.6256229844620346, "grad_norm": 1.3657658591337762, "learning_rate": 0.0003127198124267292, "loss": 3.601881265640259, "step": 1067, "token_acc": 0.24152505961459966 }, { "epoch": 0.6262093227792436, "grad_norm": 1.4428572892473865, "learning_rate": 0.0003130128956623681, "loss": 3.605905771255493, "step": 1068, "token_acc": 0.24115721980531626 }, { "epoch": 0.6267956610964527, "grad_norm": 1.3483259583455922, "learning_rate": 0.00031330597889800703, "loss": 3.50486421585083, "step": 1069, "token_acc": 0.2549713510991744 }, { "epoch": 0.6273819994136617, "grad_norm": 1.4080052386525508, "learning_rate": 0.00031359906213364596, "loss": 3.589035749435425, "step": 1070, "token_acc": 0.24182433311646065 }, { "epoch": 0.6279683377308707, "grad_norm": 1.3657795830421322, "learning_rate": 0.0003138921453692849, "loss": 3.6037979125976562, "step": 1071, "token_acc": 0.24357838795394154 }, { "epoch": 0.6285546760480798, "grad_norm": 1.2079770564130006, "learning_rate": 0.0003141852286049238, "loss": 3.6032891273498535, "step": 1072, "token_acc": 0.24244832217638151 }, { "epoch": 0.6291410143652888, "grad_norm": 1.3856147410341002, "learning_rate": 0.00031447831184056274, "loss": 3.5857534408569336, "step": 1073, "token_acc": 0.244249200856987 }, { "epoch": 0.6297273526824978, "grad_norm": 1.1243418437968338, "learning_rate": 0.00031477139507620167, "loss": 3.6087419986724854, "step": 1074, "token_acc": 0.24339176787382535 }, { "epoch": 0.6303136909997068, "grad_norm": 1.0467605121947294, "learning_rate": 0.0003150644783118406, "loss": 3.5274486541748047, "step": 1075, "token_acc": 0.2508644671502483 }, { "epoch": 0.6309000293169159, "grad_norm": 1.1430397492857838, "learning_rate": 0.00031535756154747947, "loss": 3.5465409755706787, "step": 1076, "token_acc": 0.24883430799220274 }, { "epoch": 0.6314863676341249, "grad_norm": 1.5479730623570227, "learning_rate": 0.0003156506447831184, "loss": 3.5545127391815186, "step": 1077, "token_acc": 0.24666378667085576 }, { "epoch": 0.6320727059513339, "grad_norm": 1.0795272060536478, "learning_rate": 0.0003159437280187573, "loss": 3.5707156658172607, "step": 1078, "token_acc": 0.24650693047260813 }, { "epoch": 0.6326590442685429, "grad_norm": 1.5190832449251364, "learning_rate": 0.00031623681125439625, "loss": 3.605468273162842, "step": 1079, "token_acc": 0.23916483205063277 }, { "epoch": 0.633245382585752, "grad_norm": 0.8291199410879295, "learning_rate": 0.0003165298944900352, "loss": 3.560580253601074, "step": 1080, "token_acc": 0.2454103310975121 }, { "epoch": 0.633831720902961, "grad_norm": 1.420828667300388, "learning_rate": 0.0003168229777256741, "loss": 3.5293874740600586, "step": 1081, "token_acc": 0.2519151152193725 }, { "epoch": 0.63441805922017, "grad_norm": 1.1572233907469214, "learning_rate": 0.00031711606096131303, "loss": 3.5554580688476562, "step": 1082, "token_acc": 0.24748734706890368 }, { "epoch": 0.635004397537379, "grad_norm": 1.3992772126364095, "learning_rate": 0.00031740914419695196, "loss": 3.634899377822876, "step": 1083, "token_acc": 0.23648715806553 }, { "epoch": 0.6355907358545881, "grad_norm": 1.2881920686561694, "learning_rate": 0.0003177022274325909, "loss": 3.572756290435791, "step": 1084, "token_acc": 0.24625218019043404 }, { "epoch": 0.6361770741717971, "grad_norm": 1.1330656735565585, "learning_rate": 0.0003179953106682298, "loss": 3.590031147003174, "step": 1085, "token_acc": 0.24307775597211884 }, { "epoch": 0.6367634124890061, "grad_norm": 1.1416541572544145, "learning_rate": 0.00031828839390386874, "loss": 3.591721534729004, "step": 1086, "token_acc": 0.2436965605587694 }, { "epoch": 0.6373497508062151, "grad_norm": 1.2962455590491624, "learning_rate": 0.0003185814771395076, "loss": 3.5729780197143555, "step": 1087, "token_acc": 0.24474010473529356 }, { "epoch": 0.6379360891234243, "grad_norm": 1.2000039526613873, "learning_rate": 0.00031887456037514654, "loss": 3.5554540157318115, "step": 1088, "token_acc": 0.24678570804315852 }, { "epoch": 0.6385224274406333, "grad_norm": 1.1573919067125542, "learning_rate": 0.00031916764361078546, "loss": 3.5619325637817383, "step": 1089, "token_acc": 0.2472662643207856 }, { "epoch": 0.6391087657578423, "grad_norm": 1.1822609910139814, "learning_rate": 0.0003194607268464244, "loss": 3.5487148761749268, "step": 1090, "token_acc": 0.2490390021223998 }, { "epoch": 0.6396951040750513, "grad_norm": 1.2179345246746551, "learning_rate": 0.0003197538100820633, "loss": 3.5300283432006836, "step": 1091, "token_acc": 0.2503283397503825 }, { "epoch": 0.6402814423922604, "grad_norm": 1.1379827928680268, "learning_rate": 0.00032004689331770225, "loss": 3.539045572280884, "step": 1092, "token_acc": 0.24947478721819574 }, { "epoch": 0.6408677807094694, "grad_norm": 1.1383140911851763, "learning_rate": 0.0003203399765533411, "loss": 3.6093735694885254, "step": 1093, "token_acc": 0.24054456046624575 }, { "epoch": 0.6414541190266784, "grad_norm": 1.3386410166576508, "learning_rate": 0.00032063305978898005, "loss": 3.5443878173828125, "step": 1094, "token_acc": 0.24872283931852482 }, { "epoch": 0.6420404573438874, "grad_norm": 0.9958463126199089, "learning_rate": 0.000320926143024619, "loss": 3.5027291774749756, "step": 1095, "token_acc": 0.252696881462228 }, { "epoch": 0.6426267956610965, "grad_norm": 1.2637195292149674, "learning_rate": 0.0003212192262602579, "loss": 3.5294947624206543, "step": 1096, "token_acc": 0.25016069846893585 }, { "epoch": 0.6432131339783055, "grad_norm": 1.0775112577219639, "learning_rate": 0.00032151230949589683, "loss": 3.524590015411377, "step": 1097, "token_acc": 0.2503629842108192 }, { "epoch": 0.6437994722955145, "grad_norm": 1.3906939994597405, "learning_rate": 0.00032180539273153575, "loss": 3.562063694000244, "step": 1098, "token_acc": 0.24525273863208116 }, { "epoch": 0.6443858106127235, "grad_norm": 1.260113265740618, "learning_rate": 0.0003220984759671747, "loss": 3.5748047828674316, "step": 1099, "token_acc": 0.2450031146418931 }, { "epoch": 0.6449721489299326, "grad_norm": 1.230003679861811, "learning_rate": 0.0003223915592028136, "loss": 3.595684051513672, "step": 1100, "token_acc": 0.24075744832683438 }, { "epoch": 0.6455584872471416, "grad_norm": 1.3402025501774848, "learning_rate": 0.00032268464243845254, "loss": 3.593212604522705, "step": 1101, "token_acc": 0.2434796958300396 }, { "epoch": 0.6461448255643506, "grad_norm": 0.9260537558080999, "learning_rate": 0.00032297772567409146, "loss": 3.5308728218078613, "step": 1102, "token_acc": 0.24810283374200562 }, { "epoch": 0.6467311638815597, "grad_norm": 1.3607546331294096, "learning_rate": 0.0003232708089097304, "loss": 3.5628058910369873, "step": 1103, "token_acc": 0.2468668613154561 }, { "epoch": 0.6473175021987687, "grad_norm": 1.1217365511168185, "learning_rate": 0.0003235638921453693, "loss": 3.540827751159668, "step": 1104, "token_acc": 0.2516629336829591 }, { "epoch": 0.6479038405159777, "grad_norm": 1.351973406383005, "learning_rate": 0.00032385697538100824, "loss": 3.59983491897583, "step": 1105, "token_acc": 0.2413246809454221 }, { "epoch": 0.6484901788331867, "grad_norm": 1.355231792514714, "learning_rate": 0.00032415005861664717, "loss": 3.5499110221862793, "step": 1106, "token_acc": 0.24766060323300657 }, { "epoch": 0.6490765171503958, "grad_norm": 1.1929701896496483, "learning_rate": 0.0003244431418522861, "loss": 3.5612032413482666, "step": 1107, "token_acc": 0.2467023210035984 }, { "epoch": 0.6496628554676048, "grad_norm": 1.3111405815678407, "learning_rate": 0.00032473622508792497, "loss": 3.5945374965667725, "step": 1108, "token_acc": 0.2418001062457312 }, { "epoch": 0.6502491937848138, "grad_norm": 1.0737005914840283, "learning_rate": 0.0003250293083235639, "loss": 3.4876513481140137, "step": 1109, "token_acc": 0.25558413787262874 }, { "epoch": 0.6508355321020228, "grad_norm": 1.307575869342278, "learning_rate": 0.0003253223915592028, "loss": 3.5603158473968506, "step": 1110, "token_acc": 0.24741451659155544 }, { "epoch": 0.6514218704192319, "grad_norm": 1.209003447570316, "learning_rate": 0.0003256154747948417, "loss": 3.5082802772521973, "step": 1111, "token_acc": 0.25135685656087786 }, { "epoch": 0.6520082087364409, "grad_norm": 1.0395884785351583, "learning_rate": 0.0003259085580304806, "loss": 3.5476131439208984, "step": 1112, "token_acc": 0.24823504280184025 }, { "epoch": 0.6525945470536499, "grad_norm": 1.2087237450963717, "learning_rate": 0.00032620164126611955, "loss": 3.5242671966552734, "step": 1113, "token_acc": 0.25044261411746593 }, { "epoch": 0.6531808853708589, "grad_norm": 1.1238498900421816, "learning_rate": 0.0003264947245017585, "loss": 3.57403564453125, "step": 1114, "token_acc": 0.24646323511065646 }, { "epoch": 0.653767223688068, "grad_norm": 0.9858208484784472, "learning_rate": 0.0003267878077373974, "loss": 3.553864002227783, "step": 1115, "token_acc": 0.24866785546395923 }, { "epoch": 0.6543535620052771, "grad_norm": 1.4029590370655733, "learning_rate": 0.00032708089097303634, "loss": 3.559656858444214, "step": 1116, "token_acc": 0.24425655346921143 }, { "epoch": 0.6549399003224861, "grad_norm": 1.3028626023006458, "learning_rate": 0.00032737397420867526, "loss": 3.5004982948303223, "step": 1117, "token_acc": 0.2532436141346724 }, { "epoch": 0.6555262386396951, "grad_norm": 0.9296701964983376, "learning_rate": 0.0003276670574443142, "loss": 3.5941061973571777, "step": 1118, "token_acc": 0.2403735487907555 }, { "epoch": 0.6561125769569042, "grad_norm": 1.3879578424444867, "learning_rate": 0.0003279601406799531, "loss": 3.5592808723449707, "step": 1119, "token_acc": 0.24724811460503343 }, { "epoch": 0.6566989152741132, "grad_norm": 1.1693239724211568, "learning_rate": 0.00032825322391559204, "loss": 3.5292327404022217, "step": 1120, "token_acc": 0.2498458529695373 }, { "epoch": 0.6572852535913222, "grad_norm": 1.0471179945909954, "learning_rate": 0.00032854630715123097, "loss": 3.552354335784912, "step": 1121, "token_acc": 0.2473074620115077 }, { "epoch": 0.6578715919085312, "grad_norm": 1.3109248459522524, "learning_rate": 0.0003288393903868699, "loss": 3.5934853553771973, "step": 1122, "token_acc": 0.24050911031246566 }, { "epoch": 0.6584579302257403, "grad_norm": 0.969650259836423, "learning_rate": 0.0003291324736225088, "loss": 3.5656421184539795, "step": 1123, "token_acc": 0.24619973492344135 }, { "epoch": 0.6590442685429493, "grad_norm": 1.220902322112274, "learning_rate": 0.00032942555685814775, "loss": 3.538597583770752, "step": 1124, "token_acc": 0.24896147309072797 }, { "epoch": 0.6596306068601583, "grad_norm": 1.3066563924582757, "learning_rate": 0.0003297186400937867, "loss": 3.481600761413574, "step": 1125, "token_acc": 0.256214083676146 }, { "epoch": 0.6602169451773673, "grad_norm": 0.9900540592016093, "learning_rate": 0.0003300117233294256, "loss": 3.5342531204223633, "step": 1126, "token_acc": 0.2467207591884573 }, { "epoch": 0.6608032834945764, "grad_norm": 1.052127838539941, "learning_rate": 0.00033030480656506453, "loss": 3.5289125442504883, "step": 1127, "token_acc": 0.24936321294159233 }, { "epoch": 0.6613896218117854, "grad_norm": 1.332404081317421, "learning_rate": 0.0003305978898007034, "loss": 3.5762147903442383, "step": 1128, "token_acc": 0.24507660283721616 }, { "epoch": 0.6619759601289944, "grad_norm": 1.136520272589364, "learning_rate": 0.0003308909730363423, "loss": 3.5230135917663574, "step": 1129, "token_acc": 0.25160075329566856 }, { "epoch": 0.6625622984462035, "grad_norm": 1.1348348005989932, "learning_rate": 0.0003311840562719812, "loss": 3.5148396492004395, "step": 1130, "token_acc": 0.25060369346420647 }, { "epoch": 0.6631486367634125, "grad_norm": 1.440027444073951, "learning_rate": 0.00033147713950762013, "loss": 3.5625317096710205, "step": 1131, "token_acc": 0.24465781356872765 }, { "epoch": 0.6637349750806215, "grad_norm": 0.9401583073871229, "learning_rate": 0.00033177022274325906, "loss": 3.5676870346069336, "step": 1132, "token_acc": 0.24446914448993518 }, { "epoch": 0.6643213133978305, "grad_norm": 1.248781771466693, "learning_rate": 0.000332063305978898, "loss": 3.5910234451293945, "step": 1133, "token_acc": 0.24314042086156315 }, { "epoch": 0.6649076517150396, "grad_norm": 1.390313023548334, "learning_rate": 0.0003323563892145369, "loss": 3.5809402465820312, "step": 1134, "token_acc": 0.24386106704183755 }, { "epoch": 0.6654939900322486, "grad_norm": 1.1535699229215877, "learning_rate": 0.00033264947245017584, "loss": 3.533020496368408, "step": 1135, "token_acc": 0.249339550348733 }, { "epoch": 0.6660803283494576, "grad_norm": 0.957687999299108, "learning_rate": 0.00033294255568581477, "loss": 3.498629570007324, "step": 1136, "token_acc": 0.2547027831028947 }, { "epoch": 0.6666666666666666, "grad_norm": 1.0455585881163558, "learning_rate": 0.0003332356389214537, "loss": 3.547109603881836, "step": 1137, "token_acc": 0.24668252939684915 }, { "epoch": 0.6672530049838757, "grad_norm": 1.1612803316441251, "learning_rate": 0.0003335287221570926, "loss": 3.5291762351989746, "step": 1138, "token_acc": 0.25067939617429447 }, { "epoch": 0.6678393433010847, "grad_norm": 1.108069522393023, "learning_rate": 0.00033382180539273155, "loss": 3.547048568725586, "step": 1139, "token_acc": 0.24649685396759177 }, { "epoch": 0.6684256816182937, "grad_norm": 1.2785390987270575, "learning_rate": 0.0003341148886283705, "loss": 3.5433878898620605, "step": 1140, "token_acc": 0.25009580343389565 }, { "epoch": 0.6690120199355027, "grad_norm": 1.10542642193351, "learning_rate": 0.0003344079718640094, "loss": 3.540172576904297, "step": 1141, "token_acc": 0.2467566933861023 }, { "epoch": 0.6695983582527119, "grad_norm": 1.2953390295739424, "learning_rate": 0.00033470105509964833, "loss": 3.5272157192230225, "step": 1142, "token_acc": 0.2501110803925828 }, { "epoch": 0.6701846965699209, "grad_norm": 1.1108067743533743, "learning_rate": 0.00033499413833528726, "loss": 3.528212785720825, "step": 1143, "token_acc": 0.25052162007209744 }, { "epoch": 0.6707710348871299, "grad_norm": 1.3463178539451273, "learning_rate": 0.0003352872215709262, "loss": 3.5627002716064453, "step": 1144, "token_acc": 0.245476117451452 }, { "epoch": 0.6713573732043389, "grad_norm": 1.195757802747223, "learning_rate": 0.0003355803048065651, "loss": 3.5535452365875244, "step": 1145, "token_acc": 0.24571338058844494 }, { "epoch": 0.671943711521548, "grad_norm": 1.182634878953239, "learning_rate": 0.00033587338804220404, "loss": 3.495540142059326, "step": 1146, "token_acc": 0.25310745607774027 }, { "epoch": 0.672530049838757, "grad_norm": 1.1596468853060404, "learning_rate": 0.0003361664712778429, "loss": 3.508944034576416, "step": 1147, "token_acc": 0.25265622418635386 }, { "epoch": 0.673116388155966, "grad_norm": 1.1390830936271432, "learning_rate": 0.0003364595545134818, "loss": 3.5513880252838135, "step": 1148, "token_acc": 0.2457098802355412 }, { "epoch": 0.673702726473175, "grad_norm": 1.1886038458253203, "learning_rate": 0.0003367526377491207, "loss": 3.540112018585205, "step": 1149, "token_acc": 0.24757359557783776 }, { "epoch": 0.6742890647903841, "grad_norm": 1.0771840986201147, "learning_rate": 0.00033704572098475964, "loss": 3.5456600189208984, "step": 1150, "token_acc": 0.24660703310977458 }, { "epoch": 0.6748754031075931, "grad_norm": 1.1187039781642754, "learning_rate": 0.00033733880422039857, "loss": 3.5277175903320312, "step": 1151, "token_acc": 0.25042674309148166 }, { "epoch": 0.6754617414248021, "grad_norm": 1.3258653333578712, "learning_rate": 0.0003376318874560375, "loss": 3.5155279636383057, "step": 1152, "token_acc": 0.2514580254093943 }, { "epoch": 0.6760480797420111, "grad_norm": 1.063979292920111, "learning_rate": 0.0003379249706916764, "loss": 3.5286808013916016, "step": 1153, "token_acc": 0.24790684954560754 }, { "epoch": 0.6766344180592202, "grad_norm": 1.1978726028697113, "learning_rate": 0.00033821805392731535, "loss": 3.562964916229248, "step": 1154, "token_acc": 0.2453826068848415 }, { "epoch": 0.6772207563764292, "grad_norm": 1.1271359028397923, "learning_rate": 0.0003385111371629543, "loss": 3.519597053527832, "step": 1155, "token_acc": 0.25035711435090513 }, { "epoch": 0.6778070946936382, "grad_norm": 1.1437329064594568, "learning_rate": 0.0003388042203985932, "loss": 3.5524344444274902, "step": 1156, "token_acc": 0.2450053735998502 }, { "epoch": 0.6783934330108473, "grad_norm": 1.0928591410622746, "learning_rate": 0.00033909730363423213, "loss": 3.4724159240722656, "step": 1157, "token_acc": 0.2541115970065948 }, { "epoch": 0.6789797713280563, "grad_norm": 1.115896710197314, "learning_rate": 0.00033939038686987106, "loss": 3.559156894683838, "step": 1158, "token_acc": 0.2444567147924417 }, { "epoch": 0.6795661096452653, "grad_norm": 1.4392482058325582, "learning_rate": 0.00033968347010551, "loss": 3.5400705337524414, "step": 1159, "token_acc": 0.24868272388414306 }, { "epoch": 0.6801524479624743, "grad_norm": 1.0183113818136642, "learning_rate": 0.0003399765533411489, "loss": 3.4402782917022705, "step": 1160, "token_acc": 0.25780541620752456 }, { "epoch": 0.6807387862796834, "grad_norm": 1.3486267132712528, "learning_rate": 0.00034026963657678784, "loss": 3.5807957649230957, "step": 1161, "token_acc": 0.24274357981675054 }, { "epoch": 0.6813251245968924, "grad_norm": 1.0751325789540667, "learning_rate": 0.00034056271981242677, "loss": 3.4894602298736572, "step": 1162, "token_acc": 0.254426472023464 }, { "epoch": 0.6819114629141014, "grad_norm": 1.3859886314991148, "learning_rate": 0.0003408558030480657, "loss": 3.5615060329437256, "step": 1163, "token_acc": 0.24496862605191916 }, { "epoch": 0.6824978012313104, "grad_norm": 1.0178750970828456, "learning_rate": 0.00034114888628370457, "loss": 3.52013897895813, "step": 1164, "token_acc": 0.25224347216670956 }, { "epoch": 0.6830841395485195, "grad_norm": 1.2366225371445583, "learning_rate": 0.0003414419695193435, "loss": 3.5442519187927246, "step": 1165, "token_acc": 0.24564135368430173 }, { "epoch": 0.6836704778657285, "grad_norm": 1.2195981142350034, "learning_rate": 0.0003417350527549824, "loss": 3.535538673400879, "step": 1166, "token_acc": 0.24952392748215696 }, { "epoch": 0.6842568161829375, "grad_norm": 0.8817938506181477, "learning_rate": 0.00034202813599062135, "loss": 3.5537772178649902, "step": 1167, "token_acc": 0.24662827622408515 }, { "epoch": 0.6848431545001465, "grad_norm": 1.0967838727921415, "learning_rate": 0.0003423212192262602, "loss": 3.5506629943847656, "step": 1168, "token_acc": 0.2473518822966308 }, { "epoch": 0.6854294928173557, "grad_norm": 1.2437757554668571, "learning_rate": 0.00034261430246189915, "loss": 3.5429272651672363, "step": 1169, "token_acc": 0.24572574739724282 }, { "epoch": 0.6860158311345647, "grad_norm": 1.1782248783121134, "learning_rate": 0.0003429073856975381, "loss": 3.5021414756774902, "step": 1170, "token_acc": 0.2516976942342947 }, { "epoch": 0.6866021694517737, "grad_norm": 1.2704713431524937, "learning_rate": 0.000343200468933177, "loss": 3.528017520904541, "step": 1171, "token_acc": 0.2482961285870467 }, { "epoch": 0.6871885077689827, "grad_norm": 1.0816706899115751, "learning_rate": 0.00034349355216881593, "loss": 3.5300867557525635, "step": 1172, "token_acc": 0.24826474707530072 }, { "epoch": 0.6877748460861918, "grad_norm": 1.3348431394423883, "learning_rate": 0.00034378663540445486, "loss": 3.5262646675109863, "step": 1173, "token_acc": 0.248 }, { "epoch": 0.6883611844034008, "grad_norm": 1.006175191731131, "learning_rate": 0.0003440797186400938, "loss": 3.5259833335876465, "step": 1174, "token_acc": 0.25009552144520125 }, { "epoch": 0.6889475227206098, "grad_norm": 1.360901272289494, "learning_rate": 0.0003443728018757327, "loss": 3.5509562492370605, "step": 1175, "token_acc": 0.2466580212615457 }, { "epoch": 0.6895338610378188, "grad_norm": 0.9082580758852431, "learning_rate": 0.00034466588511137164, "loss": 3.540731906890869, "step": 1176, "token_acc": 0.2474872649419408 }, { "epoch": 0.6901201993550279, "grad_norm": 1.2298691569647349, "learning_rate": 0.00034495896834701057, "loss": 3.522731304168701, "step": 1177, "token_acc": 0.24689660611477693 }, { "epoch": 0.6907065376722369, "grad_norm": 1.2562276728572417, "learning_rate": 0.0003452520515826495, "loss": 3.563549518585205, "step": 1178, "token_acc": 0.24390001103096473 }, { "epoch": 0.6912928759894459, "grad_norm": 1.4389558089534102, "learning_rate": 0.0003455451348182884, "loss": 3.5353622436523438, "step": 1179, "token_acc": 0.24938515029220187 }, { "epoch": 0.6918792143066549, "grad_norm": 1.0543070598216913, "learning_rate": 0.00034583821805392735, "loss": 3.512922525405884, "step": 1180, "token_acc": 0.24991110162861815 }, { "epoch": 0.692465552623864, "grad_norm": 1.2160004716382278, "learning_rate": 0.0003461313012895663, "loss": 3.561436176300049, "step": 1181, "token_acc": 0.24524189579432804 }, { "epoch": 0.693051890941073, "grad_norm": 1.018136713771027, "learning_rate": 0.00034642438452520515, "loss": 3.464406728744507, "step": 1182, "token_acc": 0.25382326395121596 }, { "epoch": 0.693638229258282, "grad_norm": 1.431307631593445, "learning_rate": 0.0003467174677608441, "loss": 3.4845755100250244, "step": 1183, "token_acc": 0.2531205169628433 }, { "epoch": 0.6942245675754911, "grad_norm": 0.9004627438402442, "learning_rate": 0.000347010550996483, "loss": 3.472142219543457, "step": 1184, "token_acc": 0.25498336935561106 }, { "epoch": 0.6948109058927001, "grad_norm": 1.158960208969192, "learning_rate": 0.00034730363423212193, "loss": 3.5171923637390137, "step": 1185, "token_acc": 0.2506687934253579 }, { "epoch": 0.6953972442099091, "grad_norm": 0.9875161955788607, "learning_rate": 0.00034759671746776086, "loss": 3.5057528018951416, "step": 1186, "token_acc": 0.25150209945947616 }, { "epoch": 0.6959835825271181, "grad_norm": 1.2011554338477437, "learning_rate": 0.0003478898007033998, "loss": 3.5390753746032715, "step": 1187, "token_acc": 0.2474407327586207 }, { "epoch": 0.6965699208443272, "grad_norm": 1.2559757321722238, "learning_rate": 0.0003481828839390387, "loss": 3.507009506225586, "step": 1188, "token_acc": 0.24938997554693867 }, { "epoch": 0.6971562591615362, "grad_norm": 1.0191285004044885, "learning_rate": 0.0003484759671746776, "loss": 3.506450891494751, "step": 1189, "token_acc": 0.2503491822546896 }, { "epoch": 0.6977425974787452, "grad_norm": 0.9548017028787985, "learning_rate": 0.0003487690504103165, "loss": 3.5579071044921875, "step": 1190, "token_acc": 0.24633295462139945 }, { "epoch": 0.6983289357959542, "grad_norm": 0.981484851692534, "learning_rate": 0.00034906213364595544, "loss": 3.527952194213867, "step": 1191, "token_acc": 0.2501665828265616 }, { "epoch": 0.6989152741131633, "grad_norm": 1.3016208447900965, "learning_rate": 0.00034935521688159437, "loss": 3.5000200271606445, "step": 1192, "token_acc": 0.25106593727059723 }, { "epoch": 0.6995016124303723, "grad_norm": 1.011815590310952, "learning_rate": 0.0003496483001172333, "loss": 3.537379264831543, "step": 1193, "token_acc": 0.24750544787246243 }, { "epoch": 0.7000879507475813, "grad_norm": 1.0429483729395566, "learning_rate": 0.0003499413833528722, "loss": 3.484342575073242, "step": 1194, "token_acc": 0.2543926294995961 }, { "epoch": 0.7006742890647903, "grad_norm": 0.918082263696897, "learning_rate": 0.00035023446658851115, "loss": 3.5429186820983887, "step": 1195, "token_acc": 0.24734160243144343 }, { "epoch": 0.7012606273819995, "grad_norm": 1.0569709727769927, "learning_rate": 0.0003505275498241501, "loss": 3.5127363204956055, "step": 1196, "token_acc": 0.25067248601867015 }, { "epoch": 0.7018469656992085, "grad_norm": 1.049521149314088, "learning_rate": 0.000350820633059789, "loss": 3.5081734657287598, "step": 1197, "token_acc": 0.250620198828065 }, { "epoch": 0.7024333040164175, "grad_norm": 1.3525048616174489, "learning_rate": 0.00035111371629542793, "loss": 3.577838897705078, "step": 1198, "token_acc": 0.2428012769976505 }, { "epoch": 0.7030196423336265, "grad_norm": 1.0552220222608861, "learning_rate": 0.00035140679953106686, "loss": 3.5475518703460693, "step": 1199, "token_acc": 0.24710181093179073 }, { "epoch": 0.7036059806508356, "grad_norm": 1.4598665176116388, "learning_rate": 0.00035169988276670573, "loss": 3.554635524749756, "step": 1200, "token_acc": 0.2472148744909943 }, { "epoch": 0.7041923189680446, "grad_norm": 1.0895906890988776, "learning_rate": 0.00035199296600234466, "loss": 3.549295425415039, "step": 1201, "token_acc": 0.24648935709490916 }, { "epoch": 0.7047786572852536, "grad_norm": 1.1261405686585428, "learning_rate": 0.0003522860492379836, "loss": 3.535259962081909, "step": 1202, "token_acc": 0.2483663353489217 }, { "epoch": 0.7053649956024626, "grad_norm": 1.0943018129831263, "learning_rate": 0.0003525791324736225, "loss": 3.5624148845672607, "step": 1203, "token_acc": 0.24475923222521112 }, { "epoch": 0.7059513339196717, "grad_norm": 1.0284215128615037, "learning_rate": 0.00035287221570926144, "loss": 3.4818179607391357, "step": 1204, "token_acc": 0.2532396268625216 }, { "epoch": 0.7065376722368807, "grad_norm": 1.0716823631593215, "learning_rate": 0.00035316529894490037, "loss": 3.515615463256836, "step": 1205, "token_acc": 0.2520421197817211 }, { "epoch": 0.7071240105540897, "grad_norm": 1.2324743349430587, "learning_rate": 0.0003534583821805393, "loss": 3.5280814170837402, "step": 1206, "token_acc": 0.24831418331101615 }, { "epoch": 0.7077103488712987, "grad_norm": 1.0825953154193289, "learning_rate": 0.0003537514654161782, "loss": 3.5493626594543457, "step": 1207, "token_acc": 0.2467819801348404 }, { "epoch": 0.7082966871885078, "grad_norm": 1.0841989989331555, "learning_rate": 0.00035404454865181715, "loss": 3.5270919799804688, "step": 1208, "token_acc": 0.25028672577494515 }, { "epoch": 0.7088830255057168, "grad_norm": 0.9459441428851809, "learning_rate": 0.000354337631887456, "loss": 3.489910125732422, "step": 1209, "token_acc": 0.2526744881714106 }, { "epoch": 0.7094693638229258, "grad_norm": 0.9354617600278087, "learning_rate": 0.00035463071512309495, "loss": 3.5249242782592773, "step": 1210, "token_acc": 0.2486229410576238 }, { "epoch": 0.7100557021401348, "grad_norm": 1.0326240429524933, "learning_rate": 0.0003549237983587339, "loss": 3.5006346702575684, "step": 1211, "token_acc": 0.2512047879260994 }, { "epoch": 0.7106420404573439, "grad_norm": 1.3338598333550424, "learning_rate": 0.0003552168815943728, "loss": 3.52524733543396, "step": 1212, "token_acc": 0.250570148812942 }, { "epoch": 0.7112283787745529, "grad_norm": 0.9497428972354047, "learning_rate": 0.00035550996483001173, "loss": 3.491339921951294, "step": 1213, "token_acc": 0.2525678764914039 }, { "epoch": 0.7118147170917619, "grad_norm": 1.356692229245831, "learning_rate": 0.00035580304806565066, "loss": 3.5024614334106445, "step": 1214, "token_acc": 0.2491468965341877 }, { "epoch": 0.712401055408971, "grad_norm": 1.0586248772881608, "learning_rate": 0.0003560961313012896, "loss": 3.4918408393859863, "step": 1215, "token_acc": 0.2523862523144468 }, { "epoch": 0.71298739372618, "grad_norm": 0.8235559547246575, "learning_rate": 0.0003563892145369285, "loss": 3.4780097007751465, "step": 1216, "token_acc": 0.25423402465111006 }, { "epoch": 0.713573732043389, "grad_norm": 0.8871343746621152, "learning_rate": 0.00035668229777256744, "loss": 3.515353202819824, "step": 1217, "token_acc": 0.2500533748131882 }, { "epoch": 0.714160070360598, "grad_norm": 1.1428938447313297, "learning_rate": 0.0003569753810082063, "loss": 3.5247015953063965, "step": 1218, "token_acc": 0.24737294472340415 }, { "epoch": 0.7147464086778071, "grad_norm": 1.0848607754382695, "learning_rate": 0.00035726846424384524, "loss": 3.4805145263671875, "step": 1219, "token_acc": 0.25320163928068157 }, { "epoch": 0.7153327469950161, "grad_norm": 1.2135675779157435, "learning_rate": 0.00035756154747948417, "loss": 3.523923397064209, "step": 1220, "token_acc": 0.24836656520671696 }, { "epoch": 0.7159190853122251, "grad_norm": 0.8789548797979777, "learning_rate": 0.0003578546307151231, "loss": 3.498096466064453, "step": 1221, "token_acc": 0.2510642049318657 }, { "epoch": 0.7165054236294341, "grad_norm": 0.7578662696495625, "learning_rate": 0.000358147713950762, "loss": 3.4700088500976562, "step": 1222, "token_acc": 0.25496737451534607 }, { "epoch": 0.7170917619466433, "grad_norm": 1.0228189917114539, "learning_rate": 0.00035844079718640095, "loss": 3.5253689289093018, "step": 1223, "token_acc": 0.2494752038706153 }, { "epoch": 0.7176781002638523, "grad_norm": 1.2213198960682865, "learning_rate": 0.0003587338804220399, "loss": 3.456556797027588, "step": 1224, "token_acc": 0.25771414530093273 }, { "epoch": 0.7182644385810613, "grad_norm": 1.0631151053663197, "learning_rate": 0.0003590269636576788, "loss": 3.5372540950775146, "step": 1225, "token_acc": 0.2492148667712632 }, { "epoch": 0.7188507768982703, "grad_norm": 1.2113437554086248, "learning_rate": 0.00035932004689331773, "loss": 3.5380754470825195, "step": 1226, "token_acc": 0.24649231626117116 }, { "epoch": 0.7194371152154794, "grad_norm": 0.9555943062405109, "learning_rate": 0.00035961313012895666, "loss": 3.5020651817321777, "step": 1227, "token_acc": 0.25017010831489445 }, { "epoch": 0.7200234535326884, "grad_norm": 1.125559620095925, "learning_rate": 0.0003599062133645956, "loss": 3.5488991737365723, "step": 1228, "token_acc": 0.24545114072594487 }, { "epoch": 0.7206097918498974, "grad_norm": 1.0992764462400064, "learning_rate": 0.00036019929660023446, "loss": 3.450565814971924, "step": 1229, "token_acc": 0.25807414076485796 }, { "epoch": 0.7211961301671064, "grad_norm": 0.9545388846500981, "learning_rate": 0.0003604923798358734, "loss": 3.5097219944000244, "step": 1230, "token_acc": 0.25133427522476376 }, { "epoch": 0.7217824684843155, "grad_norm": 1.0387227315444867, "learning_rate": 0.0003607854630715123, "loss": 3.524634838104248, "step": 1231, "token_acc": 0.24821223670190967 }, { "epoch": 0.7223688068015245, "grad_norm": 0.8514007532483667, "learning_rate": 0.00036107854630715124, "loss": 3.5065577030181885, "step": 1232, "token_acc": 0.2523945202022857 }, { "epoch": 0.7229551451187335, "grad_norm": 0.9994290359397887, "learning_rate": 0.00036137162954279017, "loss": 3.536440849304199, "step": 1233, "token_acc": 0.24660880829015544 }, { "epoch": 0.7235414834359425, "grad_norm": 1.178655552475769, "learning_rate": 0.0003616647127784291, "loss": 3.4668939113616943, "step": 1234, "token_acc": 0.25552713294940016 }, { "epoch": 0.7241278217531516, "grad_norm": 0.9964407980041861, "learning_rate": 0.000361957796014068, "loss": 3.471785306930542, "step": 1235, "token_acc": 0.25725433020990324 }, { "epoch": 0.7247141600703606, "grad_norm": 1.4628672089350807, "learning_rate": 0.0003622508792497069, "loss": 3.534055709838867, "step": 1236, "token_acc": 0.24756990398580359 }, { "epoch": 0.7253004983875696, "grad_norm": 0.936330363919642, "learning_rate": 0.0003625439624853458, "loss": 3.4989395141601562, "step": 1237, "token_acc": 0.25195990974445087 }, { "epoch": 0.7258868367047786, "grad_norm": 0.9241122426506498, "learning_rate": 0.00036283704572098475, "loss": 3.4999499320983887, "step": 1238, "token_acc": 0.2507256837926323 }, { "epoch": 0.7264731750219877, "grad_norm": 0.9099503060430971, "learning_rate": 0.0003631301289566237, "loss": 3.531130313873291, "step": 1239, "token_acc": 0.24756478876550658 }, { "epoch": 0.7270595133391967, "grad_norm": 0.8444130566891199, "learning_rate": 0.0003634232121922626, "loss": 3.4660487174987793, "step": 1240, "token_acc": 0.2547390233330162 }, { "epoch": 0.7276458516564057, "grad_norm": 0.9945255179413414, "learning_rate": 0.00036371629542790153, "loss": 3.488422155380249, "step": 1241, "token_acc": 0.2546753879862216 }, { "epoch": 0.7282321899736148, "grad_norm": 1.0246236548373886, "learning_rate": 0.00036400937866354046, "loss": 3.466963291168213, "step": 1242, "token_acc": 0.2553291695433211 }, { "epoch": 0.7288185282908238, "grad_norm": 1.1706729509074931, "learning_rate": 0.0003643024618991794, "loss": 3.558746814727783, "step": 1243, "token_acc": 0.24539399728732697 }, { "epoch": 0.7294048666080328, "grad_norm": 1.09724316464441, "learning_rate": 0.0003645955451348183, "loss": 3.4664244651794434, "step": 1244, "token_acc": 0.2568734561082883 }, { "epoch": 0.7299912049252418, "grad_norm": 0.7928447267564022, "learning_rate": 0.00036488862837045724, "loss": 3.554063320159912, "step": 1245, "token_acc": 0.2435292165098495 }, { "epoch": 0.7305775432424509, "grad_norm": 0.9070691599138223, "learning_rate": 0.00036518171160609616, "loss": 3.5058159828186035, "step": 1246, "token_acc": 0.2518943413847264 }, { "epoch": 0.73116388155966, "grad_norm": 1.1480665913191976, "learning_rate": 0.0003654747948417351, "loss": 3.5055370330810547, "step": 1247, "token_acc": 0.2504925904240956 }, { "epoch": 0.731750219876869, "grad_norm": 1.0917878330671396, "learning_rate": 0.000365767878077374, "loss": 3.4870190620422363, "step": 1248, "token_acc": 0.25269090964089275 }, { "epoch": 0.732336558194078, "grad_norm": 1.2122370406200964, "learning_rate": 0.00036606096131301295, "loss": 3.5182714462280273, "step": 1249, "token_acc": 0.2504461826315001 }, { "epoch": 0.7329228965112871, "grad_norm": 1.0992772061454728, "learning_rate": 0.0003663540445486518, "loss": 3.5024056434631348, "step": 1250, "token_acc": 0.250882372065804 }, { "epoch": 0.7335092348284961, "grad_norm": 1.0945904717565895, "learning_rate": 0.00036664712778429075, "loss": 3.4923954010009766, "step": 1251, "token_acc": 0.2549660129076592 }, { "epoch": 0.7340955731457051, "grad_norm": 1.217726506041747, "learning_rate": 0.0003669402110199297, "loss": 3.4819135665893555, "step": 1252, "token_acc": 0.2544570544435777 }, { "epoch": 0.7346819114629141, "grad_norm": 0.8045037059017836, "learning_rate": 0.00036723329425556855, "loss": 3.496025562286377, "step": 1253, "token_acc": 0.2513662789162702 }, { "epoch": 0.7352682497801232, "grad_norm": 1.0533464337294605, "learning_rate": 0.0003675263774912075, "loss": 3.570786952972412, "step": 1254, "token_acc": 0.24342378010861904 }, { "epoch": 0.7358545880973322, "grad_norm": 1.098386702161414, "learning_rate": 0.0003678194607268464, "loss": 3.5218682289123535, "step": 1255, "token_acc": 0.24898910165003055 }, { "epoch": 0.7364409264145412, "grad_norm": 1.096858754177103, "learning_rate": 0.00036811254396248533, "loss": 3.474954128265381, "step": 1256, "token_acc": 0.2530937376510472 }, { "epoch": 0.7370272647317502, "grad_norm": 1.0361171354256042, "learning_rate": 0.00036840562719812425, "loss": 3.563544750213623, "step": 1257, "token_acc": 0.24390654370148665 }, { "epoch": 0.7376136030489593, "grad_norm": 0.964936736934839, "learning_rate": 0.0003686987104337632, "loss": 3.524034261703491, "step": 1258, "token_acc": 0.24788131229920093 }, { "epoch": 0.7381999413661683, "grad_norm": 1.0730831406859893, "learning_rate": 0.0003689917936694021, "loss": 3.473174571990967, "step": 1259, "token_acc": 0.25377940307403235 }, { "epoch": 0.7387862796833773, "grad_norm": 1.2270883194190767, "learning_rate": 0.00036928487690504104, "loss": 3.4905569553375244, "step": 1260, "token_acc": 0.25130399079476273 }, { "epoch": 0.7393726180005863, "grad_norm": 0.9903905620973006, "learning_rate": 0.00036957796014067996, "loss": 3.526292562484741, "step": 1261, "token_acc": 0.2472362765060365 }, { "epoch": 0.7399589563177954, "grad_norm": 1.3057551281395756, "learning_rate": 0.0003698710433763189, "loss": 3.4739603996276855, "step": 1262, "token_acc": 0.2543431986943893 }, { "epoch": 0.7405452946350044, "grad_norm": 1.0857925859584348, "learning_rate": 0.0003701641266119578, "loss": 3.4918572902679443, "step": 1263, "token_acc": 0.2496519247619918 }, { "epoch": 0.7411316329522134, "grad_norm": 1.1067318405815867, "learning_rate": 0.00037045720984759674, "loss": 3.4486000537872314, "step": 1264, "token_acc": 0.2564126836374346 }, { "epoch": 0.7417179712694224, "grad_norm": 1.0311115412015102, "learning_rate": 0.00037075029308323567, "loss": 3.5447144508361816, "step": 1265, "token_acc": 0.24598300512124774 }, { "epoch": 0.7423043095866315, "grad_norm": 0.9604109003891884, "learning_rate": 0.0003710433763188746, "loss": 3.4529128074645996, "step": 1266, "token_acc": 0.25577408900847837 }, { "epoch": 0.7428906479038405, "grad_norm": 1.1410367723596473, "learning_rate": 0.0003713364595545135, "loss": 3.443695545196533, "step": 1267, "token_acc": 0.2578074183007376 }, { "epoch": 0.7434769862210495, "grad_norm": 1.0710813989193992, "learning_rate": 0.00037162954279015245, "loss": 3.470284938812256, "step": 1268, "token_acc": 0.25557474959946813 }, { "epoch": 0.7440633245382586, "grad_norm": 1.003934501544495, "learning_rate": 0.0003719226260257914, "loss": 3.515272617340088, "step": 1269, "token_acc": 0.25064775378091797 }, { "epoch": 0.7446496628554676, "grad_norm": 1.2399214122389657, "learning_rate": 0.00037221570926143025, "loss": 3.5106518268585205, "step": 1270, "token_acc": 0.2490702668896131 }, { "epoch": 0.7452360011726766, "grad_norm": 1.0755101054033664, "learning_rate": 0.0003725087924970691, "loss": 3.461902379989624, "step": 1271, "token_acc": 0.25596038619869915 }, { "epoch": 0.7458223394898856, "grad_norm": 1.152119971935176, "learning_rate": 0.00037280187573270805, "loss": 3.4664485454559326, "step": 1272, "token_acc": 0.2557741616105202 }, { "epoch": 0.7464086778070947, "grad_norm": 1.0472432895044648, "learning_rate": 0.000373094958968347, "loss": 3.483273506164551, "step": 1273, "token_acc": 0.25200952204291105 }, { "epoch": 0.7469950161243037, "grad_norm": 0.8790006673685149, "learning_rate": 0.0003733880422039859, "loss": 3.4371213912963867, "step": 1274, "token_acc": 0.2581320134323977 }, { "epoch": 0.7475813544415127, "grad_norm": 0.886736452804881, "learning_rate": 0.00037368112543962484, "loss": 3.4758927822113037, "step": 1275, "token_acc": 0.2527080694618192 }, { "epoch": 0.7481676927587217, "grad_norm": 1.1449442477726546, "learning_rate": 0.00037397420867526376, "loss": 3.5382113456726074, "step": 1276, "token_acc": 0.24422432713411113 }, { "epoch": 0.7487540310759309, "grad_norm": 1.097091454594263, "learning_rate": 0.0003742672919109027, "loss": 3.4651803970336914, "step": 1277, "token_acc": 0.25596913992811243 }, { "epoch": 0.7493403693931399, "grad_norm": 1.138410631270296, "learning_rate": 0.0003745603751465416, "loss": 3.5188822746276855, "step": 1278, "token_acc": 0.24840670304609871 }, { "epoch": 0.7499267077103489, "grad_norm": 0.9394261071605571, "learning_rate": 0.00037485345838218054, "loss": 3.5016980171203613, "step": 1279, "token_acc": 0.2501159551792046 }, { "epoch": 0.7505130460275579, "grad_norm": 1.182035471455298, "learning_rate": 0.00037514654161781947, "loss": 3.487163543701172, "step": 1280, "token_acc": 0.25179838940700894 }, { "epoch": 0.751099384344767, "grad_norm": 1.01190698276881, "learning_rate": 0.0003754396248534584, "loss": 3.4713244438171387, "step": 1281, "token_acc": 0.2530424675354666 }, { "epoch": 0.751685722661976, "grad_norm": 1.1464516562070586, "learning_rate": 0.0003757327080890973, "loss": 3.442625045776367, "step": 1282, "token_acc": 0.2588711967438974 }, { "epoch": 0.752272060979185, "grad_norm": 1.0431492011488195, "learning_rate": 0.00037602579132473625, "loss": 3.4936037063598633, "step": 1283, "token_acc": 0.25121045106907675 }, { "epoch": 0.752858399296394, "grad_norm": 1.050580213235949, "learning_rate": 0.0003763188745603752, "loss": 3.534257411956787, "step": 1284, "token_acc": 0.24597087918194954 }, { "epoch": 0.7534447376136031, "grad_norm": 1.0272016491610667, "learning_rate": 0.0003766119577960141, "loss": 3.5080385208129883, "step": 1285, "token_acc": 0.2503993049232866 }, { "epoch": 0.7540310759308121, "grad_norm": 0.8400726262906948, "learning_rate": 0.00037690504103165303, "loss": 3.4579713344573975, "step": 1286, "token_acc": 0.25657550247714184 }, { "epoch": 0.7546174142480211, "grad_norm": 0.9676103867597385, "learning_rate": 0.00037719812426729196, "loss": 3.551908493041992, "step": 1287, "token_acc": 0.2439862816612781 }, { "epoch": 0.7552037525652301, "grad_norm": 1.030458951125422, "learning_rate": 0.0003774912075029309, "loss": 3.482819080352783, "step": 1288, "token_acc": 0.25361263141315904 }, { "epoch": 0.7557900908824392, "grad_norm": 1.0138518380325043, "learning_rate": 0.00037778429073856976, "loss": 3.5801682472229004, "step": 1289, "token_acc": 0.24201541083316241 }, { "epoch": 0.7563764291996482, "grad_norm": 1.0795402076389047, "learning_rate": 0.00037807737397420863, "loss": 3.4637675285339355, "step": 1290, "token_acc": 0.25344788268771257 }, { "epoch": 0.7569627675168572, "grad_norm": 0.9483087373746847, "learning_rate": 0.00037837045720984756, "loss": 3.4972915649414062, "step": 1291, "token_acc": 0.2523110899921164 }, { "epoch": 0.7575491058340662, "grad_norm": 0.9929928646355264, "learning_rate": 0.0003786635404454865, "loss": 3.499781608581543, "step": 1292, "token_acc": 0.24930556275328822 }, { "epoch": 0.7581354441512753, "grad_norm": 0.8805073109554905, "learning_rate": 0.0003789566236811254, "loss": 3.5048651695251465, "step": 1293, "token_acc": 0.2518881285489272 }, { "epoch": 0.7587217824684843, "grad_norm": 0.8408700573444599, "learning_rate": 0.00037924970691676434, "loss": 3.495518207550049, "step": 1294, "token_acc": 0.24965957039980777 }, { "epoch": 0.7593081207856933, "grad_norm": 0.9853569578582353, "learning_rate": 0.00037954279015240327, "loss": 3.495433807373047, "step": 1295, "token_acc": 0.2517029212946871 }, { "epoch": 0.7598944591029023, "grad_norm": 1.081447091992857, "learning_rate": 0.0003798358733880422, "loss": 3.449873924255371, "step": 1296, "token_acc": 0.25396492790444886 }, { "epoch": 0.7604807974201114, "grad_norm": 1.002555529161158, "learning_rate": 0.0003801289566236811, "loss": 3.4970436096191406, "step": 1297, "token_acc": 0.2514988088069744 }, { "epoch": 0.7610671357373204, "grad_norm": 0.9577475569141637, "learning_rate": 0.00038042203985932005, "loss": 3.5140223503112793, "step": 1298, "token_acc": 0.24797525601705048 }, { "epoch": 0.7616534740545294, "grad_norm": 1.1279460541170863, "learning_rate": 0.000380715123094959, "loss": 3.473954677581787, "step": 1299, "token_acc": 0.2529052675501462 }, { "epoch": 0.7622398123717385, "grad_norm": 0.9474834382608195, "learning_rate": 0.0003810082063305979, "loss": 3.4546966552734375, "step": 1300, "token_acc": 0.2558521285847343 }, { "epoch": 0.7628261506889475, "grad_norm": 1.1003938100742867, "learning_rate": 0.00038130128956623683, "loss": 3.4985344409942627, "step": 1301, "token_acc": 0.25046651724592556 }, { "epoch": 0.7634124890061565, "grad_norm": 1.1268289807504321, "learning_rate": 0.00038159437280187576, "loss": 3.464116096496582, "step": 1302, "token_acc": 0.25468803448479843 }, { "epoch": 0.7639988273233655, "grad_norm": 1.0437345553194601, "learning_rate": 0.0003818874560375147, "loss": 3.4308760166168213, "step": 1303, "token_acc": 0.2579341929083774 }, { "epoch": 0.7645851656405747, "grad_norm": 1.0168721838168475, "learning_rate": 0.0003821805392731536, "loss": 3.499641180038452, "step": 1304, "token_acc": 0.25185373259615834 }, { "epoch": 0.7651715039577837, "grad_norm": 0.9809467629589137, "learning_rate": 0.00038247362250879254, "loss": 3.53367280960083, "step": 1305, "token_acc": 0.24565776529901218 }, { "epoch": 0.7657578422749927, "grad_norm": 0.9777913711597344, "learning_rate": 0.00038276670574443147, "loss": 3.5120949745178223, "step": 1306, "token_acc": 0.24813353581449063 }, { "epoch": 0.7663441805922017, "grad_norm": 0.9721165069007411, "learning_rate": 0.00038305978898007034, "loss": 3.50046706199646, "step": 1307, "token_acc": 0.2501995672406845 }, { "epoch": 0.7669305189094108, "grad_norm": 1.1242031761992586, "learning_rate": 0.00038335287221570927, "loss": 3.432429790496826, "step": 1308, "token_acc": 0.2584317354360933 }, { "epoch": 0.7675168572266198, "grad_norm": 0.9033200657614953, "learning_rate": 0.0003836459554513482, "loss": 3.450303554534912, "step": 1309, "token_acc": 0.2545959443750807 }, { "epoch": 0.7681031955438288, "grad_norm": 0.8387323915753694, "learning_rate": 0.00038393903868698707, "loss": 3.427049160003662, "step": 1310, "token_acc": 0.26161080958375466 }, { "epoch": 0.7686895338610378, "grad_norm": 0.9207370783235117, "learning_rate": 0.000384232121922626, "loss": 3.512810230255127, "step": 1311, "token_acc": 0.2500586039871782 }, { "epoch": 0.7692758721782469, "grad_norm": 0.9286609876884838, "learning_rate": 0.0003845252051582649, "loss": 3.4888782501220703, "step": 1312, "token_acc": 0.24952270965661105 }, { "epoch": 0.7698622104954559, "grad_norm": 0.8513281468983792, "learning_rate": 0.00038481828839390385, "loss": 3.4999337196350098, "step": 1313, "token_acc": 0.2507118114171627 }, { "epoch": 0.7704485488126649, "grad_norm": 1.0558274250444264, "learning_rate": 0.0003851113716295428, "loss": 3.49381160736084, "step": 1314, "token_acc": 0.2524881784594836 }, { "epoch": 0.7710348871298739, "grad_norm": 1.2848425923343314, "learning_rate": 0.0003854044548651817, "loss": 3.476231575012207, "step": 1315, "token_acc": 0.2532608206872654 }, { "epoch": 0.771621225447083, "grad_norm": 0.9618716502650655, "learning_rate": 0.00038569753810082063, "loss": 3.4800541400909424, "step": 1316, "token_acc": 0.25382663269231265 }, { "epoch": 0.772207563764292, "grad_norm": 1.3026909008185643, "learning_rate": 0.00038599062133645956, "loss": 3.4334373474121094, "step": 1317, "token_acc": 0.25964722030133486 }, { "epoch": 0.772793902081501, "grad_norm": 0.8279225536912802, "learning_rate": 0.0003862837045720985, "loss": 3.4873340129852295, "step": 1318, "token_acc": 0.2507323710680078 }, { "epoch": 0.77338024039871, "grad_norm": 1.1738047788359856, "learning_rate": 0.0003865767878077374, "loss": 3.5061237812042236, "step": 1319, "token_acc": 0.24763991996363882 }, { "epoch": 0.7739665787159191, "grad_norm": 0.9826258911228014, "learning_rate": 0.00038686987104337634, "loss": 3.472834348678589, "step": 1320, "token_acc": 0.2559736420741471 }, { "epoch": 0.7745529170331281, "grad_norm": 0.9680921098301252, "learning_rate": 0.00038716295427901527, "loss": 3.4527368545532227, "step": 1321, "token_acc": 0.25550909827847346 }, { "epoch": 0.7751392553503371, "grad_norm": 0.9599234070876219, "learning_rate": 0.0003874560375146542, "loss": 3.490530014038086, "step": 1322, "token_acc": 0.25294146038011067 }, { "epoch": 0.7757255936675461, "grad_norm": 0.8859629164520336, "learning_rate": 0.0003877491207502931, "loss": 3.4869954586029053, "step": 1323, "token_acc": 0.25216506749512 }, { "epoch": 0.7763119319847552, "grad_norm": 0.8370047156045757, "learning_rate": 0.000388042203985932, "loss": 3.4701340198516846, "step": 1324, "token_acc": 0.253744326179443 }, { "epoch": 0.7768982703019642, "grad_norm": 0.809264841428513, "learning_rate": 0.0003883352872215709, "loss": 3.455357074737549, "step": 1325, "token_acc": 0.2543043215956026 }, { "epoch": 0.7774846086191732, "grad_norm": 0.8866636587753497, "learning_rate": 0.00038862837045720985, "loss": 3.4878439903259277, "step": 1326, "token_acc": 0.25365168614321604 }, { "epoch": 0.7780709469363823, "grad_norm": 1.3516164428753499, "learning_rate": 0.0003889214536928488, "loss": 3.512117385864258, "step": 1327, "token_acc": 0.24848324910604722 }, { "epoch": 0.7786572852535913, "grad_norm": 1.0427806512913531, "learning_rate": 0.0003892145369284877, "loss": 3.4557085037231445, "step": 1328, "token_acc": 0.255793352978092 }, { "epoch": 0.7792436235708003, "grad_norm": 0.7482853663299864, "learning_rate": 0.00038950762016412663, "loss": 3.4501166343688965, "step": 1329, "token_acc": 0.255271152831596 }, { "epoch": 0.7798299618880093, "grad_norm": 0.8558108600578263, "learning_rate": 0.00038980070339976556, "loss": 3.510521411895752, "step": 1330, "token_acc": 0.2493889149300728 }, { "epoch": 0.7804163002052185, "grad_norm": 0.8919451203223152, "learning_rate": 0.00039009378663540443, "loss": 3.446441888809204, "step": 1331, "token_acc": 0.2563788324277697 }, { "epoch": 0.7810026385224275, "grad_norm": 0.9448816313122309, "learning_rate": 0.00039038686987104336, "loss": 3.485175132751465, "step": 1332, "token_acc": 0.25235118904892073 }, { "epoch": 0.7815889768396365, "grad_norm": 1.172294510352743, "learning_rate": 0.0003906799531066823, "loss": 3.505655288696289, "step": 1333, "token_acc": 0.24697578067057108 }, { "epoch": 0.7821753151568455, "grad_norm": 1.1952119988026573, "learning_rate": 0.0003909730363423212, "loss": 3.543708324432373, "step": 1334, "token_acc": 0.24501603072534195 }, { "epoch": 0.7827616534740546, "grad_norm": 0.8748477516495661, "learning_rate": 0.00039126611957796014, "loss": 3.4726717472076416, "step": 1335, "token_acc": 0.2526044792701584 }, { "epoch": 0.7833479917912636, "grad_norm": 0.7946392964916559, "learning_rate": 0.00039155920281359907, "loss": 3.4403886795043945, "step": 1336, "token_acc": 0.2559458652602249 }, { "epoch": 0.7839343301084726, "grad_norm": 1.103170369075015, "learning_rate": 0.000391852286049238, "loss": 3.428464412689209, "step": 1337, "token_acc": 0.2595619474794555 }, { "epoch": 0.7845206684256816, "grad_norm": 1.194272859704073, "learning_rate": 0.0003921453692848769, "loss": 3.503781318664551, "step": 1338, "token_acc": 0.25076877320261093 }, { "epoch": 0.7851070067428907, "grad_norm": 0.8001982418328981, "learning_rate": 0.00039243845252051585, "loss": 3.4887337684631348, "step": 1339, "token_acc": 0.25095999273663355 }, { "epoch": 0.7856933450600997, "grad_norm": 1.0203876461531287, "learning_rate": 0.0003927315357561548, "loss": 3.4944047927856445, "step": 1340, "token_acc": 0.25051094130358376 }, { "epoch": 0.7862796833773087, "grad_norm": 1.1035535984900533, "learning_rate": 0.0003930246189917937, "loss": 3.5003156661987305, "step": 1341, "token_acc": 0.2486349386223504 }, { "epoch": 0.7868660216945177, "grad_norm": 0.8397372201335467, "learning_rate": 0.0003933177022274326, "loss": 3.48427152633667, "step": 1342, "token_acc": 0.2521773918184088 }, { "epoch": 0.7874523600117268, "grad_norm": 1.0557629358776, "learning_rate": 0.0003936107854630715, "loss": 3.4606246948242188, "step": 1343, "token_acc": 0.2532410076816997 }, { "epoch": 0.7880386983289358, "grad_norm": 1.3835518556883315, "learning_rate": 0.00039390386869871043, "loss": 3.527385711669922, "step": 1344, "token_acc": 0.24679863927573348 }, { "epoch": 0.7886250366461448, "grad_norm": 0.7060137875705657, "learning_rate": 0.00039419695193434936, "loss": 3.4654507637023926, "step": 1345, "token_acc": 0.2550812122556437 }, { "epoch": 0.7892113749633538, "grad_norm": 0.9843111851801306, "learning_rate": 0.0003944900351699883, "loss": 3.464249610900879, "step": 1346, "token_acc": 0.25332529093045747 }, { "epoch": 0.7897977132805629, "grad_norm": 1.1098217910051005, "learning_rate": 0.0003947831184056272, "loss": 3.478701114654541, "step": 1347, "token_acc": 0.2545163459732028 }, { "epoch": 0.7903840515977719, "grad_norm": 0.8101602358852716, "learning_rate": 0.00039507620164126614, "loss": 3.5002989768981934, "step": 1348, "token_acc": 0.24926323643049375 }, { "epoch": 0.7909703899149809, "grad_norm": 0.9472157344819309, "learning_rate": 0.00039536928487690507, "loss": 3.493964672088623, "step": 1349, "token_acc": 0.25211640488473575 }, { "epoch": 0.7915567282321899, "grad_norm": 0.8253257653614141, "learning_rate": 0.000395662368112544, "loss": 3.4287829399108887, "step": 1350, "token_acc": 0.2595739271649969 }, { "epoch": 0.792143066549399, "grad_norm": 0.9959955467271533, "learning_rate": 0.00039595545134818287, "loss": 3.513934850692749, "step": 1351, "token_acc": 0.2474573231204237 }, { "epoch": 0.792729404866608, "grad_norm": 1.3208406456319923, "learning_rate": 0.0003962485345838218, "loss": 3.4896159172058105, "step": 1352, "token_acc": 0.25308993726811474 }, { "epoch": 0.793315743183817, "grad_norm": 0.8886095797458569, "learning_rate": 0.0003965416178194607, "loss": 3.5137810707092285, "step": 1353, "token_acc": 0.2456682044389553 }, { "epoch": 0.7939020815010261, "grad_norm": 0.9601854114348637, "learning_rate": 0.00039683470105509965, "loss": 3.541001081466675, "step": 1354, "token_acc": 0.2445393757304402 }, { "epoch": 0.7944884198182351, "grad_norm": 1.1096799473887897, "learning_rate": 0.0003971277842907386, "loss": 3.463557720184326, "step": 1355, "token_acc": 0.2540497488688722 }, { "epoch": 0.7950747581354441, "grad_norm": 1.1615019689632204, "learning_rate": 0.0003974208675263775, "loss": 3.458465099334717, "step": 1356, "token_acc": 0.2565389893434878 }, { "epoch": 0.7956610964526531, "grad_norm": 0.8567150855293202, "learning_rate": 0.00039771395076201643, "loss": 3.3802685737609863, "step": 1357, "token_acc": 0.26693192402170957 }, { "epoch": 0.7962474347698623, "grad_norm": 0.8819941643727316, "learning_rate": 0.00039800703399765536, "loss": 3.535536050796509, "step": 1358, "token_acc": 0.24338467976044145 }, { "epoch": 0.7968337730870713, "grad_norm": 0.8674153197792284, "learning_rate": 0.0003983001172332943, "loss": 3.4684572219848633, "step": 1359, "token_acc": 0.25408120339091084 }, { "epoch": 0.7974201114042803, "grad_norm": 0.9884503667344221, "learning_rate": 0.00039859320046893316, "loss": 3.4723939895629883, "step": 1360, "token_acc": 0.2526795454372077 }, { "epoch": 0.7980064497214893, "grad_norm": 1.2055938752026887, "learning_rate": 0.0003988862837045721, "loss": 3.4520530700683594, "step": 1361, "token_acc": 0.25510560691292317 }, { "epoch": 0.7985927880386984, "grad_norm": 0.8980384725050254, "learning_rate": 0.000399179366940211, "loss": 3.3998494148254395, "step": 1362, "token_acc": 0.2651210777998559 }, { "epoch": 0.7991791263559074, "grad_norm": 0.9689317948322111, "learning_rate": 0.00039947245017584994, "loss": 3.4836082458496094, "step": 1363, "token_acc": 0.25241331719804044 }, { "epoch": 0.7997654646731164, "grad_norm": 0.913257659928347, "learning_rate": 0.00039976553341148887, "loss": 3.4455323219299316, "step": 1364, "token_acc": 0.2559823218270327 }, { "epoch": 0.8003518029903254, "grad_norm": 0.8098367705266915, "learning_rate": 0.0004000586166471278, "loss": 3.461963176727295, "step": 1365, "token_acc": 0.2544156604817866 }, { "epoch": 0.8009381413075345, "grad_norm": 0.842508688137061, "learning_rate": 0.0004003516998827667, "loss": 3.472806930541992, "step": 1366, "token_acc": 0.2523059396304877 }, { "epoch": 0.8015244796247435, "grad_norm": 0.9386681215326841, "learning_rate": 0.00040064478311840565, "loss": 3.4677324295043945, "step": 1367, "token_acc": 0.25425463395509756 }, { "epoch": 0.8021108179419525, "grad_norm": 1.1981374414805317, "learning_rate": 0.0004009378663540446, "loss": 3.4789719581604004, "step": 1368, "token_acc": 0.2501066035961829 }, { "epoch": 0.8026971562591615, "grad_norm": 1.0594723685365937, "learning_rate": 0.0004012309495896835, "loss": 3.4348092079162598, "step": 1369, "token_acc": 0.2579946893043786 }, { "epoch": 0.8032834945763706, "grad_norm": 1.0776821177607765, "learning_rate": 0.00040152403282532243, "loss": 3.445159912109375, "step": 1370, "token_acc": 0.2551865284974093 }, { "epoch": 0.8038698328935796, "grad_norm": 0.9232657095786017, "learning_rate": 0.0004018171160609613, "loss": 3.475691318511963, "step": 1371, "token_acc": 0.25365587060339106 }, { "epoch": 0.8044561712107886, "grad_norm": 0.7796945633236816, "learning_rate": 0.00040211019929660023, "loss": 3.472425937652588, "step": 1372, "token_acc": 0.25509974501274935 }, { "epoch": 0.8050425095279976, "grad_norm": 0.8640813817278077, "learning_rate": 0.00040240328253223916, "loss": 3.4655838012695312, "step": 1373, "token_acc": 0.2542636992101896 }, { "epoch": 0.8056288478452067, "grad_norm": 0.8537053087070122, "learning_rate": 0.0004026963657678781, "loss": 3.4734272956848145, "step": 1374, "token_acc": 0.251195033521703 }, { "epoch": 0.8062151861624157, "grad_norm": 1.0435184273580218, "learning_rate": 0.000402989449003517, "loss": 3.4575772285461426, "step": 1375, "token_acc": 0.25561145250419226 }, { "epoch": 0.8068015244796247, "grad_norm": 1.336303247311034, "learning_rate": 0.00040328253223915594, "loss": 3.4576003551483154, "step": 1376, "token_acc": 0.253703814041205 }, { "epoch": 0.8073878627968337, "grad_norm": 0.8379482737762568, "learning_rate": 0.00040357561547479487, "loss": 3.4817376136779785, "step": 1377, "token_acc": 0.2514983959729793 }, { "epoch": 0.8079742011140428, "grad_norm": 1.0769148677468892, "learning_rate": 0.00040386869871043374, "loss": 3.484868288040161, "step": 1378, "token_acc": 0.2522101894004133 }, { "epoch": 0.8085605394312518, "grad_norm": 1.1363569453563487, "learning_rate": 0.00040416178194607267, "loss": 3.4506750106811523, "step": 1379, "token_acc": 0.2532601789190225 }, { "epoch": 0.8091468777484608, "grad_norm": 1.2384835183434617, "learning_rate": 0.0004044548651817116, "loss": 3.5003504753112793, "step": 1380, "token_acc": 0.2497182533496546 }, { "epoch": 0.8097332160656698, "grad_norm": 0.8918144598700374, "learning_rate": 0.0004047479484173505, "loss": 3.481995105743408, "step": 1381, "token_acc": 0.25035134403870957 }, { "epoch": 0.810319554382879, "grad_norm": 0.8508959591167338, "learning_rate": 0.00040504103165298945, "loss": 3.4822134971618652, "step": 1382, "token_acc": 0.25215677456051006 }, { "epoch": 0.810905892700088, "grad_norm": 0.9730378953300497, "learning_rate": 0.0004053341148886284, "loss": 3.4935007095336914, "step": 1383, "token_acc": 0.2493882045317992 }, { "epoch": 0.811492231017297, "grad_norm": 1.205260522557191, "learning_rate": 0.0004056271981242673, "loss": 3.460737705230713, "step": 1384, "token_acc": 0.2547436127439082 }, { "epoch": 0.8120785693345061, "grad_norm": 1.0319250304632726, "learning_rate": 0.00040592028135990623, "loss": 3.44663667678833, "step": 1385, "token_acc": 0.25536034870685265 }, { "epoch": 0.8126649076517151, "grad_norm": 1.178763445900931, "learning_rate": 0.00040621336459554516, "loss": 3.4629967212677, "step": 1386, "token_acc": 0.2543632846032621 }, { "epoch": 0.8132512459689241, "grad_norm": 0.777232254189139, "learning_rate": 0.0004065064478311841, "loss": 3.436275005340576, "step": 1387, "token_acc": 0.25696680590106463 }, { "epoch": 0.8138375842861331, "grad_norm": 0.8379848816516873, "learning_rate": 0.000406799531066823, "loss": 3.513934850692749, "step": 1388, "token_acc": 0.2472319473971217 }, { "epoch": 0.8144239226033422, "grad_norm": 0.8496106230172547, "learning_rate": 0.00040709261430246194, "loss": 3.455428123474121, "step": 1389, "token_acc": 0.25344041250539323 }, { "epoch": 0.8150102609205512, "grad_norm": 0.7953816851023453, "learning_rate": 0.00040738569753810086, "loss": 3.4240341186523438, "step": 1390, "token_acc": 0.2573587462056567 }, { "epoch": 0.8155965992377602, "grad_norm": 0.7818021752142441, "learning_rate": 0.0004076787807737398, "loss": 3.4149389266967773, "step": 1391, "token_acc": 0.2580034849312809 }, { "epoch": 0.8161829375549692, "grad_norm": 0.9059756382908705, "learning_rate": 0.00040797186400937866, "loss": 3.4758903980255127, "step": 1392, "token_acc": 0.2547208728458244 }, { "epoch": 0.8167692758721783, "grad_norm": 1.0849325275453237, "learning_rate": 0.0004082649472450176, "loss": 3.4370362758636475, "step": 1393, "token_acc": 0.25648111728076584 }, { "epoch": 0.8173556141893873, "grad_norm": 0.9861873957805206, "learning_rate": 0.0004085580304806565, "loss": 3.4439001083374023, "step": 1394, "token_acc": 0.25485562267322204 }, { "epoch": 0.8179419525065963, "grad_norm": 0.9493362416674663, "learning_rate": 0.00040885111371629545, "loss": 3.4527602195739746, "step": 1395, "token_acc": 0.2546407189513649 }, { "epoch": 0.8185282908238053, "grad_norm": 0.890556405471747, "learning_rate": 0.0004091441969519343, "loss": 3.433462142944336, "step": 1396, "token_acc": 0.2568332558827257 }, { "epoch": 0.8191146291410144, "grad_norm": 0.8517890219455814, "learning_rate": 0.00040943728018757325, "loss": 3.479552745819092, "step": 1397, "token_acc": 0.25350896654219446 }, { "epoch": 0.8197009674582234, "grad_norm": 0.9247260836531138, "learning_rate": 0.0004097303634232122, "loss": 3.5245985984802246, "step": 1398, "token_acc": 0.24757986321755046 }, { "epoch": 0.8202873057754324, "grad_norm": 1.0050053466281912, "learning_rate": 0.0004100234466588511, "loss": 3.5012197494506836, "step": 1399, "token_acc": 0.24878283652233152 }, { "epoch": 0.8208736440926414, "grad_norm": 0.8551374936045113, "learning_rate": 0.00041031652989449003, "loss": 3.4459376335144043, "step": 1400, "token_acc": 0.25510156577334303 }, { "epoch": 0.8214599824098505, "grad_norm": 0.7644142277675621, "learning_rate": 0.00041060961313012896, "loss": 3.483003854751587, "step": 1401, "token_acc": 0.25132728991888276 }, { "epoch": 0.8220463207270595, "grad_norm": 0.9172063868011725, "learning_rate": 0.0004109026963657679, "loss": 3.4474453926086426, "step": 1402, "token_acc": 0.25700021818861885 }, { "epoch": 0.8226326590442685, "grad_norm": 1.1086408235294434, "learning_rate": 0.0004111957796014068, "loss": 3.4551424980163574, "step": 1403, "token_acc": 0.2570417779255457 }, { "epoch": 0.8232189973614775, "grad_norm": 1.0763783036603938, "learning_rate": 0.00041148886283704574, "loss": 3.4139785766601562, "step": 1404, "token_acc": 0.25992755109015103 }, { "epoch": 0.8238053356786866, "grad_norm": 0.8374865732826589, "learning_rate": 0.00041178194607268466, "loss": 3.4263217449188232, "step": 1405, "token_acc": 0.25560226060393637 }, { "epoch": 0.8243916739958956, "grad_norm": 0.8004204572117098, "learning_rate": 0.0004120750293083236, "loss": 3.4471328258514404, "step": 1406, "token_acc": 0.2545264302493768 }, { "epoch": 0.8249780123131046, "grad_norm": 0.9693946951942966, "learning_rate": 0.0004123681125439625, "loss": 3.5001187324523926, "step": 1407, "token_acc": 0.24848862909995456 }, { "epoch": 0.8255643506303136, "grad_norm": 0.8964247682417612, "learning_rate": 0.00041266119577960145, "loss": 3.394019842147827, "step": 1408, "token_acc": 0.26270580001520316 }, { "epoch": 0.8261506889475227, "grad_norm": 0.8471161278870991, "learning_rate": 0.00041295427901524037, "loss": 3.511037588119507, "step": 1409, "token_acc": 0.24715160264121688 }, { "epoch": 0.8267370272647317, "grad_norm": 0.7692320627526633, "learning_rate": 0.0004132473622508793, "loss": 3.4227454662323, "step": 1410, "token_acc": 0.2576579684690336 }, { "epoch": 0.8273233655819408, "grad_norm": 0.7436822861973503, "learning_rate": 0.00041354044548651823, "loss": 3.4588029384613037, "step": 1411, "token_acc": 0.2553178530643319 }, { "epoch": 0.8279097038991499, "grad_norm": 0.7737460051288806, "learning_rate": 0.0004138335287221571, "loss": 3.4498367309570312, "step": 1412, "token_acc": 0.25420237332214984 }, { "epoch": 0.8284960422163589, "grad_norm": 0.8057163774560374, "learning_rate": 0.000414126611957796, "loss": 3.406430721282959, "step": 1413, "token_acc": 0.260003003469798 }, { "epoch": 0.8290823805335679, "grad_norm": 0.9261067619289706, "learning_rate": 0.0004144196951934349, "loss": 3.4880783557891846, "step": 1414, "token_acc": 0.2502901614401702 }, { "epoch": 0.8296687188507769, "grad_norm": 1.1313316390815347, "learning_rate": 0.00041471277842907383, "loss": 3.4647789001464844, "step": 1415, "token_acc": 0.25555520575904966 }, { "epoch": 0.830255057167986, "grad_norm": 0.9775831090958634, "learning_rate": 0.00041500586166471275, "loss": 3.4898481369018555, "step": 1416, "token_acc": 0.2508996303594658 }, { "epoch": 0.830841395485195, "grad_norm": 0.9163661926362927, "learning_rate": 0.0004152989449003517, "loss": 3.4043221473693848, "step": 1417, "token_acc": 0.2600599554364189 }, { "epoch": 0.831427733802404, "grad_norm": 1.0774927102610308, "learning_rate": 0.0004155920281359906, "loss": 3.4807019233703613, "step": 1418, "token_acc": 0.25223451491461474 }, { "epoch": 0.832014072119613, "grad_norm": 1.0666783787457559, "learning_rate": 0.00041588511137162954, "loss": 3.4704935550689697, "step": 1419, "token_acc": 0.25268332900743545 }, { "epoch": 0.8326004104368221, "grad_norm": 0.9716982065304188, "learning_rate": 0.00041617819460726846, "loss": 3.437676429748535, "step": 1420, "token_acc": 0.25658662707352037 }, { "epoch": 0.8331867487540311, "grad_norm": 0.9443100783668076, "learning_rate": 0.0004164712778429074, "loss": 3.4950509071350098, "step": 1421, "token_acc": 0.24932606709207758 }, { "epoch": 0.8337730870712401, "grad_norm": 0.9652461415355784, "learning_rate": 0.0004167643610785463, "loss": 3.466236114501953, "step": 1422, "token_acc": 0.2525144463949375 }, { "epoch": 0.8343594253884491, "grad_norm": 0.9398557077942571, "learning_rate": 0.00041705744431418524, "loss": 3.4050326347351074, "step": 1423, "token_acc": 0.26099609275891816 }, { "epoch": 0.8349457637056582, "grad_norm": 0.8597226819661934, "learning_rate": 0.00041735052754982417, "loss": 3.4048702716827393, "step": 1424, "token_acc": 0.2592719197900545 }, { "epoch": 0.8355321020228672, "grad_norm": 0.914149612924782, "learning_rate": 0.0004176436107854631, "loss": 3.408811092376709, "step": 1425, "token_acc": 0.26108501467252854 }, { "epoch": 0.8361184403400762, "grad_norm": 0.891830093648117, "learning_rate": 0.000417936694021102, "loss": 3.439952850341797, "step": 1426, "token_acc": 0.25521750828760137 }, { "epoch": 0.8367047786572852, "grad_norm": 0.8167885744609052, "learning_rate": 0.00041822977725674095, "loss": 3.4615821838378906, "step": 1427, "token_acc": 0.2547976621265537 }, { "epoch": 0.8372911169744943, "grad_norm": 0.8148129062872108, "learning_rate": 0.0004185228604923799, "loss": 3.4843926429748535, "step": 1428, "token_acc": 0.2515890409532684 }, { "epoch": 0.8378774552917033, "grad_norm": 0.7574575552111641, "learning_rate": 0.0004188159437280188, "loss": 3.4420738220214844, "step": 1429, "token_acc": 0.25465257953143644 }, { "epoch": 0.8384637936089123, "grad_norm": 0.782761945837116, "learning_rate": 0.00041910902696365774, "loss": 3.455498218536377, "step": 1430, "token_acc": 0.25486719734471996 }, { "epoch": 0.8390501319261213, "grad_norm": 0.7577469802258135, "learning_rate": 0.0004194021101992966, "loss": 3.459378719329834, "step": 1431, "token_acc": 0.2553597919079843 }, { "epoch": 0.8396364702433304, "grad_norm": 0.790007110800162, "learning_rate": 0.0004196951934349355, "loss": 3.434330940246582, "step": 1432, "token_acc": 0.25602041218944405 }, { "epoch": 0.8402228085605394, "grad_norm": 0.88987461672636, "learning_rate": 0.0004199882766705744, "loss": 3.4827966690063477, "step": 1433, "token_acc": 0.2518430599838126 }, { "epoch": 0.8408091468777484, "grad_norm": 1.1223352167923297, "learning_rate": 0.00042028135990621334, "loss": 3.441190481185913, "step": 1434, "token_acc": 0.2541853464425524 }, { "epoch": 0.8413954851949574, "grad_norm": 1.026808984090443, "learning_rate": 0.00042057444314185226, "loss": 3.4398837089538574, "step": 1435, "token_acc": 0.25734161951581486 }, { "epoch": 0.8419818235121665, "grad_norm": 0.9312421803437729, "learning_rate": 0.0004208675263774912, "loss": 3.4850120544433594, "step": 1436, "token_acc": 0.2504329044141798 }, { "epoch": 0.8425681618293756, "grad_norm": 1.039285703844888, "learning_rate": 0.0004211606096131301, "loss": 3.5089030265808105, "step": 1437, "token_acc": 0.24912117602626305 }, { "epoch": 0.8431545001465846, "grad_norm": 0.9521912847470324, "learning_rate": 0.00042145369284876904, "loss": 3.4704322814941406, "step": 1438, "token_acc": 0.253545115953876 }, { "epoch": 0.8437408384637937, "grad_norm": 0.8965030271138373, "learning_rate": 0.00042174677608440797, "loss": 3.447204113006592, "step": 1439, "token_acc": 0.2533925639064364 }, { "epoch": 0.8443271767810027, "grad_norm": 1.00228246741743, "learning_rate": 0.0004220398593200469, "loss": 3.4474263191223145, "step": 1440, "token_acc": 0.2554929491203626 }, { "epoch": 0.8449135150982117, "grad_norm": 1.0662324263822087, "learning_rate": 0.0004223329425556858, "loss": 3.463426113128662, "step": 1441, "token_acc": 0.2536346183780418 }, { "epoch": 0.8454998534154207, "grad_norm": 0.6862167973664497, "learning_rate": 0.00042262602579132475, "loss": 3.4336538314819336, "step": 1442, "token_acc": 0.25574753182869603 }, { "epoch": 0.8460861917326298, "grad_norm": 0.816118152117597, "learning_rate": 0.0004229191090269637, "loss": 3.4890904426574707, "step": 1443, "token_acc": 0.24978243118378923 }, { "epoch": 0.8466725300498388, "grad_norm": 0.7534709349554227, "learning_rate": 0.0004232121922626026, "loss": 3.459723472595215, "step": 1444, "token_acc": 0.25407042786823175 }, { "epoch": 0.8472588683670478, "grad_norm": 0.8106012433488445, "learning_rate": 0.00042350527549824153, "loss": 3.433746337890625, "step": 1445, "token_acc": 0.2556739497379244 }, { "epoch": 0.8478452066842568, "grad_norm": 0.860812528294037, "learning_rate": 0.00042379835873388046, "loss": 3.457742691040039, "step": 1446, "token_acc": 0.2537492721769646 }, { "epoch": 0.8484315450014659, "grad_norm": 0.8339282560506189, "learning_rate": 0.0004240914419695194, "loss": 3.4138875007629395, "step": 1447, "token_acc": 0.26043569847172365 }, { "epoch": 0.8490178833186749, "grad_norm": 0.8167831666660748, "learning_rate": 0.0004243845252051583, "loss": 3.47696590423584, "step": 1448, "token_acc": 0.2512036727484278 }, { "epoch": 0.8496042216358839, "grad_norm": 0.8640974255530178, "learning_rate": 0.0004246776084407972, "loss": 3.447117805480957, "step": 1449, "token_acc": 0.25497912729706673 }, { "epoch": 0.8501905599530929, "grad_norm": 0.9085842442267588, "learning_rate": 0.0004249706916764361, "loss": 3.4408185482025146, "step": 1450, "token_acc": 0.25575547593737286 }, { "epoch": 0.850776898270302, "grad_norm": 1.0624063708683804, "learning_rate": 0.00042526377491207504, "loss": 3.4095301628112793, "step": 1451, "token_acc": 0.259989085806351 }, { "epoch": 0.851363236587511, "grad_norm": 1.1174616615503816, "learning_rate": 0.0004255568581477139, "loss": 3.4574272632598877, "step": 1452, "token_acc": 0.2539443758860185 }, { "epoch": 0.85194957490472, "grad_norm": 0.8601468245880046, "learning_rate": 0.00042584994138335284, "loss": 3.4638266563415527, "step": 1453, "token_acc": 0.25422001905772484 }, { "epoch": 0.852535913221929, "grad_norm": 0.7607748812315341, "learning_rate": 0.00042614302461899177, "loss": 3.4572696685791016, "step": 1454, "token_acc": 0.25261450946577263 }, { "epoch": 0.8531222515391381, "grad_norm": 0.8406494619883967, "learning_rate": 0.0004264361078546307, "loss": 3.4227023124694824, "step": 1455, "token_acc": 0.2592655428636565 }, { "epoch": 0.8537085898563471, "grad_norm": 0.9024436803761858, "learning_rate": 0.0004267291910902696, "loss": 3.3479840755462646, "step": 1456, "token_acc": 0.26578968745557563 }, { "epoch": 0.8542949281735561, "grad_norm": 0.8822254387646381, "learning_rate": 0.00042702227432590855, "loss": 3.4071271419525146, "step": 1457, "token_acc": 0.2608795010722058 }, { "epoch": 0.8548812664907651, "grad_norm": 0.8309948721357688, "learning_rate": 0.0004273153575615475, "loss": 3.4616141319274902, "step": 1458, "token_acc": 0.25356811624072545 }, { "epoch": 0.8554676048079742, "grad_norm": 0.836223798076072, "learning_rate": 0.0004276084407971864, "loss": 3.3768367767333984, "step": 1459, "token_acc": 0.26454960467478883 }, { "epoch": 0.8560539431251832, "grad_norm": 0.86669354929888, "learning_rate": 0.00042790152403282533, "loss": 3.4410605430603027, "step": 1460, "token_acc": 0.25572766977909717 }, { "epoch": 0.8566402814423922, "grad_norm": 0.9473105411947157, "learning_rate": 0.00042819460726846426, "loss": 3.4127588272094727, "step": 1461, "token_acc": 0.2586588625850053 }, { "epoch": 0.8572266197596012, "grad_norm": 0.940028395892319, "learning_rate": 0.0004284876905041032, "loss": 3.4587528705596924, "step": 1462, "token_acc": 0.2546515619636114 }, { "epoch": 0.8578129580768104, "grad_norm": 1.0154059126279888, "learning_rate": 0.0004287807737397421, "loss": 3.462338447570801, "step": 1463, "token_acc": 0.25339021947759327 }, { "epoch": 0.8583992963940194, "grad_norm": 0.9312404537777714, "learning_rate": 0.00042907385697538104, "loss": 3.4368348121643066, "step": 1464, "token_acc": 0.25753379824274314 }, { "epoch": 0.8589856347112284, "grad_norm": 0.7195493711637365, "learning_rate": 0.00042936694021101997, "loss": 3.420156478881836, "step": 1465, "token_acc": 0.25829490319783993 }, { "epoch": 0.8595719730284375, "grad_norm": 0.7055490871307102, "learning_rate": 0.0004296600234466589, "loss": 3.400454521179199, "step": 1466, "token_acc": 0.2606092502516162 }, { "epoch": 0.8601583113456465, "grad_norm": 0.7829869333068712, "learning_rate": 0.00042995310668229777, "loss": 3.456515312194824, "step": 1467, "token_acc": 0.2539611791560821 }, { "epoch": 0.8607446496628555, "grad_norm": 0.8827962992302656, "learning_rate": 0.0004302461899179367, "loss": 3.4581494331359863, "step": 1468, "token_acc": 0.2515526966220927 }, { "epoch": 0.8613309879800645, "grad_norm": 0.9745171058837616, "learning_rate": 0.0004305392731535756, "loss": 3.4407095909118652, "step": 1469, "token_acc": 0.2564492051436503 }, { "epoch": 0.8619173262972736, "grad_norm": 0.9109063054679837, "learning_rate": 0.00043083235638921455, "loss": 3.4063034057617188, "step": 1470, "token_acc": 0.25997696356606104 }, { "epoch": 0.8625036646144826, "grad_norm": 0.9629509262400516, "learning_rate": 0.0004311254396248535, "loss": 3.4884450435638428, "step": 1471, "token_acc": 0.24953335243505184 }, { "epoch": 0.8630900029316916, "grad_norm": 0.8275062747254033, "learning_rate": 0.0004314185228604924, "loss": 3.4839179515838623, "step": 1472, "token_acc": 0.25182774238914624 }, { "epoch": 0.8636763412489006, "grad_norm": 0.8412194139229128, "learning_rate": 0.0004317116060961313, "loss": 3.41888165473938, "step": 1473, "token_acc": 0.25964413677764897 }, { "epoch": 0.8642626795661097, "grad_norm": 0.8805580040254419, "learning_rate": 0.0004320046893317702, "loss": 3.4754481315612793, "step": 1474, "token_acc": 0.25245035436290786 }, { "epoch": 0.8648490178833187, "grad_norm": 0.9185386368495354, "learning_rate": 0.00043229777256740913, "loss": 3.4386610984802246, "step": 1475, "token_acc": 0.254469358034086 }, { "epoch": 0.8654353562005277, "grad_norm": 0.9583935089343669, "learning_rate": 0.00043259085580304806, "loss": 3.4110469818115234, "step": 1476, "token_acc": 0.26002842786560226 }, { "epoch": 0.8660216945177367, "grad_norm": 0.9505998025954556, "learning_rate": 0.000432883939038687, "loss": 3.4340872764587402, "step": 1477, "token_acc": 0.2556921305440022 }, { "epoch": 0.8666080328349458, "grad_norm": 0.9200141999600606, "learning_rate": 0.0004331770222743259, "loss": 3.4843838214874268, "step": 1478, "token_acc": 0.25116328651760256 }, { "epoch": 0.8671943711521548, "grad_norm": 0.9547306496408687, "learning_rate": 0.00043347010550996484, "loss": 3.4228217601776123, "step": 1479, "token_acc": 0.2578297792429996 }, { "epoch": 0.8677807094693638, "grad_norm": 1.05516449975363, "learning_rate": 0.00043376318874560377, "loss": 3.4603793621063232, "step": 1480, "token_acc": 0.25362284511119887 }, { "epoch": 0.8683670477865728, "grad_norm": 1.2468264497910304, "learning_rate": 0.0004340562719812427, "loss": 3.4523167610168457, "step": 1481, "token_acc": 0.2544499288532605 }, { "epoch": 0.8689533861037819, "grad_norm": 0.8341177163474129, "learning_rate": 0.0004343493552168816, "loss": 3.374677896499634, "step": 1482, "token_acc": 0.2639230366575195 }, { "epoch": 0.8695397244209909, "grad_norm": 0.7183986127931309, "learning_rate": 0.00043464243845252055, "loss": 3.4278154373168945, "step": 1483, "token_acc": 0.2578605254622121 }, { "epoch": 0.8701260627381999, "grad_norm": 0.7075598297851543, "learning_rate": 0.0004349355216881594, "loss": 3.4355833530426025, "step": 1484, "token_acc": 0.25609448554135844 }, { "epoch": 0.8707124010554089, "grad_norm": 0.7943205193572124, "learning_rate": 0.00043522860492379835, "loss": 3.4582817554473877, "step": 1485, "token_acc": 0.2526775595766982 }, { "epoch": 0.871298739372618, "grad_norm": 0.6936701984103647, "learning_rate": 0.0004355216881594373, "loss": 3.4594743251800537, "step": 1486, "token_acc": 0.25172484675349316 }, { "epoch": 0.871885077689827, "grad_norm": 0.7843735283362575, "learning_rate": 0.0004358147713950762, "loss": 3.4104268550872803, "step": 1487, "token_acc": 0.2591858239498279 }, { "epoch": 0.872471416007036, "grad_norm": 0.8385010528910897, "learning_rate": 0.00043610785463071513, "loss": 3.4594948291778564, "step": 1488, "token_acc": 0.2539525229035631 }, { "epoch": 0.873057754324245, "grad_norm": 0.8521190273723712, "learning_rate": 0.00043640093786635406, "loss": 3.45263409614563, "step": 1489, "token_acc": 0.253604147803329 }, { "epoch": 0.8736440926414542, "grad_norm": 0.7969212495836359, "learning_rate": 0.000436694021101993, "loss": 3.451590061187744, "step": 1490, "token_acc": 0.25418343173015456 }, { "epoch": 0.8742304309586632, "grad_norm": 0.7939133493957939, "learning_rate": 0.0004369871043376319, "loss": 3.436300754547119, "step": 1491, "token_acc": 0.25612652254320517 }, { "epoch": 0.8748167692758722, "grad_norm": 0.8659457667904533, "learning_rate": 0.00043728018757327084, "loss": 3.40575909614563, "step": 1492, "token_acc": 0.25953701488782954 }, { "epoch": 0.8754031075930812, "grad_norm": 1.0481689355984503, "learning_rate": 0.0004375732708089097, "loss": 3.437127113342285, "step": 1493, "token_acc": 0.25430251710448515 }, { "epoch": 0.8759894459102903, "grad_norm": 1.1270808248628945, "learning_rate": 0.00043786635404454864, "loss": 3.4267630577087402, "step": 1494, "token_acc": 0.25789778726985463 }, { "epoch": 0.8765757842274993, "grad_norm": 0.8821833823317513, "learning_rate": 0.00043815943728018757, "loss": 3.4602396488189697, "step": 1495, "token_acc": 0.2535968699826976 }, { "epoch": 0.8771621225447083, "grad_norm": 0.8636231313320814, "learning_rate": 0.0004384525205158265, "loss": 3.44230318069458, "step": 1496, "token_acc": 0.25648189029124735 }, { "epoch": 0.8777484608619174, "grad_norm": 0.727782951489715, "learning_rate": 0.0004387456037514654, "loss": 3.5044708251953125, "step": 1497, "token_acc": 0.2473282688038731 }, { "epoch": 0.8783347991791264, "grad_norm": 0.7491378264358077, "learning_rate": 0.00043903868698710435, "loss": 3.436920642852783, "step": 1498, "token_acc": 0.25539466062871885 }, { "epoch": 0.8789211374963354, "grad_norm": 0.7251735352081701, "learning_rate": 0.0004393317702227433, "loss": 3.4543185234069824, "step": 1499, "token_acc": 0.2535053640072425 }, { "epoch": 0.8795074758135444, "grad_norm": 0.694502544977926, "learning_rate": 0.0004396248534583822, "loss": 3.4446935653686523, "step": 1500, "token_acc": 0.2561802556526069 }, { "epoch": 0.8800938141307535, "grad_norm": 0.844119397593279, "learning_rate": 0.00043991793669402113, "loss": 3.4321775436401367, "step": 1501, "token_acc": 0.25758230363777757 }, { "epoch": 0.8806801524479625, "grad_norm": 0.9564906810529307, "learning_rate": 0.00044021101992966, "loss": 3.4222607612609863, "step": 1502, "token_acc": 0.2599147049056446 }, { "epoch": 0.8812664907651715, "grad_norm": 1.064010489593906, "learning_rate": 0.00044050410316529893, "loss": 3.4573817253112793, "step": 1503, "token_acc": 0.2537067968649621 }, { "epoch": 0.8818528290823805, "grad_norm": 0.9666213167522902, "learning_rate": 0.00044079718640093786, "loss": 3.463282585144043, "step": 1504, "token_acc": 0.251973219374716 }, { "epoch": 0.8824391673995896, "grad_norm": 0.832827453958595, "learning_rate": 0.0004410902696365768, "loss": 3.395981788635254, "step": 1505, "token_acc": 0.2627781102947247 }, { "epoch": 0.8830255057167986, "grad_norm": 0.7249847479709868, "learning_rate": 0.0004413833528722157, "loss": 3.3865585327148438, "step": 1506, "token_acc": 0.26271599965920267 }, { "epoch": 0.8836118440340076, "grad_norm": 0.895989554590513, "learning_rate": 0.00044167643610785464, "loss": 3.464002847671509, "step": 1507, "token_acc": 0.25222992929263066 }, { "epoch": 0.8841981823512166, "grad_norm": 0.9866100257618226, "learning_rate": 0.00044196951934349357, "loss": 3.44522762298584, "step": 1508, "token_acc": 0.257718978295675 }, { "epoch": 0.8847845206684257, "grad_norm": 1.1298580544241126, "learning_rate": 0.0004422626025791325, "loss": 3.4064228534698486, "step": 1509, "token_acc": 0.2585145752112565 }, { "epoch": 0.8853708589856347, "grad_norm": 1.0633836265481191, "learning_rate": 0.0004425556858147714, "loss": 3.380262613296509, "step": 1510, "token_acc": 0.26509709960872224 }, { "epoch": 0.8859571973028437, "grad_norm": 0.967645857665294, "learning_rate": 0.00044284876905041035, "loss": 3.468302011489868, "step": 1511, "token_acc": 0.25397846532655205 }, { "epoch": 0.8865435356200527, "grad_norm": 0.7681624013133922, "learning_rate": 0.0004431418522860493, "loss": 3.4191842079162598, "step": 1512, "token_acc": 0.2582953125921478 }, { "epoch": 0.8871298739372618, "grad_norm": 0.7604443185612104, "learning_rate": 0.0004434349355216882, "loss": 3.4150850772857666, "step": 1513, "token_acc": 0.2591268902038133 }, { "epoch": 0.8877162122544708, "grad_norm": 0.7256366834177113, "learning_rate": 0.0004437280187573271, "loss": 3.4862349033355713, "step": 1514, "token_acc": 0.24925985120159877 }, { "epoch": 0.8883025505716798, "grad_norm": 0.7450095216733653, "learning_rate": 0.000444021101992966, "loss": 3.471071243286133, "step": 1515, "token_acc": 0.25373223246128307 }, { "epoch": 0.8888888888888888, "grad_norm": 0.7552527488203084, "learning_rate": 0.00044431418522860493, "loss": 3.425395965576172, "step": 1516, "token_acc": 0.2553320911036851 }, { "epoch": 0.889475227206098, "grad_norm": 0.7276074726692404, "learning_rate": 0.00044460726846424386, "loss": 3.427705764770508, "step": 1517, "token_acc": 0.25619508471430325 }, { "epoch": 0.890061565523307, "grad_norm": 0.6958205881846867, "learning_rate": 0.0004449003516998828, "loss": 3.4386587142944336, "step": 1518, "token_acc": 0.2563137110845808 }, { "epoch": 0.890647903840516, "grad_norm": 0.6143305518967327, "learning_rate": 0.0004451934349355217, "loss": 3.4120535850524902, "step": 1519, "token_acc": 0.2598844662414826 }, { "epoch": 0.891234242157725, "grad_norm": 0.6187609831929747, "learning_rate": 0.0004454865181711606, "loss": 3.3950066566467285, "step": 1520, "token_acc": 0.2598706229483616 }, { "epoch": 0.8918205804749341, "grad_norm": 0.6690485881618454, "learning_rate": 0.0004457796014067995, "loss": 3.4305474758148193, "step": 1521, "token_acc": 0.2563496843329457 }, { "epoch": 0.8924069187921431, "grad_norm": 0.8080564417157619, "learning_rate": 0.00044607268464243844, "loss": 3.457669258117676, "step": 1522, "token_acc": 0.2535174039800916 }, { "epoch": 0.8929932571093521, "grad_norm": 1.0022077962969926, "learning_rate": 0.00044636576787807737, "loss": 3.4499759674072266, "step": 1523, "token_acc": 0.2530148405089499 }, { "epoch": 0.8935795954265612, "grad_norm": 1.0991819722787757, "learning_rate": 0.0004466588511137163, "loss": 3.4505443572998047, "step": 1524, "token_acc": 0.25348414355120064 }, { "epoch": 0.8941659337437702, "grad_norm": 0.8229506536416142, "learning_rate": 0.0004469519343493552, "loss": 3.4218320846557617, "step": 1525, "token_acc": 0.2562675979369103 }, { "epoch": 0.8947522720609792, "grad_norm": 0.9723971810192745, "learning_rate": 0.00044724501758499415, "loss": 3.4341561794281006, "step": 1526, "token_acc": 0.25721039341525004 }, { "epoch": 0.8953386103781882, "grad_norm": 1.2789149366603556, "learning_rate": 0.0004475381008206331, "loss": 3.3914241790771484, "step": 1527, "token_acc": 0.26124540922705675 }, { "epoch": 0.8959249486953973, "grad_norm": 0.8915035534052372, "learning_rate": 0.000447831184056272, "loss": 3.4878382682800293, "step": 1528, "token_acc": 0.25029549612514645 }, { "epoch": 0.8965112870126063, "grad_norm": 1.0395847724431233, "learning_rate": 0.00044812426729191093, "loss": 3.4202322959899902, "step": 1529, "token_acc": 0.2591754538007044 }, { "epoch": 0.8970976253298153, "grad_norm": 0.8809901991927357, "learning_rate": 0.00044841735052754986, "loss": 3.3865814208984375, "step": 1530, "token_acc": 0.2627483866065274 }, { "epoch": 0.8976839636470243, "grad_norm": 0.8507205426019562, "learning_rate": 0.0004487104337631888, "loss": 3.45574951171875, "step": 1531, "token_acc": 0.2545419027153741 }, { "epoch": 0.8982703019642334, "grad_norm": 1.0113115296818116, "learning_rate": 0.0004490035169988277, "loss": 3.4781317710876465, "step": 1532, "token_acc": 0.25018436530797356 }, { "epoch": 0.8988566402814424, "grad_norm": 0.9702843157526281, "learning_rate": 0.00044929660023446664, "loss": 3.406515598297119, "step": 1533, "token_acc": 0.25830583286276015 }, { "epoch": 0.8994429785986514, "grad_norm": 0.8307863112443473, "learning_rate": 0.0004495896834701055, "loss": 3.4074923992156982, "step": 1534, "token_acc": 0.26047814003682834 }, { "epoch": 0.9000293169158604, "grad_norm": 0.6881073759023008, "learning_rate": 0.00044988276670574444, "loss": 3.410942792892456, "step": 1535, "token_acc": 0.25954212619061484 }, { "epoch": 0.9006156552330695, "grad_norm": 0.7094587050482819, "learning_rate": 0.00045017584994138337, "loss": 3.4270544052124023, "step": 1536, "token_acc": 0.25797216805295764 }, { "epoch": 0.9012019935502785, "grad_norm": 0.6896778192969361, "learning_rate": 0.0004504689331770223, "loss": 3.396339178085327, "step": 1537, "token_acc": 0.26028072170111394 }, { "epoch": 0.9017883318674875, "grad_norm": 0.6997777555512175, "learning_rate": 0.00045076201641266117, "loss": 3.4065232276916504, "step": 1538, "token_acc": 0.2586796314979729 }, { "epoch": 0.9023746701846965, "grad_norm": 0.7851855432610988, "learning_rate": 0.0004510550996483001, "loss": 3.456482410430908, "step": 1539, "token_acc": 0.25301366793636565 }, { "epoch": 0.9029610085019056, "grad_norm": 0.8142782169840991, "learning_rate": 0.000451348182883939, "loss": 3.40527081489563, "step": 1540, "token_acc": 0.2588093084075203 }, { "epoch": 0.9035473468191146, "grad_norm": 1.0882979760075386, "learning_rate": 0.00045164126611957795, "loss": 3.402641773223877, "step": 1541, "token_acc": 0.26020041644976577 }, { "epoch": 0.9041336851363236, "grad_norm": 0.9696739952497907, "learning_rate": 0.0004519343493552169, "loss": 3.4109444618225098, "step": 1542, "token_acc": 0.2592243617104094 }, { "epoch": 0.9047200234535326, "grad_norm": 0.8256579594392981, "learning_rate": 0.0004522274325908558, "loss": 3.4110050201416016, "step": 1543, "token_acc": 0.2589638542557138 }, { "epoch": 0.9053063617707418, "grad_norm": 0.9063908896488115, "learning_rate": 0.00045252051582649473, "loss": 3.4508819580078125, "step": 1544, "token_acc": 0.254906256604824 }, { "epoch": 0.9058927000879508, "grad_norm": 0.8362884002696215, "learning_rate": 0.00045281359906213366, "loss": 3.3710813522338867, "step": 1545, "token_acc": 0.26476358848185666 }, { "epoch": 0.9064790384051598, "grad_norm": 0.8237916840830777, "learning_rate": 0.0004531066822977726, "loss": 3.344583034515381, "step": 1546, "token_acc": 0.2665647351508241 }, { "epoch": 0.9070653767223688, "grad_norm": 0.8644669591830477, "learning_rate": 0.0004533997655334115, "loss": 3.382384777069092, "step": 1547, "token_acc": 0.26273714607242576 }, { "epoch": 0.9076517150395779, "grad_norm": 0.8158798497352968, "learning_rate": 0.00045369284876905044, "loss": 3.448951244354248, "step": 1548, "token_acc": 0.25178438082974697 }, { "epoch": 0.9082380533567869, "grad_norm": 0.82736387667415, "learning_rate": 0.00045398593200468936, "loss": 3.3965988159179688, "step": 1549, "token_acc": 0.2596030213273226 }, { "epoch": 0.9088243916739959, "grad_norm": 0.8628189460305075, "learning_rate": 0.0004542790152403283, "loss": 3.384303569793701, "step": 1550, "token_acc": 0.2631956587598127 }, { "epoch": 0.909410729991205, "grad_norm": 0.7776277001445487, "learning_rate": 0.0004545720984759672, "loss": 3.416213274002075, "step": 1551, "token_acc": 0.2589604935669291 }, { "epoch": 0.909997068308414, "grad_norm": 0.7525587208813943, "learning_rate": 0.00045486518171160615, "loss": 3.440732002258301, "step": 1552, "token_acc": 0.25442421869902787 }, { "epoch": 0.910583406625623, "grad_norm": 0.7461404215388849, "learning_rate": 0.0004551582649472451, "loss": 3.401362180709839, "step": 1553, "token_acc": 0.2611922234960221 }, { "epoch": 0.911169744942832, "grad_norm": 0.7976664744244445, "learning_rate": 0.00045545134818288395, "loss": 3.444603443145752, "step": 1554, "token_acc": 0.255203014958997 }, { "epoch": 0.9117560832600411, "grad_norm": 0.934012421168276, "learning_rate": 0.0004557444314185228, "loss": 3.4650862216949463, "step": 1555, "token_acc": 0.25218694743818426 }, { "epoch": 0.9123424215772501, "grad_norm": 1.0056039007466018, "learning_rate": 0.00045603751465416175, "loss": 3.4694719314575195, "step": 1556, "token_acc": 0.2504398919535937 }, { "epoch": 0.9129287598944591, "grad_norm": 0.8584646325625831, "learning_rate": 0.0004563305978898007, "loss": 3.490917682647705, "step": 1557, "token_acc": 0.24916871012055308 }, { "epoch": 0.9135150982116681, "grad_norm": 0.6892864356013069, "learning_rate": 0.0004566236811254396, "loss": 3.4395768642425537, "step": 1558, "token_acc": 0.2560468170521397 }, { "epoch": 0.9141014365288772, "grad_norm": 0.5715343669688692, "learning_rate": 0.00045691676436107853, "loss": 3.4453001022338867, "step": 1559, "token_acc": 0.2535166352725612 }, { "epoch": 0.9146877748460862, "grad_norm": 0.5783989184446671, "learning_rate": 0.00045720984759671746, "loss": 3.424844741821289, "step": 1560, "token_acc": 0.25697604381254147 }, { "epoch": 0.9152741131632952, "grad_norm": 0.6417535487109237, "learning_rate": 0.0004575029308323564, "loss": 3.3383431434631348, "step": 1561, "token_acc": 0.26958431265802246 }, { "epoch": 0.9158604514805042, "grad_norm": 0.7253250977890331, "learning_rate": 0.0004577960140679953, "loss": 3.410315752029419, "step": 1562, "token_acc": 0.2582153056753575 }, { "epoch": 0.9164467897977133, "grad_norm": 0.833096932275878, "learning_rate": 0.00045808909730363424, "loss": 3.379018545150757, "step": 1563, "token_acc": 0.26390519489729186 }, { "epoch": 0.9170331281149223, "grad_norm": 1.0364196269763921, "learning_rate": 0.00045838218053927316, "loss": 3.3636443614959717, "step": 1564, "token_acc": 0.26538452806486035 }, { "epoch": 0.9176194664321313, "grad_norm": 0.9223858474968215, "learning_rate": 0.0004586752637749121, "loss": 3.4284346103668213, "step": 1565, "token_acc": 0.2564395985746259 }, { "epoch": 0.9182058047493403, "grad_norm": 0.7095741677287191, "learning_rate": 0.000458968347010551, "loss": 3.386415958404541, "step": 1566, "token_acc": 0.26226823136235 }, { "epoch": 0.9187921430665494, "grad_norm": 1.0024116063579434, "learning_rate": 0.00045926143024618995, "loss": 3.469064474105835, "step": 1567, "token_acc": 0.2506314933995212 }, { "epoch": 0.9193784813837584, "grad_norm": 1.0187843948557882, "learning_rate": 0.00045955451348182887, "loss": 3.4033994674682617, "step": 1568, "token_acc": 0.26132159563307067 }, { "epoch": 0.9199648197009674, "grad_norm": 1.0015692640637548, "learning_rate": 0.0004598475967174678, "loss": 3.384068727493286, "step": 1569, "token_acc": 0.2616135022319235 }, { "epoch": 0.9205511580181764, "grad_norm": 0.9776105114346834, "learning_rate": 0.00046014067995310673, "loss": 3.4020183086395264, "step": 1570, "token_acc": 0.26139094616528225 }, { "epoch": 0.9211374963353856, "grad_norm": 1.0423668930768366, "learning_rate": 0.00046043376318874565, "loss": 3.4187798500061035, "step": 1571, "token_acc": 0.2581928344199305 }, { "epoch": 0.9217238346525946, "grad_norm": 1.095887504601878, "learning_rate": 0.0004607268464243846, "loss": 3.4371373653411865, "step": 1572, "token_acc": 0.25752831300325135 }, { "epoch": 0.9223101729698036, "grad_norm": 0.7967860483310408, "learning_rate": 0.00046101992966002345, "loss": 3.405745029449463, "step": 1573, "token_acc": 0.2594555011050765 }, { "epoch": 0.9228965112870126, "grad_norm": 0.6371829929420623, "learning_rate": 0.00046131301289566233, "loss": 3.423616886138916, "step": 1574, "token_acc": 0.25608867302081106 }, { "epoch": 0.9234828496042217, "grad_norm": 0.802053166181579, "learning_rate": 0.00046160609613130125, "loss": 3.3841419219970703, "step": 1575, "token_acc": 0.2621176312052327 }, { "epoch": 0.9240691879214307, "grad_norm": 0.7545336693719783, "learning_rate": 0.0004618991793669402, "loss": 3.413292407989502, "step": 1576, "token_acc": 0.2594243066979091 }, { "epoch": 0.9246555262386397, "grad_norm": 0.6806272522945446, "learning_rate": 0.0004621922626025791, "loss": 3.3866686820983887, "step": 1577, "token_acc": 0.26241214668686225 }, { "epoch": 0.9252418645558487, "grad_norm": 0.6601874188764207, "learning_rate": 0.00046248534583821804, "loss": 3.497767448425293, "step": 1578, "token_acc": 0.24897872902629145 }, { "epoch": 0.9258282028730578, "grad_norm": 0.6994873009410468, "learning_rate": 0.00046277842907385696, "loss": 3.3819782733917236, "step": 1579, "token_acc": 0.2612785056598792 }, { "epoch": 0.9264145411902668, "grad_norm": 0.9191257119463673, "learning_rate": 0.0004630715123094959, "loss": 3.42500901222229, "step": 1580, "token_acc": 0.25566361935595155 }, { "epoch": 0.9270008795074758, "grad_norm": 0.8625670309788941, "learning_rate": 0.0004633645955451348, "loss": 3.388427734375, "step": 1581, "token_acc": 0.26102203893579606 }, { "epoch": 0.9275872178246849, "grad_norm": 0.6473263962471194, "learning_rate": 0.00046365767878077374, "loss": 3.4127166271209717, "step": 1582, "token_acc": 0.2586886488535153 }, { "epoch": 0.9281735561418939, "grad_norm": 0.6452112309743301, "learning_rate": 0.00046395076201641267, "loss": 3.398099422454834, "step": 1583, "token_acc": 0.2612349063310302 }, { "epoch": 0.9287598944591029, "grad_norm": 0.773040487725159, "learning_rate": 0.0004642438452520516, "loss": 3.428492546081543, "step": 1584, "token_acc": 0.2571181127708332 }, { "epoch": 0.9293462327763119, "grad_norm": 0.6020016419547088, "learning_rate": 0.0004645369284876905, "loss": 3.407637119293213, "step": 1585, "token_acc": 0.2573565891472868 }, { "epoch": 0.929932571093521, "grad_norm": 0.7526683456667859, "learning_rate": 0.00046483001172332945, "loss": 3.420018196105957, "step": 1586, "token_acc": 0.25794642122337585 }, { "epoch": 0.93051890941073, "grad_norm": 0.8750191173531543, "learning_rate": 0.0004651230949589684, "loss": 3.4087538719177246, "step": 1587, "token_acc": 0.2581692218205991 }, { "epoch": 0.931105247727939, "grad_norm": 0.8565740622312593, "learning_rate": 0.0004654161781946073, "loss": 3.459507465362549, "step": 1588, "token_acc": 0.25244922372834533 }, { "epoch": 0.931691586045148, "grad_norm": 0.8451456813762578, "learning_rate": 0.00046570926143024624, "loss": 3.4358930587768555, "step": 1589, "token_acc": 0.25554337812509925 }, { "epoch": 0.9322779243623571, "grad_norm": 0.8084681051793079, "learning_rate": 0.00046600234466588516, "loss": 3.3934881687164307, "step": 1590, "token_acc": 0.26135432532561215 }, { "epoch": 0.9328642626795661, "grad_norm": 0.8093867823702288, "learning_rate": 0.00046629542790152404, "loss": 3.4133834838867188, "step": 1591, "token_acc": 0.25900633085604186 }, { "epoch": 0.9334506009967751, "grad_norm": 0.7827236144484844, "learning_rate": 0.00046658851113716296, "loss": 3.4193758964538574, "step": 1592, "token_acc": 0.2549593554643174 }, { "epoch": 0.9340369393139841, "grad_norm": 0.8614505004918908, "learning_rate": 0.0004668815943728019, "loss": 3.3967010974884033, "step": 1593, "token_acc": 0.2623547238108266 }, { "epoch": 0.9346232776311932, "grad_norm": 0.838653188362184, "learning_rate": 0.0004671746776084408, "loss": 3.364828586578369, "step": 1594, "token_acc": 0.26203081095253355 }, { "epoch": 0.9352096159484022, "grad_norm": 0.8650187103391519, "learning_rate": 0.0004674677608440797, "loss": 3.4230520725250244, "step": 1595, "token_acc": 0.2580488773476161 }, { "epoch": 0.9357959542656112, "grad_norm": 0.8085338404694958, "learning_rate": 0.0004677608440797186, "loss": 3.353464126586914, "step": 1596, "token_acc": 0.2656242771892492 }, { "epoch": 0.9363822925828202, "grad_norm": 0.6541015660443199, "learning_rate": 0.00046805392731535754, "loss": 3.4348366260528564, "step": 1597, "token_acc": 0.25665783815454934 }, { "epoch": 0.9369686309000294, "grad_norm": 0.7302554032103659, "learning_rate": 0.00046834701055099647, "loss": 3.3766796588897705, "step": 1598, "token_acc": 0.2622136767327506 }, { "epoch": 0.9375549692172384, "grad_norm": 0.8012532300872414, "learning_rate": 0.0004686400937866354, "loss": 3.395601511001587, "step": 1599, "token_acc": 0.26080943990602806 }, { "epoch": 0.9381413075344474, "grad_norm": 0.7580375437116129, "learning_rate": 0.0004689331770222743, "loss": 3.411672353744507, "step": 1600, "token_acc": 0.25875652011185996 }, { "epoch": 0.9387276458516564, "grad_norm": 0.8228498393194645, "learning_rate": 0.00046922626025791325, "loss": 3.418936252593994, "step": 1601, "token_acc": 0.25610453881558143 }, { "epoch": 0.9393139841688655, "grad_norm": 0.9847939664936951, "learning_rate": 0.0004695193434935522, "loss": 3.364391326904297, "step": 1602, "token_acc": 0.2644745540385086 }, { "epoch": 0.9399003224860745, "grad_norm": 0.8533649591447857, "learning_rate": 0.0004698124267291911, "loss": 3.429565668106079, "step": 1603, "token_acc": 0.2551882863876285 }, { "epoch": 0.9404866608032835, "grad_norm": 0.785523459566351, "learning_rate": 0.00047010550996483003, "loss": 3.4098591804504395, "step": 1604, "token_acc": 0.2579425172409315 }, { "epoch": 0.9410729991204925, "grad_norm": 0.8813742367554283, "learning_rate": 0.00047039859320046896, "loss": 3.455411672592163, "step": 1605, "token_acc": 0.25353182237238275 }, { "epoch": 0.9416593374377016, "grad_norm": 0.8617999188506723, "learning_rate": 0.0004706916764361079, "loss": 3.4443020820617676, "step": 1606, "token_acc": 0.25336424378668665 }, { "epoch": 0.9422456757549106, "grad_norm": 0.7926410203151111, "learning_rate": 0.0004709847596717468, "loss": 3.3781723976135254, "step": 1607, "token_acc": 0.2607925445787291 }, { "epoch": 0.9428320140721196, "grad_norm": 0.6707672444765512, "learning_rate": 0.00047127784290738574, "loss": 3.386352777481079, "step": 1608, "token_acc": 0.26079590279484227 }, { "epoch": 0.9434183523893287, "grad_norm": 0.716145505301023, "learning_rate": 0.0004715709261430246, "loss": 3.419933795928955, "step": 1609, "token_acc": 0.2577940441651602 }, { "epoch": 0.9440046907065377, "grad_norm": 0.8430247330819329, "learning_rate": 0.00047186400937866354, "loss": 3.432588577270508, "step": 1610, "token_acc": 0.25608389217120386 }, { "epoch": 0.9445910290237467, "grad_norm": 0.9469236448094432, "learning_rate": 0.00047215709261430247, "loss": 3.398334503173828, "step": 1611, "token_acc": 0.2584604188031361 }, { "epoch": 0.9451773673409557, "grad_norm": 0.9579249439622376, "learning_rate": 0.0004724501758499414, "loss": 3.426422357559204, "step": 1612, "token_acc": 0.25801911469080036 }, { "epoch": 0.9457637056581648, "grad_norm": 0.7695586715347654, "learning_rate": 0.0004727432590855803, "loss": 3.4373886585235596, "step": 1613, "token_acc": 0.2563144605301693 }, { "epoch": 0.9463500439753738, "grad_norm": 0.8098633244935107, "learning_rate": 0.00047303634232121925, "loss": 3.3671505451202393, "step": 1614, "token_acc": 0.26410754023702326 }, { "epoch": 0.9469363822925828, "grad_norm": 0.6710145475248834, "learning_rate": 0.0004733294255568581, "loss": 3.400850772857666, "step": 1615, "token_acc": 0.2604837949137091 }, { "epoch": 0.9475227206097918, "grad_norm": 0.5800965944556575, "learning_rate": 0.00047362250879249705, "loss": 3.3928072452545166, "step": 1616, "token_acc": 0.262466446691352 }, { "epoch": 0.9481090589270009, "grad_norm": 0.6754966391847237, "learning_rate": 0.000473915592028136, "loss": 3.4092674255371094, "step": 1617, "token_acc": 0.25772788868852636 }, { "epoch": 0.9486953972442099, "grad_norm": 0.6339734969168743, "learning_rate": 0.0004742086752637749, "loss": 3.4248099327087402, "step": 1618, "token_acc": 0.25686503674689914 }, { "epoch": 0.9492817355614189, "grad_norm": 0.5920743661390458, "learning_rate": 0.00047450175849941383, "loss": 3.396275520324707, "step": 1619, "token_acc": 0.2600200771384794 }, { "epoch": 0.9498680738786279, "grad_norm": 0.5970969865219967, "learning_rate": 0.00047479484173505276, "loss": 3.44085955619812, "step": 1620, "token_acc": 0.25566467214899774 }, { "epoch": 0.950454412195837, "grad_norm": 0.6481590670443838, "learning_rate": 0.0004750879249706917, "loss": 3.433203935623169, "step": 1621, "token_acc": 0.2551482914916075 }, { "epoch": 0.951040750513046, "grad_norm": 0.7167417366934338, "learning_rate": 0.0004753810082063306, "loss": 3.3610758781433105, "step": 1622, "token_acc": 0.26279211393652363 }, { "epoch": 0.951627088830255, "grad_norm": 0.7010329218212996, "learning_rate": 0.00047567409144196954, "loss": 3.3953728675842285, "step": 1623, "token_acc": 0.2593054258914925 }, { "epoch": 0.952213427147464, "grad_norm": 0.7630775961357231, "learning_rate": 0.00047596717467760847, "loss": 3.417132616043091, "step": 1624, "token_acc": 0.25767371840583153 }, { "epoch": 0.9527997654646732, "grad_norm": 0.7898798427939647, "learning_rate": 0.0004762602579132474, "loss": 3.4229321479797363, "step": 1625, "token_acc": 0.25517000107042065 }, { "epoch": 0.9533861037818822, "grad_norm": 0.8658687412395234, "learning_rate": 0.0004765533411488863, "loss": 3.3890128135681152, "step": 1626, "token_acc": 0.2610182408920889 }, { "epoch": 0.9539724420990912, "grad_norm": 0.954383604278532, "learning_rate": 0.0004768464243845252, "loss": 3.296166181564331, "step": 1627, "token_acc": 0.2732230694201181 }, { "epoch": 0.9545587804163002, "grad_norm": 0.871037437062733, "learning_rate": 0.0004771395076201641, "loss": 3.403876304626465, "step": 1628, "token_acc": 0.2604372744867815 }, { "epoch": 0.9551451187335093, "grad_norm": 0.6887515036189064, "learning_rate": 0.00047743259085580305, "loss": 3.3721160888671875, "step": 1629, "token_acc": 0.2629014008691085 }, { "epoch": 0.9557314570507183, "grad_norm": 0.6502597623294148, "learning_rate": 0.000477725674091442, "loss": 3.4297432899475098, "step": 1630, "token_acc": 0.2571084158991657 }, { "epoch": 0.9563177953679273, "grad_norm": 0.736743551503019, "learning_rate": 0.0004780187573270809, "loss": 3.3795535564422607, "step": 1631, "token_acc": 0.26382471816745073 }, { "epoch": 0.9569041336851363, "grad_norm": 0.7216480645634892, "learning_rate": 0.00047831184056271983, "loss": 3.4060494899749756, "step": 1632, "token_acc": 0.25803727355662714 }, { "epoch": 0.9574904720023454, "grad_norm": 0.688184405778739, "learning_rate": 0.00047860492379835876, "loss": 3.405019998550415, "step": 1633, "token_acc": 0.2596055868475178 }, { "epoch": 0.9580768103195544, "grad_norm": 0.7190072608485956, "learning_rate": 0.0004788980070339977, "loss": 3.447218894958496, "step": 1634, "token_acc": 0.25326800728269544 }, { "epoch": 0.9586631486367634, "grad_norm": 0.7239985510905795, "learning_rate": 0.00047919109026963656, "loss": 3.4083261489868164, "step": 1635, "token_acc": 0.2599820172091127 }, { "epoch": 0.9592494869539725, "grad_norm": 0.8339365726306746, "learning_rate": 0.0004794841735052755, "loss": 3.41027569770813, "step": 1636, "token_acc": 0.2577975645569752 }, { "epoch": 0.9598358252711815, "grad_norm": 1.0206311370767058, "learning_rate": 0.0004797772567409144, "loss": 3.389923095703125, "step": 1637, "token_acc": 0.2631311200951761 }, { "epoch": 0.9604221635883905, "grad_norm": 1.1153323497999141, "learning_rate": 0.00048007033997655334, "loss": 3.37899112701416, "step": 1638, "token_acc": 0.2600477920887954 }, { "epoch": 0.9610085019055995, "grad_norm": 0.9268614356177819, "learning_rate": 0.00048036342321219227, "loss": 3.452530860900879, "step": 1639, "token_acc": 0.25428895726922934 }, { "epoch": 0.9615948402228086, "grad_norm": 0.8818451293908526, "learning_rate": 0.0004806565064478312, "loss": 3.402853488922119, "step": 1640, "token_acc": 0.25927329611212147 }, { "epoch": 0.9621811785400176, "grad_norm": 0.8675753557854642, "learning_rate": 0.0004809495896834701, "loss": 3.4168453216552734, "step": 1641, "token_acc": 0.2570313335472142 }, { "epoch": 0.9627675168572266, "grad_norm": 0.9406236909494683, "learning_rate": 0.00048124267291910905, "loss": 3.468832015991211, "step": 1642, "token_acc": 0.24978093661766138 }, { "epoch": 0.9633538551744356, "grad_norm": 0.7194484908773244, "learning_rate": 0.000481535756154748, "loss": 3.477181911468506, "step": 1643, "token_acc": 0.2500675574631976 }, { "epoch": 0.9639401934916447, "grad_norm": 0.7158929938451088, "learning_rate": 0.00048182883939038685, "loss": 3.4275970458984375, "step": 1644, "token_acc": 0.25635618338903954 }, { "epoch": 0.9645265318088537, "grad_norm": 0.7264616014841921, "learning_rate": 0.0004821219226260258, "loss": 3.381517171859741, "step": 1645, "token_acc": 0.2620527127574895 }, { "epoch": 0.9651128701260627, "grad_norm": 0.7003298576904327, "learning_rate": 0.0004824150058616647, "loss": 3.4201953411102295, "step": 1646, "token_acc": 0.2557165427934221 }, { "epoch": 0.9656992084432717, "grad_norm": 0.6600357159929129, "learning_rate": 0.00048270808909730363, "loss": 3.344597816467285, "step": 1647, "token_acc": 0.26677482325630475 }, { "epoch": 0.9662855467604808, "grad_norm": 0.7301278050131998, "learning_rate": 0.00048300117233294256, "loss": 3.395505905151367, "step": 1648, "token_acc": 0.2616742383786475 }, { "epoch": 0.9668718850776898, "grad_norm": 0.7758075543448638, "learning_rate": 0.0004832942555685815, "loss": 3.427757501602173, "step": 1649, "token_acc": 0.2550704389335196 }, { "epoch": 0.9674582233948988, "grad_norm": 0.8507861698669322, "learning_rate": 0.0004835873388042204, "loss": 3.3657493591308594, "step": 1650, "token_acc": 0.2646349945811294 }, { "epoch": 0.9680445617121078, "grad_norm": 0.8676422858402819, "learning_rate": 0.00048388042203985934, "loss": 3.436417579650879, "step": 1651, "token_acc": 0.2544621086511265 }, { "epoch": 0.968630900029317, "grad_norm": 0.809804243949132, "learning_rate": 0.00048417350527549827, "loss": 3.365145683288574, "step": 1652, "token_acc": 0.2659257237326631 }, { "epoch": 0.969217238346526, "grad_norm": 0.8750343611634581, "learning_rate": 0.0004844665885111372, "loss": 3.426231622695923, "step": 1653, "token_acc": 0.25500453264919865 }, { "epoch": 0.969803576663735, "grad_norm": 0.9551722112847345, "learning_rate": 0.0004847596717467761, "loss": 3.3726325035095215, "step": 1654, "token_acc": 0.26359752614834325 }, { "epoch": 0.970389914980944, "grad_norm": 0.6610719693143526, "learning_rate": 0.00048505275498241505, "loss": 3.3770828247070312, "step": 1655, "token_acc": 0.2626288159179623 }, { "epoch": 0.9709762532981531, "grad_norm": 0.5935609038606166, "learning_rate": 0.0004853458382180539, "loss": 3.385439872741699, "step": 1656, "token_acc": 0.26044661184452567 }, { "epoch": 0.9715625916153621, "grad_norm": 0.7036883488546932, "learning_rate": 0.00048563892145369285, "loss": 3.3625690937042236, "step": 1657, "token_acc": 0.26399290706164596 }, { "epoch": 0.9721489299325711, "grad_norm": 0.8974794960322794, "learning_rate": 0.0004859320046893318, "loss": 3.41567325592041, "step": 1658, "token_acc": 0.25691336367859746 }, { "epoch": 0.9727352682497801, "grad_norm": 0.8198680084145463, "learning_rate": 0.0004862250879249707, "loss": 3.3812437057495117, "step": 1659, "token_acc": 0.26185687643982886 }, { "epoch": 0.9733216065669892, "grad_norm": 0.6649985622623101, "learning_rate": 0.00048651817116060963, "loss": 3.3992867469787598, "step": 1660, "token_acc": 0.2576827028526882 }, { "epoch": 0.9739079448841982, "grad_norm": 0.7137361713751447, "learning_rate": 0.00048681125439624856, "loss": 3.3787105083465576, "step": 1661, "token_acc": 0.26169331429557685 }, { "epoch": 0.9744942832014072, "grad_norm": 0.637235866239349, "learning_rate": 0.00048710433763188743, "loss": 3.364614963531494, "step": 1662, "token_acc": 0.2627127032066321 }, { "epoch": 0.9750806215186162, "grad_norm": 0.5648236779574606, "learning_rate": 0.00048739742086752636, "loss": 3.395259380340576, "step": 1663, "token_acc": 0.2592245025289073 }, { "epoch": 0.9756669598358253, "grad_norm": 0.6271310964666041, "learning_rate": 0.0004876905041031653, "loss": 3.4439775943756104, "step": 1664, "token_acc": 0.25464983302440103 }, { "epoch": 0.9762532981530343, "grad_norm": 0.6723891038383868, "learning_rate": 0.0004879835873388042, "loss": 3.3858578205108643, "step": 1665, "token_acc": 0.259938133388556 }, { "epoch": 0.9768396364702433, "grad_norm": 0.6157248971507705, "learning_rate": 0.00048827667057444314, "loss": 3.4156439304351807, "step": 1666, "token_acc": 0.2571010195871461 }, { "epoch": 0.9774259747874524, "grad_norm": 0.6192836367967911, "learning_rate": 0.0004885697538100821, "loss": 3.386714458465576, "step": 1667, "token_acc": 0.26158883688592266 }, { "epoch": 0.9780123131046614, "grad_norm": 0.7146332695280808, "learning_rate": 0.000488862837045721, "loss": 3.3835902214050293, "step": 1668, "token_acc": 0.26070445542856086 }, { "epoch": 0.9785986514218704, "grad_norm": 0.6593723124701049, "learning_rate": 0.0004891559202813599, "loss": 3.419379234313965, "step": 1669, "token_acc": 0.2575162235246998 }, { "epoch": 0.9791849897390794, "grad_norm": 0.6899664470717639, "learning_rate": 0.0004894490035169988, "loss": 3.4234187602996826, "step": 1670, "token_acc": 0.25621284276434536 }, { "epoch": 0.9797713280562885, "grad_norm": 0.7132113193640718, "learning_rate": 0.0004897420867526378, "loss": 3.325669050216675, "step": 1671, "token_acc": 0.26915408184724654 }, { "epoch": 0.9803576663734975, "grad_norm": 0.7689165626854885, "learning_rate": 0.0004900351699882767, "loss": 3.434572219848633, "step": 1672, "token_acc": 0.25396825396825395 }, { "epoch": 0.9809440046907065, "grad_norm": 0.7958390660551526, "learning_rate": 0.0004903282532239156, "loss": 3.408353805541992, "step": 1673, "token_acc": 0.2560165497617441 }, { "epoch": 0.9815303430079155, "grad_norm": 0.8642436370261324, "learning_rate": 0.0004906213364595546, "loss": 3.4102115631103516, "step": 1674, "token_acc": 0.2596058029532425 }, { "epoch": 0.9821166813251246, "grad_norm": 0.8545379790654077, "learning_rate": 0.0004909144196951935, "loss": 3.4542784690856934, "step": 1675, "token_acc": 0.25245472795165436 }, { "epoch": 0.9827030196423336, "grad_norm": 0.6886905204726381, "learning_rate": 0.0004912075029308324, "loss": 3.367812395095825, "step": 1676, "token_acc": 0.26391417341987683 }, { "epoch": 0.9832893579595426, "grad_norm": 0.7189864992837707, "learning_rate": 0.0004915005861664713, "loss": 3.4203402996063232, "step": 1677, "token_acc": 0.25541509248683375 }, { "epoch": 0.9838756962767516, "grad_norm": 0.8062820180700397, "learning_rate": 0.0004917936694021103, "loss": 3.4165563583374023, "step": 1678, "token_acc": 0.2579511418581393 }, { "epoch": 0.9844620345939608, "grad_norm": 0.7328879590792131, "learning_rate": 0.0004920867526377492, "loss": 3.4020133018493652, "step": 1679, "token_acc": 0.25841710397401657 }, { "epoch": 0.9850483729111698, "grad_norm": 0.744101512580751, "learning_rate": 0.000492379835873388, "loss": 3.381073236465454, "step": 1680, "token_acc": 0.2624654101185193 }, { "epoch": 0.9856347112283788, "grad_norm": 0.8031669352623175, "learning_rate": 0.0004926729191090269, "loss": 3.4260525703430176, "step": 1681, "token_acc": 0.2569296923622934 }, { "epoch": 0.9862210495455878, "grad_norm": 0.7796885413825525, "learning_rate": 0.0004929660023446659, "loss": 3.376763343811035, "step": 1682, "token_acc": 0.26230354358909536 }, { "epoch": 0.9868073878627969, "grad_norm": 0.765091477223617, "learning_rate": 0.0004932590855803048, "loss": 3.430267333984375, "step": 1683, "token_acc": 0.2569303674269375 }, { "epoch": 0.9873937261800059, "grad_norm": 0.8131342256736485, "learning_rate": 0.0004935521688159437, "loss": 3.404228925704956, "step": 1684, "token_acc": 0.2591612865042625 }, { "epoch": 0.9879800644972149, "grad_norm": 0.6887164853520207, "learning_rate": 0.0004938452520515826, "loss": 3.395916223526001, "step": 1685, "token_acc": 0.2582110421430905 }, { "epoch": 0.9885664028144239, "grad_norm": 0.683908218051115, "learning_rate": 0.0004941383352872216, "loss": 3.386446952819824, "step": 1686, "token_acc": 0.2630170465612051 }, { "epoch": 0.989152741131633, "grad_norm": 0.82379347470404, "learning_rate": 0.0004944314185228605, "loss": 3.3611955642700195, "step": 1687, "token_acc": 0.262778975790798 }, { "epoch": 0.989739079448842, "grad_norm": 0.7358622119024149, "learning_rate": 0.0004947245017584994, "loss": 3.413051128387451, "step": 1688, "token_acc": 0.25815074871289884 }, { "epoch": 0.990325417766051, "grad_norm": 0.5960150507073257, "learning_rate": 0.0004950175849941384, "loss": 3.4064674377441406, "step": 1689, "token_acc": 0.2595074463454207 }, { "epoch": 0.99091175608326, "grad_norm": 0.5853059987316303, "learning_rate": 0.0004953106682297773, "loss": 3.3748228549957275, "step": 1690, "token_acc": 0.26273726540838127 }, { "epoch": 0.9914980944004691, "grad_norm": 0.708395783955718, "learning_rate": 0.0004956037514654162, "loss": 3.39245867729187, "step": 1691, "token_acc": 0.26029490391772464 }, { "epoch": 0.9920844327176781, "grad_norm": 0.7771210564653088, "learning_rate": 0.0004958968347010551, "loss": 3.4332962036132812, "step": 1692, "token_acc": 0.2545539516316423 }, { "epoch": 0.9926707710348871, "grad_norm": 0.7487859833065365, "learning_rate": 0.0004961899179366941, "loss": 3.3970022201538086, "step": 1693, "token_acc": 0.2584394604345602 }, { "epoch": 0.9932571093520962, "grad_norm": 0.7307276119763373, "learning_rate": 0.000496483001172333, "loss": 3.402738094329834, "step": 1694, "token_acc": 0.2589121818800597 }, { "epoch": 0.9938434476693052, "grad_norm": 0.6769403752420412, "learning_rate": 0.0004967760844079719, "loss": 3.3789596557617188, "step": 1695, "token_acc": 0.2631026635090789 }, { "epoch": 0.9944297859865142, "grad_norm": 0.7209543302853496, "learning_rate": 0.0004970691676436108, "loss": 3.346503734588623, "step": 1696, "token_acc": 0.2656507929735107 }, { "epoch": 0.9950161243037232, "grad_norm": 0.7222250086788583, "learning_rate": 0.0004973622508792498, "loss": 3.394810676574707, "step": 1697, "token_acc": 0.25921117592203186 }, { "epoch": 0.9956024626209323, "grad_norm": 0.7763937070218895, "learning_rate": 0.0004976553341148886, "loss": 3.4150874614715576, "step": 1698, "token_acc": 0.25711403527255106 }, { "epoch": 0.9961888009381413, "grad_norm": 0.8701727333777808, "learning_rate": 0.0004979484173505275, "loss": 3.4142632484436035, "step": 1699, "token_acc": 0.2567015464493374 }, { "epoch": 0.9967751392553503, "grad_norm": 0.8057586007862209, "learning_rate": 0.0004982415005861664, "loss": 3.3474903106689453, "step": 1700, "token_acc": 0.2655471289274106 }, { "epoch": 0.9973614775725593, "grad_norm": 0.8036537891855855, "learning_rate": 0.0004985345838218054, "loss": 3.364363670349121, "step": 1701, "token_acc": 0.2631264529032091 }, { "epoch": 0.9979478158897684, "grad_norm": 0.7338803456436668, "learning_rate": 0.0004988276670574443, "loss": 3.3639016151428223, "step": 1702, "token_acc": 0.26256075722691224 }, { "epoch": 0.9985341542069774, "grad_norm": 0.7603604772565937, "learning_rate": 0.0004991207502930832, "loss": 3.407386064529419, "step": 1703, "token_acc": 0.26093307558615664 }, { "epoch": 0.9991204925241864, "grad_norm": 0.670563794799557, "learning_rate": 0.0004994138335287222, "loss": 3.3578433990478516, "step": 1704, "token_acc": 0.2657139079783387 }, { "epoch": 0.9997068308413954, "grad_norm": 0.7924571457431127, "learning_rate": 0.0004997069167643611, "loss": 3.4050941467285156, "step": 1705, "token_acc": 0.2587272240085745 }, { "epoch": 1.0, "grad_norm": 0.7857934700198668, "learning_rate": 0.0005, "loss": 3.3149847984313965, "step": 1706, "token_acc": 0.27139554777083036 }, { "epoch": 1.0, "eval_loss": 3.363898515701294, "eval_runtime": 6.3304, "eval_samples_per_second": 40.44, "eval_steps_per_second": 5.055, "eval_token_acc": 0.26314017610325324, "step": 1706 }, { "epoch": 1.0005863383172091, "grad_norm": 0.6748829955942852, "learning_rate": 0.0004999999988257934, "loss": 3.3930788040161133, "step": 1707, "token_acc": 0.2604787022109171 }, { "epoch": 1.001172676634418, "grad_norm": 0.6337504746544477, "learning_rate": 0.0004999999953031737, "loss": 3.3502395153045654, "step": 1708, "token_acc": 0.2652086723321074 }, { "epoch": 1.0017590149516271, "grad_norm": 0.6047408353877396, "learning_rate": 0.000499999989432141, "loss": 3.381061553955078, "step": 1709, "token_acc": 0.2618050757872131 }, { "epoch": 1.0023453532688362, "grad_norm": 0.7720425176929202, "learning_rate": 0.0004999999812126953, "loss": 3.3666725158691406, "step": 1710, "token_acc": 0.263822899024266 }, { "epoch": 1.0029316915860451, "grad_norm": 0.7813815613616496, "learning_rate": 0.0004999999706448365, "loss": 3.3831634521484375, "step": 1711, "token_acc": 0.2616015886255239 }, { "epoch": 1.0035180299032542, "grad_norm": 0.8105809824926559, "learning_rate": 0.000499999957728565, "loss": 3.475907802581787, "step": 1712, "token_acc": 0.2525038961038961 }, { "epoch": 1.0041043682204631, "grad_norm": 0.749777924398785, "learning_rate": 0.0004999999424638807, "loss": 3.3729724884033203, "step": 1713, "token_acc": 0.26232628973919664 }, { "epoch": 1.0046907065376722, "grad_norm": 0.7139398521142178, "learning_rate": 0.0004999999248507838, "loss": 3.368234634399414, "step": 1714, "token_acc": 0.26018118033426374 }, { "epoch": 1.0052770448548813, "grad_norm": 0.7397609215524301, "learning_rate": 0.0004999999048892746, "loss": 3.3365745544433594, "step": 1715, "token_acc": 0.2675504364707075 }, { "epoch": 1.0058633831720902, "grad_norm": 0.6976681068583725, "learning_rate": 0.0004999998825793531, "loss": 3.3686835765838623, "step": 1716, "token_acc": 0.26274320375664184 }, { "epoch": 1.0064497214892993, "grad_norm": 0.7475038689031207, "learning_rate": 0.0004999998579210196, "loss": 3.388590097427368, "step": 1717, "token_acc": 0.26067491111944596 }, { "epoch": 1.0070360598065085, "grad_norm": 0.7938229829548396, "learning_rate": 0.0004999998309142742, "loss": 3.3868818283081055, "step": 1718, "token_acc": 0.2588959239199532 }, { "epoch": 1.0076223981237173, "grad_norm": 0.7568816544778835, "learning_rate": 0.0004999998015591174, "loss": 3.3844709396362305, "step": 1719, "token_acc": 0.2609687949898292 }, { "epoch": 1.0082087364409265, "grad_norm": 0.8196030834242516, "learning_rate": 0.0004999997698555493, "loss": 3.3978371620178223, "step": 1720, "token_acc": 0.2597024001621623 }, { "epoch": 1.0087950747581353, "grad_norm": 0.767341805870375, "learning_rate": 0.0004999997358035703, "loss": 3.399653911590576, "step": 1721, "token_acc": 0.2593581171436213 }, { "epoch": 1.0093814130753445, "grad_norm": 0.6071678324713047, "learning_rate": 0.0004999996994031805, "loss": 3.3867225646972656, "step": 1722, "token_acc": 0.26138374753513005 }, { "epoch": 1.0099677513925536, "grad_norm": 0.6668094674908489, "learning_rate": 0.0004999996606543806, "loss": 3.330446720123291, "step": 1723, "token_acc": 0.2656608654059413 }, { "epoch": 1.0105540897097625, "grad_norm": 0.7300729028364705, "learning_rate": 0.0004999996195571706, "loss": 3.4130914211273193, "step": 1724, "token_acc": 0.25771057544115755 }, { "epoch": 1.0111404280269716, "grad_norm": 0.7227177106444123, "learning_rate": 0.0004999995761115511, "loss": 3.379822254180908, "step": 1725, "token_acc": 0.26179503766392764 }, { "epoch": 1.0117267663441807, "grad_norm": 0.7187443263441782, "learning_rate": 0.0004999995303175225, "loss": 3.3946266174316406, "step": 1726, "token_acc": 0.25713018837152407 }, { "epoch": 1.0123131046613896, "grad_norm": 0.6451969883365569, "learning_rate": 0.0004999994821750852, "loss": 3.3877639770507812, "step": 1727, "token_acc": 0.25795602563434583 }, { "epoch": 1.0128994429785987, "grad_norm": 0.6142002800653262, "learning_rate": 0.0004999994316842397, "loss": 3.412135124206543, "step": 1728, "token_acc": 0.25619754965946645 }, { "epoch": 1.0134857812958076, "grad_norm": 0.5043742488623404, "learning_rate": 0.0004999993788449863, "loss": 3.360666275024414, "step": 1729, "token_acc": 0.2632796036634787 }, { "epoch": 1.0140721196130167, "grad_norm": 0.5947335928966146, "learning_rate": 0.0004999993236573257, "loss": 3.393095016479492, "step": 1730, "token_acc": 0.25890385388924964 }, { "epoch": 1.0146584579302258, "grad_norm": 0.6515982104663784, "learning_rate": 0.0004999992661212583, "loss": 3.325471878051758, "step": 1731, "token_acc": 0.26492156221913554 }, { "epoch": 1.0152447962474347, "grad_norm": 0.5735870934063698, "learning_rate": 0.0004999992062367846, "loss": 3.331839084625244, "step": 1732, "token_acc": 0.2659748888405879 }, { "epoch": 1.0158311345646438, "grad_norm": 0.6803176211640519, "learning_rate": 0.0004999991440039054, "loss": 3.292379856109619, "step": 1733, "token_acc": 0.271922122043643 }, { "epoch": 1.016417472881853, "grad_norm": 0.7130285588326162, "learning_rate": 0.0004999990794226209, "loss": 3.376286506652832, "step": 1734, "token_acc": 0.2602660452968364 }, { "epoch": 1.0170038111990618, "grad_norm": 0.7155190872456177, "learning_rate": 0.000499999012492932, "loss": 3.3886499404907227, "step": 1735, "token_acc": 0.2611530302206128 }, { "epoch": 1.017590149516271, "grad_norm": 0.667956458513744, "learning_rate": 0.0004999989432148393, "loss": 3.348695755004883, "step": 1736, "token_acc": 0.2645891064131046 }, { "epoch": 1.01817648783348, "grad_norm": 0.6093231373645646, "learning_rate": 0.0004999988715883435, "loss": 3.400038242340088, "step": 1737, "token_acc": 0.2591881639870949 }, { "epoch": 1.018762826150689, "grad_norm": 0.5906252342828342, "learning_rate": 0.000499998797613445, "loss": 3.351621150970459, "step": 1738, "token_acc": 0.26448133972273813 }, { "epoch": 1.019349164467898, "grad_norm": 0.7274545561441506, "learning_rate": 0.0004999987212901448, "loss": 3.3263134956359863, "step": 1739, "token_acc": 0.2667792889674464 }, { "epoch": 1.019935502785107, "grad_norm": 0.701253329179986, "learning_rate": 0.0004999986426184435, "loss": 3.3744101524353027, "step": 1740, "token_acc": 0.26211195604908416 }, { "epoch": 1.020521841102316, "grad_norm": 0.6769895553211768, "learning_rate": 0.0004999985615983418, "loss": 3.354360818862915, "step": 1741, "token_acc": 0.26484586120949755 }, { "epoch": 1.0211081794195251, "grad_norm": 0.76944864621591, "learning_rate": 0.0004999984782298404, "loss": 3.368668556213379, "step": 1742, "token_acc": 0.2641847250700224 }, { "epoch": 1.021694517736734, "grad_norm": 0.7947936730781323, "learning_rate": 0.0004999983925129403, "loss": 3.432093620300293, "step": 1743, "token_acc": 0.2538197183851945 }, { "epoch": 1.0222808560539431, "grad_norm": 0.6766084274368989, "learning_rate": 0.0004999983044476421, "loss": 3.357813835144043, "step": 1744, "token_acc": 0.26502710428389326 }, { "epoch": 1.0228671943711523, "grad_norm": 0.666759444314078, "learning_rate": 0.0004999982140339468, "loss": 3.4198641777038574, "step": 1745, "token_acc": 0.25719278747779845 }, { "epoch": 1.0234535326883611, "grad_norm": 0.6746485385187634, "learning_rate": 0.0004999981212718551, "loss": 3.354325771331787, "step": 1746, "token_acc": 0.26598047905777833 }, { "epoch": 1.0240398710055703, "grad_norm": 0.8935724371046717, "learning_rate": 0.0004999980261613678, "loss": 3.3523499965667725, "step": 1747, "token_acc": 0.2646181423802162 }, { "epoch": 1.0246262093227791, "grad_norm": 0.9342917892161134, "learning_rate": 0.0004999979287024861, "loss": 3.362705707550049, "step": 1748, "token_acc": 0.2627683119527913 }, { "epoch": 1.0252125476399883, "grad_norm": 0.777772479521273, "learning_rate": 0.0004999978288952106, "loss": 3.3829448223114014, "step": 1749, "token_acc": 0.2608665723354622 }, { "epoch": 1.0257988859571974, "grad_norm": 0.5551377629841278, "learning_rate": 0.0004999977267395424, "loss": 3.368907928466797, "step": 1750, "token_acc": 0.26152975499835907 }, { "epoch": 1.0263852242744063, "grad_norm": 0.559240511469829, "learning_rate": 0.0004999976222354825, "loss": 3.3463454246520996, "step": 1751, "token_acc": 0.2652679739964385 }, { "epoch": 1.0269715625916154, "grad_norm": 0.6133721785415677, "learning_rate": 0.0004999975153830319, "loss": 3.345900535583496, "step": 1752, "token_acc": 0.26380465151198274 }, { "epoch": 1.0275579009088245, "grad_norm": 0.6599106867148942, "learning_rate": 0.0004999974061821914, "loss": 3.3614983558654785, "step": 1753, "token_acc": 0.26276796224642235 }, { "epoch": 1.0281442392260334, "grad_norm": 0.6571596715306742, "learning_rate": 0.0004999972946329621, "loss": 3.3581602573394775, "step": 1754, "token_acc": 0.2650858522505024 }, { "epoch": 1.0287305775432425, "grad_norm": 0.709919111647143, "learning_rate": 0.0004999971807353452, "loss": 3.3908140659332275, "step": 1755, "token_acc": 0.2572123102325368 }, { "epoch": 1.0293169158604514, "grad_norm": 0.569630564184072, "learning_rate": 0.0004999970644893416, "loss": 3.356912612915039, "step": 1756, "token_acc": 0.26457645032386196 }, { "epoch": 1.0299032541776605, "grad_norm": 0.607527295907633, "learning_rate": 0.0004999969458949524, "loss": 3.3633108139038086, "step": 1757, "token_acc": 0.26201357811674375 }, { "epoch": 1.0304895924948696, "grad_norm": 0.6859989890742458, "learning_rate": 0.0004999968249521789, "loss": 3.333763599395752, "step": 1758, "token_acc": 0.26654568139284296 }, { "epoch": 1.0310759308120785, "grad_norm": 0.6080341757915679, "learning_rate": 0.000499996701661022, "loss": 3.3805861473083496, "step": 1759, "token_acc": 0.26147884947073846 }, { "epoch": 1.0316622691292876, "grad_norm": 0.7174563110958333, "learning_rate": 0.0004999965760214831, "loss": 3.369626998901367, "step": 1760, "token_acc": 0.26097056705461474 }, { "epoch": 1.0322486074464967, "grad_norm": 0.8150131382092486, "learning_rate": 0.000499996448033563, "loss": 3.377744197845459, "step": 1761, "token_acc": 0.26109926365987923 }, { "epoch": 1.0328349457637056, "grad_norm": 1.0471528401807257, "learning_rate": 0.0004999963176972634, "loss": 3.3738291263580322, "step": 1762, "token_acc": 0.26174884027748224 }, { "epoch": 1.0334212840809147, "grad_norm": 0.9328608970303516, "learning_rate": 0.0004999961850125852, "loss": 3.3788812160491943, "step": 1763, "token_acc": 0.2606408762285867 }, { "epoch": 1.0340076223981236, "grad_norm": 0.7065766123746435, "learning_rate": 0.0004999960499795296, "loss": 3.376729965209961, "step": 1764, "token_acc": 0.2613145549608382 }, { "epoch": 1.0345939607153327, "grad_norm": 0.5407566341838578, "learning_rate": 0.000499995912598098, "loss": 3.3315343856811523, "step": 1765, "token_acc": 0.2660919420627653 }, { "epoch": 1.0351802990325418, "grad_norm": 0.621189182172432, "learning_rate": 0.0004999957728682918, "loss": 3.323065757751465, "step": 1766, "token_acc": 0.2681113218883895 }, { "epoch": 1.0357666373497507, "grad_norm": 0.5422461458584718, "learning_rate": 0.0004999956307901121, "loss": 3.3605518341064453, "step": 1767, "token_acc": 0.2646861190383962 }, { "epoch": 1.0363529756669598, "grad_norm": 0.6210377597270093, "learning_rate": 0.0004999954863635604, "loss": 3.394392490386963, "step": 1768, "token_acc": 0.25997867092162785 }, { "epoch": 1.036939313984169, "grad_norm": 0.6425091949025237, "learning_rate": 0.0004999953395886378, "loss": 3.328765869140625, "step": 1769, "token_acc": 0.2654887864050384 }, { "epoch": 1.0375256523013778, "grad_norm": 0.6271117122961921, "learning_rate": 0.000499995190465346, "loss": 3.3462252616882324, "step": 1770, "token_acc": 0.2639479486828194 }, { "epoch": 1.038111990618587, "grad_norm": 0.7426235276624706, "learning_rate": 0.0004999950389936862, "loss": 3.375129461288452, "step": 1771, "token_acc": 0.26430534033049885 }, { "epoch": 1.038698328935796, "grad_norm": 0.7971650133320137, "learning_rate": 0.00049999488517366, "loss": 3.4017906188964844, "step": 1772, "token_acc": 0.2572413774625774 }, { "epoch": 1.039284667253005, "grad_norm": 0.7147371173369438, "learning_rate": 0.0004999947290052686, "loss": 3.390611171722412, "step": 1773, "token_acc": 0.25935235920248884 }, { "epoch": 1.039871005570214, "grad_norm": 0.6193729362901955, "learning_rate": 0.0004999945704885137, "loss": 3.3345460891723633, "step": 1774, "token_acc": 0.2665105683493924 }, { "epoch": 1.040457343887423, "grad_norm": 0.7206153699453736, "learning_rate": 0.0004999944096233966, "loss": 3.367556095123291, "step": 1775, "token_acc": 0.2627952338019445 }, { "epoch": 1.041043682204632, "grad_norm": 0.7897275617631395, "learning_rate": 0.000499994246409919, "loss": 3.400113821029663, "step": 1776, "token_acc": 0.25870588298139435 }, { "epoch": 1.0416300205218412, "grad_norm": 0.6176379273788828, "learning_rate": 0.0004999940808480822, "loss": 3.366276502609253, "step": 1777, "token_acc": 0.26104859825020754 }, { "epoch": 1.04221635883905, "grad_norm": 0.530185348259988, "learning_rate": 0.0004999939129378878, "loss": 3.3744144439697266, "step": 1778, "token_acc": 0.2620523319404325 }, { "epoch": 1.0428026971562592, "grad_norm": 0.6424307231782191, "learning_rate": 0.0004999937426793376, "loss": 3.3443009853363037, "step": 1779, "token_acc": 0.2643695140060487 }, { "epoch": 1.0433890354734683, "grad_norm": 0.787044190861387, "learning_rate": 0.0004999935700724332, "loss": 3.3889405727386475, "step": 1780, "token_acc": 0.2584142390748931 }, { "epoch": 1.0439753737906772, "grad_norm": 0.8824838370481669, "learning_rate": 0.0004999933951171759, "loss": 3.375654697418213, "step": 1781, "token_acc": 0.259952274823826 }, { "epoch": 1.0445617121078863, "grad_norm": 0.8935646686320523, "learning_rate": 0.0004999932178135675, "loss": 3.3010785579681396, "step": 1782, "token_acc": 0.2697119190866822 }, { "epoch": 1.0451480504250952, "grad_norm": 0.8487326770320669, "learning_rate": 0.0004999930381616097, "loss": 3.3687915802001953, "step": 1783, "token_acc": 0.2626485251181288 }, { "epoch": 1.0457343887423043, "grad_norm": 0.6954601614095431, "learning_rate": 0.0004999928561613042, "loss": 3.323179006576538, "step": 1784, "token_acc": 0.2681812605673993 }, { "epoch": 1.0463207270595134, "grad_norm": 0.5976900116670606, "learning_rate": 0.0004999926718126527, "loss": 3.3723034858703613, "step": 1785, "token_acc": 0.26233275576805326 }, { "epoch": 1.0469070653767223, "grad_norm": 0.6694147867330238, "learning_rate": 0.000499992485115657, "loss": 3.3868842124938965, "step": 1786, "token_acc": 0.2607573843631575 }, { "epoch": 1.0474934036939314, "grad_norm": 0.5973223226217586, "learning_rate": 0.0004999922960703186, "loss": 3.4020771980285645, "step": 1787, "token_acc": 0.2592431174104918 }, { "epoch": 1.0480797420111405, "grad_norm": 0.6068263756477659, "learning_rate": 0.0004999921046766395, "loss": 3.367609977722168, "step": 1788, "token_acc": 0.26299552035317797 }, { "epoch": 1.0486660803283494, "grad_norm": 0.6786263449527892, "learning_rate": 0.0004999919109346214, "loss": 3.3985230922698975, "step": 1789, "token_acc": 0.25843748705271447 }, { "epoch": 1.0492524186455585, "grad_norm": 0.5951488449789288, "learning_rate": 0.0004999917148442663, "loss": 3.3365986347198486, "step": 1790, "token_acc": 0.26665637597139524 }, { "epoch": 1.0498387569627674, "grad_norm": 0.7109334805720633, "learning_rate": 0.0004999915164055759, "loss": 3.3945555686950684, "step": 1791, "token_acc": 0.2583650645719611 }, { "epoch": 1.0504250952799765, "grad_norm": 0.7271284886524519, "learning_rate": 0.000499991315618552, "loss": 3.371025562286377, "step": 1792, "token_acc": 0.2617175269309693 }, { "epoch": 1.0510114335971856, "grad_norm": 0.5688838521807622, "learning_rate": 0.0004999911124831967, "loss": 3.319046974182129, "step": 1793, "token_acc": 0.2682160262215897 }, { "epoch": 1.0515977719143945, "grad_norm": 0.641690404391398, "learning_rate": 0.0004999909069995116, "loss": 3.379556179046631, "step": 1794, "token_acc": 0.26156531428810914 }, { "epoch": 1.0521841102316036, "grad_norm": 0.6572637441109637, "learning_rate": 0.0004999906991674988, "loss": 3.3643383979797363, "step": 1795, "token_acc": 0.26225160674648995 }, { "epoch": 1.0527704485488127, "grad_norm": 0.549759502075547, "learning_rate": 0.0004999904889871603, "loss": 3.34519100189209, "step": 1796, "token_acc": 0.26352962639596555 }, { "epoch": 1.0533567868660216, "grad_norm": 0.5434664415730925, "learning_rate": 0.000499990276458498, "loss": 3.3356029987335205, "step": 1797, "token_acc": 0.26516858043306696 }, { "epoch": 1.0539431251832307, "grad_norm": 0.5761478692168411, "learning_rate": 0.0004999900615815139, "loss": 3.383047103881836, "step": 1798, "token_acc": 0.26080178173719376 }, { "epoch": 1.0545294635004399, "grad_norm": 0.5726446208156213, "learning_rate": 0.0004999898443562101, "loss": 3.344855308532715, "step": 1799, "token_acc": 0.2655866823643122 }, { "epoch": 1.0551158018176487, "grad_norm": 0.5781232852428487, "learning_rate": 0.0004999896247825885, "loss": 3.331421375274658, "step": 1800, "token_acc": 0.2655137388647701 }, { "epoch": 1.0557021401348579, "grad_norm": 0.6298368392090262, "learning_rate": 0.0004999894028606514, "loss": 3.390155553817749, "step": 1801, "token_acc": 0.2595282528063859 }, { "epoch": 1.0562884784520667, "grad_norm": 0.574812584160153, "learning_rate": 0.0004999891785904004, "loss": 3.3871545791625977, "step": 1802, "token_acc": 0.25976305514689135 }, { "epoch": 1.0568748167692759, "grad_norm": 0.6317646322221564, "learning_rate": 0.0004999889519718382, "loss": 3.388237476348877, "step": 1803, "token_acc": 0.2582999890411545 }, { "epoch": 1.057461155086485, "grad_norm": 0.6900534822495569, "learning_rate": 0.0004999887230049667, "loss": 3.375983953475952, "step": 1804, "token_acc": 0.26143238539886926 }, { "epoch": 1.0580474934036939, "grad_norm": 0.8767784815629192, "learning_rate": 0.0004999884916897879, "loss": 3.3584787845611572, "step": 1805, "token_acc": 0.26349800873603574 }, { "epoch": 1.058633831720903, "grad_norm": 1.007099225192306, "learning_rate": 0.000499988258026304, "loss": 3.3546409606933594, "step": 1806, "token_acc": 0.262421111459455 }, { "epoch": 1.059220170038112, "grad_norm": 0.8832702286904426, "learning_rate": 0.0004999880220145174, "loss": 3.3483357429504395, "step": 1807, "token_acc": 0.2629402200508295 }, { "epoch": 1.059806508355321, "grad_norm": 0.6009907201765745, "learning_rate": 0.0004999877836544302, "loss": 3.338658571243286, "step": 1808, "token_acc": 0.266400339185691 }, { "epoch": 1.06039284667253, "grad_norm": 0.654329565064158, "learning_rate": 0.0004999875429460446, "loss": 3.372558116912842, "step": 1809, "token_acc": 0.2592413487245147 }, { "epoch": 1.060979184989739, "grad_norm": 0.6799726831288975, "learning_rate": 0.0004999872998893628, "loss": 3.3641517162323, "step": 1810, "token_acc": 0.26096974502615644 }, { "epoch": 1.061565523306948, "grad_norm": 0.6776127141225181, "learning_rate": 0.0004999870544843874, "loss": 3.367945671081543, "step": 1811, "token_acc": 0.26141845321862356 }, { "epoch": 1.0621518616241572, "grad_norm": 0.6440756717176906, "learning_rate": 0.0004999868067311204, "loss": 3.348313331604004, "step": 1812, "token_acc": 0.26333283761606957 }, { "epoch": 1.062738199941366, "grad_norm": 0.6499229999172275, "learning_rate": 0.0004999865566295642, "loss": 3.3412609100341797, "step": 1813, "token_acc": 0.2657629102257766 }, { "epoch": 1.0633245382585752, "grad_norm": 0.5763340880916704, "learning_rate": 0.000499986304179721, "loss": 3.3790879249572754, "step": 1814, "token_acc": 0.2619082930343173 }, { "epoch": 1.0639108765757843, "grad_norm": 0.5513246032993949, "learning_rate": 0.0004999860493815935, "loss": 3.362651824951172, "step": 1815, "token_acc": 0.263337948139556 }, { "epoch": 1.0644972148929932, "grad_norm": 0.5268377542252404, "learning_rate": 0.0004999857922351839, "loss": 3.3605196475982666, "step": 1816, "token_acc": 0.26262203413911384 }, { "epoch": 1.0650835532102023, "grad_norm": 0.6251928919287948, "learning_rate": 0.0004999855327404947, "loss": 3.3256547451019287, "step": 1817, "token_acc": 0.2667386778127397 }, { "epoch": 1.0656698915274112, "grad_norm": 0.7228491988503415, "learning_rate": 0.0004999852708975283, "loss": 3.36371111869812, "step": 1818, "token_acc": 0.2629876962719242 }, { "epoch": 1.0662562298446203, "grad_norm": 0.5326920518090941, "learning_rate": 0.000499985006706287, "loss": 3.371650218963623, "step": 1819, "token_acc": 0.2576653202271892 }, { "epoch": 1.0668425681618294, "grad_norm": 0.5529386787284536, "learning_rate": 0.0004999847401667734, "loss": 3.371994972229004, "step": 1820, "token_acc": 0.2615316914843285 }, { "epoch": 1.0674289064790383, "grad_norm": 0.5786759343693647, "learning_rate": 0.0004999844712789902, "loss": 3.367558479309082, "step": 1821, "token_acc": 0.26153073724166576 }, { "epoch": 1.0680152447962474, "grad_norm": 0.5424333065781486, "learning_rate": 0.0004999842000429395, "loss": 3.367112874984741, "step": 1822, "token_acc": 0.2620458120805028 }, { "epoch": 1.0686015831134565, "grad_norm": 0.5202329588101864, "learning_rate": 0.0004999839264586243, "loss": 3.329667091369629, "step": 1823, "token_acc": 0.26598960158743123 }, { "epoch": 1.0691879214306654, "grad_norm": 0.5689297057917363, "learning_rate": 0.0004999836505260469, "loss": 3.3664710521698, "step": 1824, "token_acc": 0.26191230074395677 }, { "epoch": 1.0697742597478745, "grad_norm": 0.6323169839214782, "learning_rate": 0.0004999833722452101, "loss": 3.3379154205322266, "step": 1825, "token_acc": 0.2655530891697525 }, { "epoch": 1.0703605980650837, "grad_norm": 0.5124151991894181, "learning_rate": 0.0004999830916161162, "loss": 3.316084146499634, "step": 1826, "token_acc": 0.2683226797016983 }, { "epoch": 1.0709469363822925, "grad_norm": 0.5598974165907181, "learning_rate": 0.0004999828086387681, "loss": 3.2998197078704834, "step": 1827, "token_acc": 0.270402033210165 }, { "epoch": 1.0715332746995017, "grad_norm": 0.6750139041590645, "learning_rate": 0.0004999825233131684, "loss": 3.3208088874816895, "step": 1828, "token_acc": 0.265082214812915 }, { "epoch": 1.0721196130167105, "grad_norm": 0.7644314555676089, "learning_rate": 0.0004999822356393196, "loss": 3.369450569152832, "step": 1829, "token_acc": 0.25960608910278193 }, { "epoch": 1.0727059513339197, "grad_norm": 0.8627228572789439, "learning_rate": 0.0004999819456172246, "loss": 3.406212329864502, "step": 1830, "token_acc": 0.25401668360281865 }, { "epoch": 1.0732922896511288, "grad_norm": 0.7150873148017909, "learning_rate": 0.0004999816532468862, "loss": 3.3242688179016113, "step": 1831, "token_acc": 0.2687652418103312 }, { "epoch": 1.0738786279683377, "grad_norm": 0.5205099399792226, "learning_rate": 0.0004999813585283069, "loss": 3.2824041843414307, "step": 1832, "token_acc": 0.27185962055895924 }, { "epoch": 1.0744649662855468, "grad_norm": 0.48907368406473145, "learning_rate": 0.0004999810614614897, "loss": 3.332336902618408, "step": 1833, "token_acc": 0.265657668903105 }, { "epoch": 1.0750513046027559, "grad_norm": 0.5768644799840614, "learning_rate": 0.0004999807620464371, "loss": 3.312974691390991, "step": 1834, "token_acc": 0.2683395486341777 }, { "epoch": 1.0756376429199648, "grad_norm": 0.6864426625179267, "learning_rate": 0.0004999804602831522, "loss": 3.3786778450012207, "step": 1835, "token_acc": 0.2609175577785443 }, { "epoch": 1.0762239812371739, "grad_norm": 0.6407044114194256, "learning_rate": 0.0004999801561716378, "loss": 3.327113151550293, "step": 1836, "token_acc": 0.26692839676485003 }, { "epoch": 1.0768103195543828, "grad_norm": 0.6681801364819084, "learning_rate": 0.0004999798497118966, "loss": 3.3837780952453613, "step": 1837, "token_acc": 0.26060003444004376 }, { "epoch": 1.077396657871592, "grad_norm": 0.6652683721978996, "learning_rate": 0.0004999795409039316, "loss": 3.3408524990081787, "step": 1838, "token_acc": 0.26348956495779846 }, { "epoch": 1.077982996188801, "grad_norm": 0.6039194938169614, "learning_rate": 0.0004999792297477457, "loss": 3.3619892597198486, "step": 1839, "token_acc": 0.2623747790218032 }, { "epoch": 1.07856933450601, "grad_norm": 0.547788902364413, "learning_rate": 0.0004999789162433417, "loss": 3.3704473972320557, "step": 1840, "token_acc": 0.26020085208409227 }, { "epoch": 1.079155672823219, "grad_norm": 0.6651554678105396, "learning_rate": 0.0004999786003907226, "loss": 3.3197226524353027, "step": 1841, "token_acc": 0.2675078050416699 }, { "epoch": 1.0797420111404281, "grad_norm": 0.7083009286521162, "learning_rate": 0.0004999782821898915, "loss": 3.333406925201416, "step": 1842, "token_acc": 0.2653315556874858 }, { "epoch": 1.080328349457637, "grad_norm": 0.5973029569690527, "learning_rate": 0.0004999779616408513, "loss": 3.379715919494629, "step": 1843, "token_acc": 0.2600905616372774 }, { "epoch": 1.0809146877748461, "grad_norm": 0.5147418838862436, "learning_rate": 0.000499977638743605, "loss": 3.347729206085205, "step": 1844, "token_acc": 0.2622559923610282 }, { "epoch": 1.081501026092055, "grad_norm": 0.6218856387475501, "learning_rate": 0.0004999773134981555, "loss": 3.33701753616333, "step": 1845, "token_acc": 0.26537342896624333 }, { "epoch": 1.0820873644092641, "grad_norm": 0.6766762559448689, "learning_rate": 0.0004999769859045061, "loss": 3.293914794921875, "step": 1846, "token_acc": 0.27081108281301347 }, { "epoch": 1.0826737027264732, "grad_norm": 0.6075565064024652, "learning_rate": 0.0004999766559626597, "loss": 3.3183226585388184, "step": 1847, "token_acc": 0.2680923613695326 }, { "epoch": 1.0832600410436821, "grad_norm": 0.5895830466848409, "learning_rate": 0.0004999763236726196, "loss": 3.292226791381836, "step": 1848, "token_acc": 0.2695374329179644 }, { "epoch": 1.0838463793608912, "grad_norm": 0.4498300994789812, "learning_rate": 0.0004999759890343886, "loss": 3.255733013153076, "step": 1849, "token_acc": 0.2766067382638417 }, { "epoch": 1.0844327176781003, "grad_norm": 0.5709093043660186, "learning_rate": 0.0004999756520479701, "loss": 3.348172426223755, "step": 1850, "token_acc": 0.2636486823806298 }, { "epoch": 1.0850190559953092, "grad_norm": 0.5794314239898045, "learning_rate": 0.0004999753127133673, "loss": 3.3362908363342285, "step": 1851, "token_acc": 0.2637277857505098 }, { "epoch": 1.0856053943125183, "grad_norm": 0.6140688818797233, "learning_rate": 0.0004999749710305832, "loss": 3.3446223735809326, "step": 1852, "token_acc": 0.26468104167060175 }, { "epoch": 1.0861917326297275, "grad_norm": 0.5738251792865984, "learning_rate": 0.0004999746269996211, "loss": 3.3634095191955566, "step": 1853, "token_acc": 0.2618142664594124 }, { "epoch": 1.0867780709469363, "grad_norm": 0.6388997649358731, "learning_rate": 0.0004999742806204842, "loss": 3.3830952644348145, "step": 1854, "token_acc": 0.26032733984677164 }, { "epoch": 1.0873644092641455, "grad_norm": 0.9324259492460218, "learning_rate": 0.0004999739318931758, "loss": 3.3028879165649414, "step": 1855, "token_acc": 0.2693825183778949 }, { "epoch": 1.0879507475813543, "grad_norm": 1.0466821770471486, "learning_rate": 0.0004999735808176992, "loss": 3.3617472648620605, "step": 1856, "token_acc": 0.2610474782686597 }, { "epoch": 1.0885370858985635, "grad_norm": 0.6822823226594966, "learning_rate": 0.0004999732273940575, "loss": 3.410457134246826, "step": 1857, "token_acc": 0.2556543242541889 }, { "epoch": 1.0891234242157726, "grad_norm": 0.5957904986127947, "learning_rate": 0.0004999728716222543, "loss": 3.3506553173065186, "step": 1858, "token_acc": 0.26247800134259847 }, { "epoch": 1.0897097625329815, "grad_norm": 0.6466425149601915, "learning_rate": 0.0004999725135022928, "loss": 3.353973150253296, "step": 1859, "token_acc": 0.2618879293331768 }, { "epoch": 1.0902961008501906, "grad_norm": 0.5617310524495857, "learning_rate": 0.0004999721530341764, "loss": 3.3509185314178467, "step": 1860, "token_acc": 0.2619573456493217 }, { "epoch": 1.0908824391673997, "grad_norm": 0.5686563991207016, "learning_rate": 0.0004999717902179083, "loss": 3.406980037689209, "step": 1861, "token_acc": 0.2577925979174264 }, { "epoch": 1.0914687774846086, "grad_norm": 0.5780438723543403, "learning_rate": 0.0004999714250534923, "loss": 3.3280835151672363, "step": 1862, "token_acc": 0.26801147419097987 }, { "epoch": 1.0920551158018177, "grad_norm": 0.5615025690471361, "learning_rate": 0.0004999710575409315, "loss": 3.3526835441589355, "step": 1863, "token_acc": 0.2635024850555529 }, { "epoch": 1.0926414541190266, "grad_norm": 0.6735715924777766, "learning_rate": 0.0004999706876802295, "loss": 3.2892231941223145, "step": 1864, "token_acc": 0.2718837628343871 }, { "epoch": 1.0932277924362357, "grad_norm": 0.7136312296739743, "learning_rate": 0.0004999703154713897, "loss": 3.304704427719116, "step": 1865, "token_acc": 0.26911732136025934 }, { "epoch": 1.0938141307534448, "grad_norm": 0.6296857619462491, "learning_rate": 0.0004999699409144156, "loss": 3.2942373752593994, "step": 1866, "token_acc": 0.2705142660277491 }, { "epoch": 1.0944004690706537, "grad_norm": 0.5584824313959695, "learning_rate": 0.0004999695640093107, "loss": 3.3270745277404785, "step": 1867, "token_acc": 0.2684870080474036 }, { "epoch": 1.0949868073878628, "grad_norm": 0.43891433906760874, "learning_rate": 0.0004999691847560787, "loss": 3.360311508178711, "step": 1868, "token_acc": 0.2626777038438418 }, { "epoch": 1.095573145705072, "grad_norm": 0.5333554592436626, "learning_rate": 0.0004999688031547229, "loss": 3.3480441570281982, "step": 1869, "token_acc": 0.26455563992981673 }, { "epoch": 1.0961594840222808, "grad_norm": 0.6366161160603616, "learning_rate": 0.0004999684192052472, "loss": 3.366748571395874, "step": 1870, "token_acc": 0.2623923753882953 }, { "epoch": 1.09674582233949, "grad_norm": 0.6133990830848296, "learning_rate": 0.000499968032907655, "loss": 3.32450532913208, "step": 1871, "token_acc": 0.2667108760268192 }, { "epoch": 1.0973321606566988, "grad_norm": 0.5519151161152714, "learning_rate": 0.0004999676442619498, "loss": 3.3259434700012207, "step": 1872, "token_acc": 0.266930347960166 }, { "epoch": 1.097918498973908, "grad_norm": 0.6297742425965059, "learning_rate": 0.0004999672532681357, "loss": 3.309068202972412, "step": 1873, "token_acc": 0.2677486830489108 }, { "epoch": 1.098504837291117, "grad_norm": 0.5514107391268099, "learning_rate": 0.0004999668599262159, "loss": 3.3154306411743164, "step": 1874, "token_acc": 0.27020064833872764 }, { "epoch": 1.099091175608326, "grad_norm": 0.4772557412534295, "learning_rate": 0.0004999664642361943, "loss": 3.3363611698150635, "step": 1875, "token_acc": 0.263571445319522 }, { "epoch": 1.099677513925535, "grad_norm": 0.5123964763096657, "learning_rate": 0.0004999660661980746, "loss": 3.3685688972473145, "step": 1876, "token_acc": 0.260429188084458 }, { "epoch": 1.1002638522427441, "grad_norm": 0.5863914556804758, "learning_rate": 0.0004999656658118605, "loss": 3.2840476036071777, "step": 1877, "token_acc": 0.2736421947713758 }, { "epoch": 1.100850190559953, "grad_norm": 0.5891678446157472, "learning_rate": 0.0004999652630775559, "loss": 3.325604200363159, "step": 1878, "token_acc": 0.2674743370235364 }, { "epoch": 1.1014365288771621, "grad_norm": 0.5704012617130357, "learning_rate": 0.0004999648579951645, "loss": 3.423212766647339, "step": 1879, "token_acc": 0.2529669515451143 }, { "epoch": 1.1020228671943713, "grad_norm": 0.4671860749231948, "learning_rate": 0.0004999644505646899, "loss": 3.292140245437622, "step": 1880, "token_acc": 0.27090793847766725 }, { "epoch": 1.1026092055115801, "grad_norm": 0.5532526950883575, "learning_rate": 0.0004999640407861364, "loss": 3.4039430618286133, "step": 1881, "token_acc": 0.2583785466981929 }, { "epoch": 1.1031955438287893, "grad_norm": 0.7362505440893078, "learning_rate": 0.0004999636286595075, "loss": 3.3160059452056885, "step": 1882, "token_acc": 0.2665891730322152 }, { "epoch": 1.1037818821459981, "grad_norm": 0.8003015890089482, "learning_rate": 0.0004999632141848069, "loss": 3.3513550758361816, "step": 1883, "token_acc": 0.26512444616860975 }, { "epoch": 1.1043682204632073, "grad_norm": 0.7668373017060995, "learning_rate": 0.000499962797362039, "loss": 3.393906354904175, "step": 1884, "token_acc": 0.25884256099530584 }, { "epoch": 1.1049545587804164, "grad_norm": 0.690665700931511, "learning_rate": 0.0004999623781912074, "loss": 3.346266746520996, "step": 1885, "token_acc": 0.26421327937376304 }, { "epoch": 1.1055408970976253, "grad_norm": 0.6975919825549942, "learning_rate": 0.000499961956672316, "loss": 3.3419108390808105, "step": 1886, "token_acc": 0.2651025889385862 }, { "epoch": 1.1061272354148344, "grad_norm": 0.6737058837854076, "learning_rate": 0.0004999615328053688, "loss": 3.3344521522521973, "step": 1887, "token_acc": 0.2672704262373105 }, { "epoch": 1.1067135737320435, "grad_norm": 0.6398945191049568, "learning_rate": 0.0004999611065903699, "loss": 3.3697056770324707, "step": 1888, "token_acc": 0.2620787687918469 }, { "epoch": 1.1072999120492524, "grad_norm": 0.6698940892123584, "learning_rate": 0.0004999606780273232, "loss": 3.36635160446167, "step": 1889, "token_acc": 0.26058160720010637 }, { "epoch": 1.1078862503664615, "grad_norm": 0.592741016520332, "learning_rate": 0.0004999602471162329, "loss": 3.3192174434661865, "step": 1890, "token_acc": 0.2680414890439599 }, { "epoch": 1.1084725886836704, "grad_norm": 0.5222829067995212, "learning_rate": 0.0004999598138571027, "loss": 3.3659634590148926, "step": 1891, "token_acc": 0.26195347190771423 }, { "epoch": 1.1090589270008795, "grad_norm": 0.5941686575947177, "learning_rate": 0.000499959378249937, "loss": 3.3365256786346436, "step": 1892, "token_acc": 0.2647146916419458 }, { "epoch": 1.1096452653180886, "grad_norm": 0.5645723417676275, "learning_rate": 0.0004999589402947397, "loss": 3.327695846557617, "step": 1893, "token_acc": 0.2664400041845381 }, { "epoch": 1.1102316036352975, "grad_norm": 0.4825747104994076, "learning_rate": 0.0004999584999915151, "loss": 3.331441879272461, "step": 1894, "token_acc": 0.26707964100093023 }, { "epoch": 1.1108179419525066, "grad_norm": 0.532321136983003, "learning_rate": 0.0004999580573402671, "loss": 3.3440840244293213, "step": 1895, "token_acc": 0.26270350772859197 }, { "epoch": 1.1114042802697157, "grad_norm": 0.5461527069716696, "learning_rate": 0.000499957612341, "loss": 3.3447318077087402, "step": 1896, "token_acc": 0.26430457046531597 }, { "epoch": 1.1119906185869246, "grad_norm": 0.5639976855880591, "learning_rate": 0.000499957164993718, "loss": 3.3305516242980957, "step": 1897, "token_acc": 0.26659962087404737 }, { "epoch": 1.1125769569041337, "grad_norm": 0.6025613452212991, "learning_rate": 0.0004999567152984253, "loss": 3.3148622512817383, "step": 1898, "token_acc": 0.2698700237590978 }, { "epoch": 1.1131632952213426, "grad_norm": 0.6505915523023115, "learning_rate": 0.0004999562632551259, "loss": 3.366891384124756, "step": 1899, "token_acc": 0.26387677236943063 }, { "epoch": 1.1137496335385517, "grad_norm": 0.5666705951303488, "learning_rate": 0.0004999558088638243, "loss": 3.278472900390625, "step": 1900, "token_acc": 0.27175363999484603 }, { "epoch": 1.1143359718557608, "grad_norm": 0.47537337236353827, "learning_rate": 0.0004999553521245247, "loss": 3.3631415367126465, "step": 1901, "token_acc": 0.26305859480138705 }, { "epoch": 1.1149223101729697, "grad_norm": 0.6173271370983678, "learning_rate": 0.0004999548930372314, "loss": 3.3516504764556885, "step": 1902, "token_acc": 0.2643220486268732 }, { "epoch": 1.1155086484901788, "grad_norm": 0.8013410822645078, "learning_rate": 0.0004999544316019488, "loss": 3.30094051361084, "step": 1903, "token_acc": 0.27045977385544673 }, { "epoch": 1.116094986807388, "grad_norm": 0.6951029556078197, "learning_rate": 0.0004999539678186809, "loss": 3.376927137374878, "step": 1904, "token_acc": 0.2605311514522544 }, { "epoch": 1.1166813251245968, "grad_norm": 0.4459216631307039, "learning_rate": 0.0004999535016874325, "loss": 3.2677159309387207, "step": 1905, "token_acc": 0.2740752526470845 }, { "epoch": 1.117267663441806, "grad_norm": 0.6874695730036623, "learning_rate": 0.0004999530332082077, "loss": 3.3685755729675293, "step": 1906, "token_acc": 0.2614025943320186 }, { "epoch": 1.117854001759015, "grad_norm": 1.0556619445726723, "learning_rate": 0.0004999525623810109, "loss": 3.399080276489258, "step": 1907, "token_acc": 0.25721220401435585 }, { "epoch": 1.118440340076224, "grad_norm": 0.8531936734435709, "learning_rate": 0.0004999520892058467, "loss": 3.3394012451171875, "step": 1908, "token_acc": 0.26410148472021716 }, { "epoch": 1.119026678393433, "grad_norm": 0.6192289943454509, "learning_rate": 0.0004999516136827194, "loss": 3.2935938835144043, "step": 1909, "token_acc": 0.2682424447070741 }, { "epoch": 1.119613016710642, "grad_norm": 0.572382491857413, "learning_rate": 0.0004999511358116335, "loss": 3.3077783584594727, "step": 1910, "token_acc": 0.2691926921677334 }, { "epoch": 1.120199355027851, "grad_norm": 0.5894869325414929, "learning_rate": 0.0004999506555925934, "loss": 3.3611085414886475, "step": 1911, "token_acc": 0.26167399275030206 }, { "epoch": 1.1207856933450602, "grad_norm": 0.5529352808543856, "learning_rate": 0.0004999501730256038, "loss": 3.3481454849243164, "step": 1912, "token_acc": 0.26226218509962673 }, { "epoch": 1.121372031662269, "grad_norm": 0.503615500815024, "learning_rate": 0.0004999496881106692, "loss": 3.3238844871520996, "step": 1913, "token_acc": 0.2676295536172396 }, { "epoch": 1.1219583699794782, "grad_norm": 0.592746502645452, "learning_rate": 0.0004999492008477941, "loss": 3.3782854080200195, "step": 1914, "token_acc": 0.26008726398937654 }, { "epoch": 1.1225447082966873, "grad_norm": 0.5593075911734255, "learning_rate": 0.0004999487112369829, "loss": 3.340618133544922, "step": 1915, "token_acc": 0.26365046021027705 }, { "epoch": 1.1231310466138962, "grad_norm": 0.6167612992835367, "learning_rate": 0.0004999482192782405, "loss": 3.3364768028259277, "step": 1916, "token_acc": 0.2636481941992759 }, { "epoch": 1.1237173849311053, "grad_norm": 0.6646407615293147, "learning_rate": 0.0004999477249715713, "loss": 3.3232669830322266, "step": 1917, "token_acc": 0.26646408302424435 }, { "epoch": 1.1243037232483142, "grad_norm": 0.5352018803183902, "learning_rate": 0.0004999472283169801, "loss": 3.3411102294921875, "step": 1918, "token_acc": 0.265033784237244 }, { "epoch": 1.1248900615655233, "grad_norm": 0.6226158618441165, "learning_rate": 0.0004999467293144715, "loss": 3.365734577178955, "step": 1919, "token_acc": 0.2614583032874363 }, { "epoch": 1.1254763998827324, "grad_norm": 0.6410674399817448, "learning_rate": 0.0004999462279640501, "loss": 3.3511767387390137, "step": 1920, "token_acc": 0.26362318879547003 }, { "epoch": 1.1260627381999413, "grad_norm": 0.5671011036793839, "learning_rate": 0.0004999457242657209, "loss": 3.3141424655914307, "step": 1921, "token_acc": 0.26580010373607027 }, { "epoch": 1.1266490765171504, "grad_norm": 0.4911033285680153, "learning_rate": 0.0004999452182194882, "loss": 3.345874309539795, "step": 1922, "token_acc": 0.2666244865730989 }, { "epoch": 1.1272354148343595, "grad_norm": 0.49938218798965683, "learning_rate": 0.000499944709825357, "loss": 3.3207216262817383, "step": 1923, "token_acc": 0.26815428941024283 }, { "epoch": 1.1278217531515684, "grad_norm": 0.4872232565360168, "learning_rate": 0.0004999441990833321, "loss": 3.3287510871887207, "step": 1924, "token_acc": 0.26651184030269426 }, { "epoch": 1.1284080914687775, "grad_norm": 0.6002348136143194, "learning_rate": 0.0004999436859934183, "loss": 3.328094959259033, "step": 1925, "token_acc": 0.26770943040196593 }, { "epoch": 1.1289944297859864, "grad_norm": 0.6679810165256036, "learning_rate": 0.0004999431705556203, "loss": 3.3474817276000977, "step": 1926, "token_acc": 0.26200465022240194 }, { "epoch": 1.1295807681031955, "grad_norm": 0.6669379184892937, "learning_rate": 0.0004999426527699431, "loss": 3.3377952575683594, "step": 1927, "token_acc": 0.26330037694986236 }, { "epoch": 1.1301671064204046, "grad_norm": 0.6382859696933323, "learning_rate": 0.0004999421326363914, "loss": 3.348890781402588, "step": 1928, "token_acc": 0.2635761179877766 }, { "epoch": 1.1307534447376135, "grad_norm": 0.5763306563268678, "learning_rate": 0.0004999416101549703, "loss": 3.2861194610595703, "step": 1929, "token_acc": 0.2722027875968424 }, { "epoch": 1.1313397830548226, "grad_norm": 0.6358846500409145, "learning_rate": 0.0004999410853256844, "loss": 3.3237404823303223, "step": 1930, "token_acc": 0.2668121316779103 }, { "epoch": 1.1319261213720317, "grad_norm": 0.545416214518993, "learning_rate": 0.0004999405581485389, "loss": 3.3272385597229004, "step": 1931, "token_acc": 0.26671641272108243 }, { "epoch": 1.1325124596892406, "grad_norm": 0.5281814969688989, "learning_rate": 0.0004999400286235387, "loss": 3.3143866062164307, "step": 1932, "token_acc": 0.26942847342040455 }, { "epoch": 1.1330987980064497, "grad_norm": 0.5410201660790362, "learning_rate": 0.0004999394967506886, "loss": 3.3470473289489746, "step": 1933, "token_acc": 0.26370858566918537 }, { "epoch": 1.1336851363236589, "grad_norm": 0.4977793139044205, "learning_rate": 0.0004999389625299939, "loss": 3.3750386238098145, "step": 1934, "token_acc": 0.2593037903774528 }, { "epoch": 1.1342714746408677, "grad_norm": 0.4570057985395873, "learning_rate": 0.0004999384259614593, "loss": 3.3094420433044434, "step": 1935, "token_acc": 0.268883009806734 }, { "epoch": 1.1348578129580769, "grad_norm": 0.45848330350191957, "learning_rate": 0.00049993788704509, "loss": 3.294532299041748, "step": 1936, "token_acc": 0.2720610655462359 }, { "epoch": 1.1354441512752858, "grad_norm": 0.5463846156054845, "learning_rate": 0.0004999373457808911, "loss": 3.334261417388916, "step": 1937, "token_acc": 0.26462669401470224 }, { "epoch": 1.1360304895924949, "grad_norm": 0.5651418954888727, "learning_rate": 0.0004999368021688676, "loss": 3.330078601837158, "step": 1938, "token_acc": 0.26544012309651405 }, { "epoch": 1.136616827909704, "grad_norm": 0.549910936933279, "learning_rate": 0.0004999362562090246, "loss": 3.366504192352295, "step": 1939, "token_acc": 0.260170889867471 }, { "epoch": 1.1372031662269129, "grad_norm": 0.5246737950631898, "learning_rate": 0.0004999357079013674, "loss": 3.3516793251037598, "step": 1940, "token_acc": 0.26385470257022425 }, { "epoch": 1.137789504544122, "grad_norm": 0.6110038833137961, "learning_rate": 0.0004999351572459007, "loss": 3.3069732189178467, "step": 1941, "token_acc": 0.26802802845200246 }, { "epoch": 1.1383758428613309, "grad_norm": 0.5901884805530477, "learning_rate": 0.0004999346042426303, "loss": 3.32161283493042, "step": 1942, "token_acc": 0.2678407694645193 }, { "epoch": 1.13896218117854, "grad_norm": 0.5773580003365661, "learning_rate": 0.000499934048891561, "loss": 3.363138198852539, "step": 1943, "token_acc": 0.26155261862773244 }, { "epoch": 1.139548519495749, "grad_norm": 0.5687998878253862, "learning_rate": 0.0004999334911926981, "loss": 3.3818531036376953, "step": 1944, "token_acc": 0.2597661416453294 }, { "epoch": 1.140134857812958, "grad_norm": 0.5312921631730979, "learning_rate": 0.0004999329311460469, "loss": 3.29067325592041, "step": 1945, "token_acc": 0.2702794951555724 }, { "epoch": 1.140721196130167, "grad_norm": 0.6673850120935002, "learning_rate": 0.0004999323687516125, "loss": 3.3104586601257324, "step": 1946, "token_acc": 0.26910676506018 }, { "epoch": 1.1413075344473762, "grad_norm": 0.7037292888804452, "learning_rate": 0.0004999318040094003, "loss": 3.378039598464966, "step": 1947, "token_acc": 0.2598437712884327 }, { "epoch": 1.141893872764585, "grad_norm": 0.6704269240044067, "learning_rate": 0.0004999312369194156, "loss": 3.296900749206543, "step": 1948, "token_acc": 0.2706122418396477 }, { "epoch": 1.1424802110817942, "grad_norm": 0.5668683720984143, "learning_rate": 0.0004999306674816637, "loss": 3.3289437294006348, "step": 1949, "token_acc": 0.2674995401975528 }, { "epoch": 1.1430665493990033, "grad_norm": 0.4909697172829595, "learning_rate": 0.00049993009569615, "loss": 3.3821184635162354, "step": 1950, "token_acc": 0.2589990517114412 }, { "epoch": 1.1436528877162122, "grad_norm": 0.7546160577081155, "learning_rate": 0.0004999295215628799, "loss": 3.361788749694824, "step": 1951, "token_acc": 0.26076422734953486 }, { "epoch": 1.1442392260334213, "grad_norm": 0.7942132246360624, "learning_rate": 0.0004999289450818587, "loss": 3.3548426628112793, "step": 1952, "token_acc": 0.26182707993474713 }, { "epoch": 1.1448255643506302, "grad_norm": 0.7251923793813759, "learning_rate": 0.0004999283662530917, "loss": 3.322568893432617, "step": 1953, "token_acc": 0.26760266891207724 }, { "epoch": 1.1454119026678393, "grad_norm": 0.5260972020853423, "learning_rate": 0.0004999277850765845, "loss": 3.3087902069091797, "step": 1954, "token_acc": 0.2683812284737829 }, { "epoch": 1.1459982409850484, "grad_norm": 0.6211888120211889, "learning_rate": 0.0004999272015523427, "loss": 3.3634419441223145, "step": 1955, "token_acc": 0.2624973219540108 }, { "epoch": 1.1465845793022573, "grad_norm": 0.6463600187087004, "learning_rate": 0.0004999266156803715, "loss": 3.369861364364624, "step": 1956, "token_acc": 0.25803941571267597 }, { "epoch": 1.1471709176194664, "grad_norm": 0.5679036380986741, "learning_rate": 0.0004999260274606766, "loss": 3.337130546569824, "step": 1957, "token_acc": 0.26478252672412234 }, { "epoch": 1.1477572559366755, "grad_norm": 0.47638931566558745, "learning_rate": 0.0004999254368932635, "loss": 3.300784111022949, "step": 1958, "token_acc": 0.2696280270779538 }, { "epoch": 1.1483435942538844, "grad_norm": 0.4778844444996467, "learning_rate": 0.0004999248439781375, "loss": 3.331264019012451, "step": 1959, "token_acc": 0.26758135526101234 }, { "epoch": 1.1489299325710935, "grad_norm": 0.4466850287285112, "learning_rate": 0.0004999242487153045, "loss": 3.333454132080078, "step": 1960, "token_acc": 0.2660251451944885 }, { "epoch": 1.1495162708883027, "grad_norm": 0.45498073276311674, "learning_rate": 0.00049992365110477, "loss": 3.2760415077209473, "step": 1961, "token_acc": 0.27177966482454957 }, { "epoch": 1.1501026092055116, "grad_norm": 0.48689519718408053, "learning_rate": 0.0004999230511465395, "loss": 3.349882125854492, "step": 1962, "token_acc": 0.26373021335168617 }, { "epoch": 1.1506889475227207, "grad_norm": 0.5021402612562448, "learning_rate": 0.0004999224488406187, "loss": 3.3303253650665283, "step": 1963, "token_acc": 0.2658426835894433 }, { "epoch": 1.1512752858399296, "grad_norm": 0.4930948466902079, "learning_rate": 0.0004999218441870133, "loss": 3.2795398235321045, "step": 1964, "token_acc": 0.27140242489964134 }, { "epoch": 1.1518616241571387, "grad_norm": 0.4089300845606071, "learning_rate": 0.0004999212371857289, "loss": 3.3557021617889404, "step": 1965, "token_acc": 0.2634841368707698 }, { "epoch": 1.1524479624743478, "grad_norm": 0.46428686106387707, "learning_rate": 0.0004999206278367713, "loss": 3.3432679176330566, "step": 1966, "token_acc": 0.262956020114102 }, { "epoch": 1.1530343007915567, "grad_norm": 0.4827792193358561, "learning_rate": 0.0004999200161401462, "loss": 3.3332736492156982, "step": 1967, "token_acc": 0.26565287818896655 }, { "epoch": 1.1536206391087658, "grad_norm": 0.4955232974253411, "learning_rate": 0.0004999194020958594, "loss": 3.3183631896972656, "step": 1968, "token_acc": 0.266028720458497 }, { "epoch": 1.1542069774259747, "grad_norm": 0.5387027148881859, "learning_rate": 0.0004999187857039164, "loss": 3.27771258354187, "step": 1969, "token_acc": 0.27252718111906654 }, { "epoch": 1.1547933157431838, "grad_norm": 0.5097067438738796, "learning_rate": 0.0004999181669643232, "loss": 3.284806728363037, "step": 1970, "token_acc": 0.27260333575054385 }, { "epoch": 1.155379654060393, "grad_norm": 0.5376775667793493, "learning_rate": 0.0004999175458770857, "loss": 3.367516040802002, "step": 1971, "token_acc": 0.2622917731908824 }, { "epoch": 1.1559659923776018, "grad_norm": 0.6902034020546416, "learning_rate": 0.0004999169224422096, "loss": 3.2984836101531982, "step": 1972, "token_acc": 0.27052429375085757 }, { "epoch": 1.156552330694811, "grad_norm": 0.7599261340523564, "learning_rate": 0.0004999162966597007, "loss": 3.289255142211914, "step": 1973, "token_acc": 0.2711683833943151 }, { "epoch": 1.15713866901202, "grad_norm": 0.7883124066125632, "learning_rate": 0.000499915668529565, "loss": 3.368717670440674, "step": 1974, "token_acc": 0.2599040848881709 }, { "epoch": 1.157725007329229, "grad_norm": 0.708867332249208, "learning_rate": 0.0004999150380518084, "loss": 3.324082374572754, "step": 1975, "token_acc": 0.2677142063459402 }, { "epoch": 1.158311345646438, "grad_norm": 0.6819957599965444, "learning_rate": 0.0004999144052264368, "loss": 3.280559778213501, "step": 1976, "token_acc": 0.27372438667951865 }, { "epoch": 1.1588976839636471, "grad_norm": 0.6193460370668442, "learning_rate": 0.0004999137700534561, "loss": 3.3649582862854004, "step": 1977, "token_acc": 0.26222782048207693 }, { "epoch": 1.159484022280856, "grad_norm": 0.6925984551908982, "learning_rate": 0.0004999131325328722, "loss": 3.358515501022339, "step": 1978, "token_acc": 0.2620345398383878 }, { "epoch": 1.1600703605980651, "grad_norm": 0.6343771196827566, "learning_rate": 0.0004999124926646913, "loss": 3.3289148807525635, "step": 1979, "token_acc": 0.267540932722899 }, { "epoch": 1.160656698915274, "grad_norm": 0.5391867951746058, "learning_rate": 0.0004999118504489192, "loss": 3.3240575790405273, "step": 1980, "token_acc": 0.2653484716698198 }, { "epoch": 1.1612430372324831, "grad_norm": 0.5134899132205668, "learning_rate": 0.0004999112058855622, "loss": 3.3434672355651855, "step": 1981, "token_acc": 0.2659219217965709 }, { "epoch": 1.1618293755496922, "grad_norm": 0.49861589487998115, "learning_rate": 0.000499910558974626, "loss": 3.3181262016296387, "step": 1982, "token_acc": 0.26775059905758997 }, { "epoch": 1.1624157138669011, "grad_norm": 0.5340075308783796, "learning_rate": 0.0004999099097161169, "loss": 3.3119540214538574, "step": 1983, "token_acc": 0.2680917750317468 }, { "epoch": 1.1630020521841102, "grad_norm": 0.5318725733219032, "learning_rate": 0.0004999092581100409, "loss": 3.316734552383423, "step": 1984, "token_acc": 0.2680798699421582 }, { "epoch": 1.1635883905013193, "grad_norm": 0.5011606879683798, "learning_rate": 0.0004999086041564042, "loss": 3.3209729194641113, "step": 1985, "token_acc": 0.265934351641209 }, { "epoch": 1.1641747288185282, "grad_norm": 0.541041190243588, "learning_rate": 0.0004999079478552131, "loss": 3.341071128845215, "step": 1986, "token_acc": 0.2649006797333861 }, { "epoch": 1.1647610671357373, "grad_norm": 0.48645049943106616, "learning_rate": 0.0004999072892064734, "loss": 3.2668838500976562, "step": 1987, "token_acc": 0.27502875405864813 }, { "epoch": 1.1653474054529465, "grad_norm": 0.466205060143751, "learning_rate": 0.0004999066282101915, "loss": 3.3229565620422363, "step": 1988, "token_acc": 0.2666699778326153 }, { "epoch": 1.1659337437701554, "grad_norm": 0.5275081305958017, "learning_rate": 0.0004999059648663737, "loss": 3.2949752807617188, "step": 1989, "token_acc": 0.2690598363427695 }, { "epoch": 1.1665200820873645, "grad_norm": 0.46418700701633336, "learning_rate": 0.0004999052991750259, "loss": 3.350149154663086, "step": 1990, "token_acc": 0.26396976033456654 }, { "epoch": 1.1671064204045734, "grad_norm": 0.4142658577256744, "learning_rate": 0.0004999046311361547, "loss": 3.3271024227142334, "step": 1991, "token_acc": 0.2648428450575909 }, { "epoch": 1.1676927587217825, "grad_norm": 0.48063041404868223, "learning_rate": 0.0004999039607497663, "loss": 3.3046584129333496, "step": 1992, "token_acc": 0.2703142290964305 }, { "epoch": 1.1682790970389916, "grad_norm": 0.4561730771656601, "learning_rate": 0.0004999032880158668, "loss": 3.357513904571533, "step": 1993, "token_acc": 0.26053569328772935 }, { "epoch": 1.1688654353562005, "grad_norm": 0.559179657980282, "learning_rate": 0.0004999026129344627, "loss": 3.360114574432373, "step": 1994, "token_acc": 0.2621969072612128 }, { "epoch": 1.1694517736734096, "grad_norm": 0.6341301938141158, "learning_rate": 0.0004999019355055604, "loss": 3.3208227157592773, "step": 1995, "token_acc": 0.26457756005305944 }, { "epoch": 1.1700381119906185, "grad_norm": 0.8285580891347714, "learning_rate": 0.0004999012557291661, "loss": 3.38165283203125, "step": 1996, "token_acc": 0.2595735683371358 }, { "epoch": 1.1706244503078276, "grad_norm": 0.9027796847159149, "learning_rate": 0.0004999005736052862, "loss": 3.3802688121795654, "step": 1997, "token_acc": 0.2578397299475314 }, { "epoch": 1.1712107886250367, "grad_norm": 0.724674578428661, "learning_rate": 0.0004998998891339271, "loss": 3.3230371475219727, "step": 1998, "token_acc": 0.2671989337570965 }, { "epoch": 1.1717971269422456, "grad_norm": 0.5813224091701463, "learning_rate": 0.0004998992023150955, "loss": 3.3323206901550293, "step": 1999, "token_acc": 0.2647921108742004 }, { "epoch": 1.1723834652594547, "grad_norm": 0.8201661939092288, "learning_rate": 0.0004998985131487975, "loss": 3.343085289001465, "step": 2000, "token_acc": 0.26475652556830914 }, { "epoch": 1.1729698035766638, "grad_norm": 0.797415017670064, "learning_rate": 0.0004998978216350398, "loss": 3.3278908729553223, "step": 2001, "token_acc": 0.2651759870178098 }, { "epoch": 1.1735561418938727, "grad_norm": 0.4927705292189527, "learning_rate": 0.0004998971277738286, "loss": 3.292464256286621, "step": 2002, "token_acc": 0.2699018290896971 }, { "epoch": 1.1741424802110818, "grad_norm": 0.5610036063882936, "learning_rate": 0.0004998964315651708, "loss": 3.3572187423706055, "step": 2003, "token_acc": 0.26312081448195646 }, { "epoch": 1.174728818528291, "grad_norm": 0.6355199880436163, "learning_rate": 0.0004998957330090727, "loss": 3.3247575759887695, "step": 2004, "token_acc": 0.26561243987612837 }, { "epoch": 1.1753151568454998, "grad_norm": 0.5248953516221727, "learning_rate": 0.000499895032105541, "loss": 3.3342525959014893, "step": 2005, "token_acc": 0.2655909725931709 }, { "epoch": 1.175901495162709, "grad_norm": 0.47433152121930633, "learning_rate": 0.0004998943288545821, "loss": 3.299567461013794, "step": 2006, "token_acc": 0.2696991600278686 }, { "epoch": 1.1764878334799178, "grad_norm": 0.5432641689028098, "learning_rate": 0.0004998936232562028, "loss": 3.363025665283203, "step": 2007, "token_acc": 0.2599285984153667 }, { "epoch": 1.177074171797127, "grad_norm": 0.5067973984940682, "learning_rate": 0.0004998929153104095, "loss": 3.347628116607666, "step": 2008, "token_acc": 0.26132443971502356 }, { "epoch": 1.177660510114336, "grad_norm": 0.5082029427180463, "learning_rate": 0.0004998922050172092, "loss": 3.303496837615967, "step": 2009, "token_acc": 0.2681056016121417 }, { "epoch": 1.178246848431545, "grad_norm": 0.4029167060868648, "learning_rate": 0.0004998914923766083, "loss": 3.3124451637268066, "step": 2010, "token_acc": 0.26707596454138616 }, { "epoch": 1.178833186748754, "grad_norm": 0.41997556626733346, "learning_rate": 0.0004998907773886136, "loss": 3.258422374725342, "step": 2011, "token_acc": 0.27420642739368045 }, { "epoch": 1.1794195250659631, "grad_norm": 0.43706206607002823, "learning_rate": 0.0004998900600532317, "loss": 3.3531785011291504, "step": 2012, "token_acc": 0.26127253539479356 }, { "epoch": 1.180005863383172, "grad_norm": 0.4151720452922065, "learning_rate": 0.0004998893403704694, "loss": 3.3099682331085205, "step": 2013, "token_acc": 0.26929813654522944 }, { "epoch": 1.1805922017003811, "grad_norm": 0.5140371857316828, "learning_rate": 0.0004998886183403335, "loss": 3.309345006942749, "step": 2014, "token_acc": 0.26859707413574935 }, { "epoch": 1.1811785400175903, "grad_norm": 0.5869395215828356, "learning_rate": 0.0004998878939628308, "loss": 3.2969727516174316, "step": 2015, "token_acc": 0.2695838018453392 }, { "epoch": 1.1817648783347992, "grad_norm": 0.59960934027287, "learning_rate": 0.000499887167237968, "loss": 3.3286962509155273, "step": 2016, "token_acc": 0.26622080472114645 }, { "epoch": 1.1823512166520083, "grad_norm": 0.5920231438797806, "learning_rate": 0.0004998864381657521, "loss": 3.28824782371521, "step": 2017, "token_acc": 0.2701760379133556 }, { "epoch": 1.1829375549692172, "grad_norm": 0.5002944280232379, "learning_rate": 0.0004998857067461897, "loss": 3.321464776992798, "step": 2018, "token_acc": 0.26673569321074636 }, { "epoch": 1.1835238932864263, "grad_norm": 0.41974701657187496, "learning_rate": 0.0004998849729792879, "loss": 3.290827989578247, "step": 2019, "token_acc": 0.2691389995457702 }, { "epoch": 1.1841102316036354, "grad_norm": 0.507182419312586, "learning_rate": 0.0004998842368650535, "loss": 3.348458766937256, "step": 2020, "token_acc": 0.2634811592388583 }, { "epoch": 1.1846965699208443, "grad_norm": 0.534986168661541, "learning_rate": 0.0004998834984034934, "loss": 3.3211283683776855, "step": 2021, "token_acc": 0.2668648255962674 }, { "epoch": 1.1852829082380534, "grad_norm": 0.4254196401649543, "learning_rate": 0.0004998827575946146, "loss": 3.316277027130127, "step": 2022, "token_acc": 0.26781443242801495 }, { "epoch": 1.1858692465552623, "grad_norm": 0.4535068332589637, "learning_rate": 0.000499882014438424, "loss": 3.2859256267547607, "step": 2023, "token_acc": 0.27251482077146083 }, { "epoch": 1.1864555848724714, "grad_norm": 0.4942597283648693, "learning_rate": 0.0004998812689349286, "loss": 3.3470168113708496, "step": 2024, "token_acc": 0.2666321005924868 }, { "epoch": 1.1870419231896805, "grad_norm": 0.5022677914197416, "learning_rate": 0.0004998805210841353, "loss": 3.3218302726745605, "step": 2025, "token_acc": 0.2659985117663473 }, { "epoch": 1.1876282615068894, "grad_norm": 0.44267495812071506, "learning_rate": 0.0004998797708860513, "loss": 3.329369068145752, "step": 2026, "token_acc": 0.2633799117325032 }, { "epoch": 1.1882145998240985, "grad_norm": 0.5023398808775057, "learning_rate": 0.0004998790183406835, "loss": 3.3358349800109863, "step": 2027, "token_acc": 0.2649848562607861 }, { "epoch": 1.1888009381413076, "grad_norm": 0.6651362621601263, "learning_rate": 0.0004998782634480391, "loss": 3.3057913780212402, "step": 2028, "token_acc": 0.26732774779369434 }, { "epoch": 1.1893872764585165, "grad_norm": 0.7907328585268072, "learning_rate": 0.0004998775062081251, "loss": 3.323847770690918, "step": 2029, "token_acc": 0.2657837384744342 }, { "epoch": 1.1899736147757256, "grad_norm": 0.687031894732298, "learning_rate": 0.0004998767466209488, "loss": 3.331947088241577, "step": 2030, "token_acc": 0.26395333117368425 }, { "epoch": 1.1905599530929347, "grad_norm": 0.5672892746009975, "learning_rate": 0.000499875984686517, "loss": 3.291057586669922, "step": 2031, "token_acc": 0.2687209329319724 }, { "epoch": 1.1911462914101436, "grad_norm": 0.5686607015619654, "learning_rate": 0.0004998752204048371, "loss": 3.2978549003601074, "step": 2032, "token_acc": 0.27016012991284327 }, { "epoch": 1.1917326297273527, "grad_norm": 0.5832588955863457, "learning_rate": 0.0004998744537759161, "loss": 3.3795387744903564, "step": 2033, "token_acc": 0.2593577911917112 }, { "epoch": 1.1923189680445616, "grad_norm": 0.6169656167197718, "learning_rate": 0.0004998736847997615, "loss": 3.2823522090911865, "step": 2034, "token_acc": 0.27315726309349186 }, { "epoch": 1.1929053063617707, "grad_norm": 0.46286552827349114, "learning_rate": 0.0004998729134763802, "loss": 3.355337619781494, "step": 2035, "token_acc": 0.26245440306117085 }, { "epoch": 1.1934916446789798, "grad_norm": 0.44136780811216014, "learning_rate": 0.0004998721398057797, "loss": 3.2957332134246826, "step": 2036, "token_acc": 0.27055936738028835 }, { "epoch": 1.1940779829961887, "grad_norm": 0.4413040541938301, "learning_rate": 0.000499871363787967, "loss": 3.3235702514648438, "step": 2037, "token_acc": 0.26550061061581887 }, { "epoch": 1.1946643213133978, "grad_norm": 0.5388273982131709, "learning_rate": 0.0004998705854229497, "loss": 3.275033473968506, "step": 2038, "token_acc": 0.27238143882863397 }, { "epoch": 1.195250659630607, "grad_norm": 0.5449101509333466, "learning_rate": 0.0004998698047107349, "loss": 3.311260461807251, "step": 2039, "token_acc": 0.268357345969066 }, { "epoch": 1.1958369979478158, "grad_norm": 0.49513579424566173, "learning_rate": 0.0004998690216513299, "loss": 3.304572105407715, "step": 2040, "token_acc": 0.2675595750340317 }, { "epoch": 1.196423336265025, "grad_norm": 0.45019171030117655, "learning_rate": 0.0004998682362447423, "loss": 3.3287153244018555, "step": 2041, "token_acc": 0.26570323725696066 }, { "epoch": 1.197009674582234, "grad_norm": 0.4619610040695394, "learning_rate": 0.0004998674484909794, "loss": 3.3112473487854004, "step": 2042, "token_acc": 0.26907846887374404 }, { "epoch": 1.197596012899443, "grad_norm": 0.5159074386659498, "learning_rate": 0.0004998666583900483, "loss": 3.353604555130005, "step": 2043, "token_acc": 0.2624648839870981 }, { "epoch": 1.198182351216652, "grad_norm": 0.5278940524411757, "learning_rate": 0.0004998658659419568, "loss": 3.2849254608154297, "step": 2044, "token_acc": 0.27182364120902425 }, { "epoch": 1.198768689533861, "grad_norm": 0.4853549663762368, "learning_rate": 0.000499865071146712, "loss": 3.33880615234375, "step": 2045, "token_acc": 0.26337082955400865 }, { "epoch": 1.19935502785107, "grad_norm": 0.4967489983274647, "learning_rate": 0.0004998642740043217, "loss": 3.300752878189087, "step": 2046, "token_acc": 0.2708160560623555 }, { "epoch": 1.1999413661682792, "grad_norm": 0.5064700710445951, "learning_rate": 0.0004998634745147931, "loss": 3.255930185317993, "step": 2047, "token_acc": 0.2761124152152204 }, { "epoch": 1.200527704485488, "grad_norm": 0.43717318165201213, "learning_rate": 0.000499862672678134, "loss": 3.3873236179351807, "step": 2048, "token_acc": 0.2602438504951549 }, { "epoch": 1.2011140428026972, "grad_norm": 0.5229383969261472, "learning_rate": 0.0004998618684943517, "loss": 3.313478946685791, "step": 2049, "token_acc": 0.2663258537185066 }, { "epoch": 1.201700381119906, "grad_norm": 0.5486282738089729, "learning_rate": 0.0004998610619634539, "loss": 3.333477735519409, "step": 2050, "token_acc": 0.26640099588201765 }, { "epoch": 1.2022867194371152, "grad_norm": 0.6476656598009072, "learning_rate": 0.0004998602530854481, "loss": 3.2968034744262695, "step": 2051, "token_acc": 0.27081136142650625 }, { "epoch": 1.2028730577543243, "grad_norm": 0.6190001487911446, "learning_rate": 0.0004998594418603419, "loss": 3.30812668800354, "step": 2052, "token_acc": 0.26787274997398813 }, { "epoch": 1.2034593960715332, "grad_norm": 0.5754424924334861, "learning_rate": 0.0004998586282881429, "loss": 3.2833471298217773, "step": 2053, "token_acc": 0.27251918888630455 }, { "epoch": 1.2040457343887423, "grad_norm": 0.43768417940362286, "learning_rate": 0.0004998578123688589, "loss": 3.3144237995147705, "step": 2054, "token_acc": 0.26716080534090314 }, { "epoch": 1.2046320727059514, "grad_norm": 0.44906101688761907, "learning_rate": 0.0004998569941024973, "loss": 3.249899387359619, "step": 2055, "token_acc": 0.27483905096651223 }, { "epoch": 1.2052184110231603, "grad_norm": 0.4423444422510579, "learning_rate": 0.000499856173489066, "loss": 3.330540180206299, "step": 2056, "token_acc": 0.2658774678582021 }, { "epoch": 1.2058047493403694, "grad_norm": 0.5284401008979768, "learning_rate": 0.0004998553505285725, "loss": 3.298649311065674, "step": 2057, "token_acc": 0.26917473693036514 }, { "epoch": 1.2063910876575785, "grad_norm": 0.697519914652804, "learning_rate": 0.0004998545252210249, "loss": 3.3638739585876465, "step": 2058, "token_acc": 0.26099888266984206 }, { "epoch": 1.2069774259747874, "grad_norm": 0.6063135151966607, "learning_rate": 0.0004998536975664306, "loss": 3.3142952919006348, "step": 2059, "token_acc": 0.26800522025803425 }, { "epoch": 1.2075637642919965, "grad_norm": 0.5454980827361674, "learning_rate": 0.0004998528675647974, "loss": 3.302705764770508, "step": 2060, "token_acc": 0.26873167715042007 }, { "epoch": 1.2081501026092054, "grad_norm": 0.5978719029382357, "learning_rate": 0.0004998520352161334, "loss": 3.3411378860473633, "step": 2061, "token_acc": 0.2640342038340417 }, { "epoch": 1.2087364409264145, "grad_norm": 0.5450771873655044, "learning_rate": 0.000499851200520446, "loss": 3.320988893508911, "step": 2062, "token_acc": 0.2677429261267367 }, { "epoch": 1.2093227792436236, "grad_norm": 0.48164115859238466, "learning_rate": 0.0004998503634777434, "loss": 3.3001298904418945, "step": 2063, "token_acc": 0.2694761614890696 }, { "epoch": 1.2099091175608325, "grad_norm": 0.5311665158489401, "learning_rate": 0.0004998495240880333, "loss": 3.2426795959472656, "step": 2064, "token_acc": 0.2765058602603121 }, { "epoch": 1.2104954558780416, "grad_norm": 0.5144423843601922, "learning_rate": 0.0004998486823513236, "loss": 3.3316783905029297, "step": 2065, "token_acc": 0.26528989157233374 }, { "epoch": 1.2110817941952507, "grad_norm": 0.4624926794183494, "learning_rate": 0.0004998478382676221, "loss": 3.338933229446411, "step": 2066, "token_acc": 0.2627043226262826 }, { "epoch": 1.2116681325124596, "grad_norm": 0.4484913570209329, "learning_rate": 0.0004998469918369369, "loss": 3.2829103469848633, "step": 2067, "token_acc": 0.2709052799136505 }, { "epoch": 1.2122544708296688, "grad_norm": 0.4221430128613876, "learning_rate": 0.0004998461430592758, "loss": 3.3139305114746094, "step": 2068, "token_acc": 0.2679495963691252 }, { "epoch": 1.2128408091468779, "grad_norm": 0.4490465945185443, "learning_rate": 0.000499845291934647, "loss": 3.26668643951416, "step": 2069, "token_acc": 0.27291121594981615 }, { "epoch": 1.2134271474640868, "grad_norm": 0.4291015903904159, "learning_rate": 0.0004998444384630582, "loss": 3.27933931350708, "step": 2070, "token_acc": 0.2720215409030009 }, { "epoch": 1.2140134857812959, "grad_norm": 0.4649709197684189, "learning_rate": 0.0004998435826445177, "loss": 3.3592960834503174, "step": 2071, "token_acc": 0.26242346537582933 }, { "epoch": 1.2145998240985048, "grad_norm": 0.5130252359673275, "learning_rate": 0.0004998427244790333, "loss": 3.3406920433044434, "step": 2072, "token_acc": 0.2639213790714686 }, { "epoch": 1.2151861624157139, "grad_norm": 0.5197240919438159, "learning_rate": 0.0004998418639666133, "loss": 3.3269901275634766, "step": 2073, "token_acc": 0.2651729202136647 }, { "epoch": 1.215772500732923, "grad_norm": 0.5873915998181899, "learning_rate": 0.0004998410011072656, "loss": 3.31498384475708, "step": 2074, "token_acc": 0.26813710894586074 }, { "epoch": 1.2163588390501319, "grad_norm": 0.5929268493264973, "learning_rate": 0.0004998401359009983, "loss": 3.306807518005371, "step": 2075, "token_acc": 0.2696637558755186 }, { "epoch": 1.216945177367341, "grad_norm": 0.5658766916562017, "learning_rate": 0.0004998392683478196, "loss": 3.266423225402832, "step": 2076, "token_acc": 0.27343681169184525 }, { "epoch": 1.2175315156845499, "grad_norm": 0.590874438978482, "learning_rate": 0.0004998383984477378, "loss": 3.2856032848358154, "step": 2077, "token_acc": 0.2710271632523156 }, { "epoch": 1.218117854001759, "grad_norm": 0.5590312966286752, "learning_rate": 0.0004998375262007607, "loss": 3.2995848655700684, "step": 2078, "token_acc": 0.26880451380800785 }, { "epoch": 1.218704192318968, "grad_norm": 0.5657210950305598, "learning_rate": 0.0004998366516068968, "loss": 3.287076711654663, "step": 2079, "token_acc": 0.27062500641005915 }, { "epoch": 1.219290530636177, "grad_norm": 0.6294065023184018, "learning_rate": 0.0004998357746661542, "loss": 3.3046817779541016, "step": 2080, "token_acc": 0.26814411578118874 }, { "epoch": 1.219876868953386, "grad_norm": 0.5717863710469163, "learning_rate": 0.0004998348953785412, "loss": 3.2898173332214355, "step": 2081, "token_acc": 0.27171867972781294 }, { "epoch": 1.2204632072705952, "grad_norm": 0.5591427533700243, "learning_rate": 0.000499834013744066, "loss": 3.3410966396331787, "step": 2082, "token_acc": 0.2637180668862226 }, { "epoch": 1.221049545587804, "grad_norm": 0.5732897584919422, "learning_rate": 0.0004998331297627368, "loss": 3.3258461952209473, "step": 2083, "token_acc": 0.265448460822719 }, { "epoch": 1.2216358839050132, "grad_norm": 0.5453331744023011, "learning_rate": 0.0004998322434345621, "loss": 3.301168441772461, "step": 2084, "token_acc": 0.26884017989131564 }, { "epoch": 1.2222222222222223, "grad_norm": 0.5236881772813752, "learning_rate": 0.0004998313547595501, "loss": 3.3436872959136963, "step": 2085, "token_acc": 0.26186578100069496 }, { "epoch": 1.2228085605394312, "grad_norm": 0.5535504529491678, "learning_rate": 0.0004998304637377091, "loss": 3.3017029762268066, "step": 2086, "token_acc": 0.26890689984453053 }, { "epoch": 1.2233948988566403, "grad_norm": 0.5235068998876725, "learning_rate": 0.0004998295703690476, "loss": 3.330578327178955, "step": 2087, "token_acc": 0.2669153282634098 }, { "epoch": 1.2239812371738492, "grad_norm": 0.4616826402754192, "learning_rate": 0.000499828674653574, "loss": 3.28640079498291, "step": 2088, "token_acc": 0.27065902187996704 }, { "epoch": 1.2245675754910583, "grad_norm": 0.48382335926060116, "learning_rate": 0.0004998277765912966, "loss": 3.3055338859558105, "step": 2089, "token_acc": 0.26869214531536195 }, { "epoch": 1.2251539138082674, "grad_norm": 0.5245911342083793, "learning_rate": 0.0004998268761822239, "loss": 3.306577682495117, "step": 2090, "token_acc": 0.2673301839673687 }, { "epoch": 1.2257402521254763, "grad_norm": 0.5675000733721677, "learning_rate": 0.0004998259734263643, "loss": 3.313633441925049, "step": 2091, "token_acc": 0.2680983913723904 }, { "epoch": 1.2263265904426854, "grad_norm": 0.44073167989521644, "learning_rate": 0.0004998250683237264, "loss": 3.2866435050964355, "step": 2092, "token_acc": 0.27021551311475833 }, { "epoch": 1.2269129287598945, "grad_norm": 0.47749182330955664, "learning_rate": 0.0004998241608743185, "loss": 3.322282314300537, "step": 2093, "token_acc": 0.26587579589019494 }, { "epoch": 1.2274992670771034, "grad_norm": 0.5065970450133327, "learning_rate": 0.0004998232510781494, "loss": 3.3330321311950684, "step": 2094, "token_acc": 0.2635963820851256 }, { "epoch": 1.2280856053943126, "grad_norm": 0.407741121959121, "learning_rate": 0.0004998223389352275, "loss": 3.3167710304260254, "step": 2095, "token_acc": 0.26751700454780497 }, { "epoch": 1.2286719437115217, "grad_norm": 0.46011877455774647, "learning_rate": 0.0004998214244455612, "loss": 3.2706167697906494, "step": 2096, "token_acc": 0.27219761310604523 }, { "epoch": 1.2292582820287306, "grad_norm": 0.46780702669159663, "learning_rate": 0.0004998205076091593, "loss": 3.3330459594726562, "step": 2097, "token_acc": 0.2654161433645289 }, { "epoch": 1.2298446203459397, "grad_norm": 0.4105843333920586, "learning_rate": 0.0004998195884260304, "loss": 3.30873966217041, "step": 2098, "token_acc": 0.2671012527392933 }, { "epoch": 1.2304309586631486, "grad_norm": 0.4387013374158282, "learning_rate": 0.0004998186668961832, "loss": 3.2975692749023438, "step": 2099, "token_acc": 0.2701121130568983 }, { "epoch": 1.2310172969803577, "grad_norm": 0.4198984972799839, "learning_rate": 0.0004998177430196261, "loss": 3.34445858001709, "step": 2100, "token_acc": 0.2638136752181451 }, { "epoch": 1.2316036352975668, "grad_norm": 0.4457632350662402, "learning_rate": 0.000499816816796368, "loss": 3.328608512878418, "step": 2101, "token_acc": 0.2658296100345539 }, { "epoch": 1.2321899736147757, "grad_norm": 0.47800049833001435, "learning_rate": 0.0004998158882264177, "loss": 3.398808479309082, "step": 2102, "token_acc": 0.25723926445465445 }, { "epoch": 1.2327763119319848, "grad_norm": 0.531390804076189, "learning_rate": 0.0004998149573097835, "loss": 3.3021271228790283, "step": 2103, "token_acc": 0.2702262069113705 }, { "epoch": 1.2333626502491937, "grad_norm": 0.5415415266687958, "learning_rate": 0.0004998140240464746, "loss": 3.3238556385040283, "step": 2104, "token_acc": 0.2662548984127927 }, { "epoch": 1.2339489885664028, "grad_norm": 0.5520885664623458, "learning_rate": 0.0004998130884364994, "loss": 3.3366012573242188, "step": 2105, "token_acc": 0.2627775608087584 }, { "epoch": 1.234535326883612, "grad_norm": 0.6038190730308144, "learning_rate": 0.000499812150479867, "loss": 3.3262522220611572, "step": 2106, "token_acc": 0.26526967337440255 }, { "epoch": 1.2351216652008208, "grad_norm": 0.6860717149168699, "learning_rate": 0.0004998112101765861, "loss": 3.3490939140319824, "step": 2107, "token_acc": 0.26322550628739805 }, { "epoch": 1.23570800351803, "grad_norm": 0.6127259214853676, "learning_rate": 0.0004998102675266654, "loss": 3.362032413482666, "step": 2108, "token_acc": 0.2611543173846971 }, { "epoch": 1.236294341835239, "grad_norm": 0.5163032196851721, "learning_rate": 0.0004998093225301139, "loss": 3.2941439151763916, "step": 2109, "token_acc": 0.26956833872212715 }, { "epoch": 1.236880680152448, "grad_norm": 0.5091333054474084, "learning_rate": 0.0004998083751869405, "loss": 3.3019800186157227, "step": 2110, "token_acc": 0.26967103552774285 }, { "epoch": 1.237467018469657, "grad_norm": 0.4794440213897465, "learning_rate": 0.0004998074254971539, "loss": 3.3275718688964844, "step": 2111, "token_acc": 0.26466550531707445 }, { "epoch": 1.2380533567868661, "grad_norm": 0.5205151099699346, "learning_rate": 0.0004998064734607632, "loss": 3.2655792236328125, "step": 2112, "token_acc": 0.27337097576150426 }, { "epoch": 1.238639695104075, "grad_norm": 0.4936815382841612, "learning_rate": 0.0004998055190777774, "loss": 3.3203606605529785, "step": 2113, "token_acc": 0.2668731624807244 }, { "epoch": 1.2392260334212841, "grad_norm": 0.5191613155982038, "learning_rate": 0.0004998045623482053, "loss": 3.2999205589294434, "step": 2114, "token_acc": 0.26844001467003215 }, { "epoch": 1.239812371738493, "grad_norm": 0.6418739712426673, "learning_rate": 0.0004998036032720558, "loss": 3.320920467376709, "step": 2115, "token_acc": 0.26616907268486417 }, { "epoch": 1.2403987100557021, "grad_norm": 0.6718805892842361, "learning_rate": 0.0004998026418493383, "loss": 3.326570987701416, "step": 2116, "token_acc": 0.26596548444082074 }, { "epoch": 1.2409850483729112, "grad_norm": 0.5820633407029908, "learning_rate": 0.0004998016780800615, "loss": 3.286497116088867, "step": 2117, "token_acc": 0.27011828233479046 }, { "epoch": 1.2415713866901201, "grad_norm": 0.4704664072097044, "learning_rate": 0.0004998007119642345, "loss": 3.347278118133545, "step": 2118, "token_acc": 0.262891077998943 }, { "epoch": 1.2421577250073292, "grad_norm": 0.49139195510202993, "learning_rate": 0.0004997997435018665, "loss": 3.3077545166015625, "step": 2119, "token_acc": 0.26853775102858224 }, { "epoch": 1.2427440633245384, "grad_norm": 0.5887750309598477, "learning_rate": 0.0004997987726929664, "loss": 3.336679458618164, "step": 2120, "token_acc": 0.2630566379258954 }, { "epoch": 1.2433304016417472, "grad_norm": 0.5823319978781171, "learning_rate": 0.0004997977995375436, "loss": 3.3195934295654297, "step": 2121, "token_acc": 0.26626570901608426 }, { "epoch": 1.2439167399589564, "grad_norm": 0.5267169948019839, "learning_rate": 0.000499796824035607, "loss": 3.3138718605041504, "step": 2122, "token_acc": 0.2679616646298321 }, { "epoch": 1.2445030782761655, "grad_norm": 0.4571683439465644, "learning_rate": 0.0004997958461871658, "loss": 3.2633495330810547, "step": 2123, "token_acc": 0.2727988990018367 }, { "epoch": 1.2450894165933744, "grad_norm": 0.46302262742243044, "learning_rate": 0.0004997948659922293, "loss": 3.3327198028564453, "step": 2124, "token_acc": 0.2654166890699931 }, { "epoch": 1.2456757549105835, "grad_norm": 0.4175375683138514, "learning_rate": 0.0004997938834508067, "loss": 3.3322274684906006, "step": 2125, "token_acc": 0.26659637800390845 }, { "epoch": 1.2462620932277924, "grad_norm": 0.40134172357393805, "learning_rate": 0.000499792898562907, "loss": 3.36427640914917, "step": 2126, "token_acc": 0.25978651339423176 }, { "epoch": 1.2468484315450015, "grad_norm": 0.40208596100362987, "learning_rate": 0.0004997919113285397, "loss": 3.345595121383667, "step": 2127, "token_acc": 0.2607705988188507 }, { "epoch": 1.2474347698622106, "grad_norm": 0.4513734282016161, "learning_rate": 0.000499790921747714, "loss": 3.3036911487579346, "step": 2128, "token_acc": 0.2693736034325399 }, { "epoch": 1.2480211081794195, "grad_norm": 0.47616020535154974, "learning_rate": 0.0004997899298204391, "loss": 3.2801637649536133, "step": 2129, "token_acc": 0.27253698856088465 }, { "epoch": 1.2486074464966286, "grad_norm": 0.5012025818456276, "learning_rate": 0.0004997889355467245, "loss": 3.3106446266174316, "step": 2130, "token_acc": 0.2670502049868943 }, { "epoch": 1.2491937848138375, "grad_norm": 0.4650654240770107, "learning_rate": 0.0004997879389265795, "loss": 3.3095996379852295, "step": 2131, "token_acc": 0.2683281280551284 }, { "epoch": 1.2497801231310466, "grad_norm": 0.48766268190844136, "learning_rate": 0.0004997869399600134, "loss": 3.319863796234131, "step": 2132, "token_acc": 0.2664853453523462 }, { "epoch": 1.2503664614482557, "grad_norm": 0.6404063018357209, "learning_rate": 0.0004997859386470355, "loss": 3.3398847579956055, "step": 2133, "token_acc": 0.2614067022768643 }, { "epoch": 1.2509527997654648, "grad_norm": 0.6695813267925528, "learning_rate": 0.0004997849349876553, "loss": 3.349310874938965, "step": 2134, "token_acc": 0.26202523178187753 }, { "epoch": 1.2515391380826737, "grad_norm": 0.6495189887764161, "learning_rate": 0.0004997839289818823, "loss": 3.3054168224334717, "step": 2135, "token_acc": 0.2681828124920513 }, { "epoch": 1.2521254763998828, "grad_norm": 0.5852846148002033, "learning_rate": 0.0004997829206297257, "loss": 3.3067233562469482, "step": 2136, "token_acc": 0.2691477116460054 }, { "epoch": 1.2527118147170917, "grad_norm": 0.4558384604605822, "learning_rate": 0.0004997819099311953, "loss": 3.291916847229004, "step": 2137, "token_acc": 0.2710718645635584 }, { "epoch": 1.2532981530343008, "grad_norm": 0.4837480430820536, "learning_rate": 0.0004997808968863005, "loss": 3.2962217330932617, "step": 2138, "token_acc": 0.268129293252959 }, { "epoch": 1.25388449135151, "grad_norm": 0.5853034581896214, "learning_rate": 0.0004997798814950506, "loss": 3.2862343788146973, "step": 2139, "token_acc": 0.2698820319396517 }, { "epoch": 1.2544708296687188, "grad_norm": 0.4442622420681341, "learning_rate": 0.0004997788637574554, "loss": 3.352389335632324, "step": 2140, "token_acc": 0.2634235055776913 }, { "epoch": 1.255057167985928, "grad_norm": 0.47521460412611005, "learning_rate": 0.0004997778436735243, "loss": 3.2933759689331055, "step": 2141, "token_acc": 0.27030047410064184 }, { "epoch": 1.2556435063031368, "grad_norm": 0.4943872005971034, "learning_rate": 0.0004997768212432669, "loss": 3.2984228134155273, "step": 2142, "token_acc": 0.26990768306485785 }, { "epoch": 1.256229844620346, "grad_norm": 0.4525343170053042, "learning_rate": 0.000499775796466693, "loss": 3.297734260559082, "step": 2143, "token_acc": 0.26891125437655844 }, { "epoch": 1.256816182937555, "grad_norm": 0.5259778817212585, "learning_rate": 0.000499774769343812, "loss": 3.3735077381134033, "step": 2144, "token_acc": 0.26067582715786375 }, { "epoch": 1.257402521254764, "grad_norm": 0.591554418910151, "learning_rate": 0.0004997737398746336, "loss": 3.3246655464172363, "step": 2145, "token_acc": 0.26484505129592495 }, { "epoch": 1.257988859571973, "grad_norm": 0.5442635455552316, "learning_rate": 0.0004997727080591674, "loss": 3.3255317211151123, "step": 2146, "token_acc": 0.26505455770676206 }, { "epoch": 1.258575197889182, "grad_norm": 0.4990162605324992, "learning_rate": 0.0004997716738974233, "loss": 3.2527995109558105, "step": 2147, "token_acc": 0.2746930182999369 }, { "epoch": 1.259161536206391, "grad_norm": 0.500296782374342, "learning_rate": 0.0004997706373894109, "loss": 3.3446402549743652, "step": 2148, "token_acc": 0.2630214093401774 }, { "epoch": 1.2597478745236002, "grad_norm": 0.45665751983693237, "learning_rate": 0.0004997695985351398, "loss": 3.256342887878418, "step": 2149, "token_acc": 0.27315003671371463 }, { "epoch": 1.2603342128408093, "grad_norm": 0.3918018089168429, "learning_rate": 0.00049976855733462, "loss": 3.2846970558166504, "step": 2150, "token_acc": 0.27114200481390005 }, { "epoch": 1.2609205511580182, "grad_norm": 0.41335462526156974, "learning_rate": 0.0004997675137878611, "loss": 3.288651943206787, "step": 2151, "token_acc": 0.2700356643268516 }, { "epoch": 1.2615068894752273, "grad_norm": 0.40587142651340774, "learning_rate": 0.000499766467894873, "loss": 3.2813515663146973, "step": 2152, "token_acc": 0.27146011808742226 }, { "epoch": 1.2620932277924362, "grad_norm": 0.39450349765745246, "learning_rate": 0.0004997654196556656, "loss": 3.286311388015747, "step": 2153, "token_acc": 0.2697991785183897 }, { "epoch": 1.2626795661096453, "grad_norm": 0.44544252814626434, "learning_rate": 0.0004997643690702486, "loss": 3.301556348800659, "step": 2154, "token_acc": 0.26922668727600274 }, { "epoch": 1.2632659044268544, "grad_norm": 0.5963410556025828, "learning_rate": 0.0004997633161386318, "loss": 3.2753562927246094, "step": 2155, "token_acc": 0.27290407705502046 }, { "epoch": 1.2638522427440633, "grad_norm": 0.44640056642794085, "learning_rate": 0.0004997622608608253, "loss": 3.2942678928375244, "step": 2156, "token_acc": 0.26906797266136234 }, { "epoch": 1.2644385810612724, "grad_norm": 0.48046694174735866, "learning_rate": 0.000499761203236839, "loss": 3.3624701499938965, "step": 2157, "token_acc": 0.2598998013347575 }, { "epoch": 1.2650249193784813, "grad_norm": 0.5122597343770945, "learning_rate": 0.0004997601432666826, "loss": 3.335897922515869, "step": 2158, "token_acc": 0.2632955307042328 }, { "epoch": 1.2656112576956904, "grad_norm": 0.5243895145402654, "learning_rate": 0.0004997590809503662, "loss": 3.283290386199951, "step": 2159, "token_acc": 0.2707971217154701 }, { "epoch": 1.2661975960128995, "grad_norm": 0.4555532403765818, "learning_rate": 0.0004997580162879, "loss": 3.280341386795044, "step": 2160, "token_acc": 0.27032714112894957 }, { "epoch": 1.2667839343301086, "grad_norm": 0.4809312677690126, "learning_rate": 0.0004997569492792936, "loss": 3.299182891845703, "step": 2161, "token_acc": 0.26816311087431566 }, { "epoch": 1.2673702726473175, "grad_norm": 0.5189217449957898, "learning_rate": 0.0004997558799245572, "loss": 3.269404649734497, "step": 2162, "token_acc": 0.2733627899637052 }, { "epoch": 1.2679566109645266, "grad_norm": 0.47306107009117765, "learning_rate": 0.000499754808223701, "loss": 3.3310341835021973, "step": 2163, "token_acc": 0.26330186040937 }, { "epoch": 1.2685429492817355, "grad_norm": 0.5576696509813547, "learning_rate": 0.0004997537341767348, "loss": 3.2655444145202637, "step": 2164, "token_acc": 0.27113668662488644 }, { "epoch": 1.2691292875989446, "grad_norm": 0.5634013829567075, "learning_rate": 0.0004997526577836689, "loss": 3.2641098499298096, "step": 2165, "token_acc": 0.27565313904434147 }, { "epoch": 1.2697156259161537, "grad_norm": 0.5006032753112797, "learning_rate": 0.0004997515790445133, "loss": 3.29668927192688, "step": 2166, "token_acc": 0.26810630976886207 }, { "epoch": 1.2703019642333626, "grad_norm": 0.5463232411828371, "learning_rate": 0.0004997504979592781, "loss": 3.3136603832244873, "step": 2167, "token_acc": 0.26847550675675674 }, { "epoch": 1.2708883025505717, "grad_norm": 0.6108799668056303, "learning_rate": 0.0004997494145279735, "loss": 3.3317110538482666, "step": 2168, "token_acc": 0.26250752845369424 }, { "epoch": 1.2714746408677806, "grad_norm": 0.5068173858876764, "learning_rate": 0.0004997483287506098, "loss": 3.325237274169922, "step": 2169, "token_acc": 0.2638804811796663 }, { "epoch": 1.2720609791849897, "grad_norm": 0.4936194502911628, "learning_rate": 0.000499747240627197, "loss": 3.3076202869415283, "step": 2170, "token_acc": 0.2673911492882544 }, { "epoch": 1.2726473175021988, "grad_norm": 0.5033630673852391, "learning_rate": 0.0004997461501577455, "loss": 3.3397345542907715, "step": 2171, "token_acc": 0.2633098611537578 }, { "epoch": 1.2732336558194077, "grad_norm": 0.4178462490460003, "learning_rate": 0.0004997450573422654, "loss": 3.327944755554199, "step": 2172, "token_acc": 0.2650836931585574 }, { "epoch": 1.2738199941366168, "grad_norm": 0.452308088754542, "learning_rate": 0.0004997439621807671, "loss": 3.3097381591796875, "step": 2173, "token_acc": 0.267644548518405 }, { "epoch": 1.2744063324538257, "grad_norm": 0.4931493824872053, "learning_rate": 0.0004997428646732607, "loss": 3.303246259689331, "step": 2174, "token_acc": 0.26850617613080957 }, { "epoch": 1.2749926707710348, "grad_norm": 0.43023303548659314, "learning_rate": 0.0004997417648197566, "loss": 3.3206167221069336, "step": 2175, "token_acc": 0.2657444993148383 }, { "epoch": 1.275579009088244, "grad_norm": 0.41911010236334423, "learning_rate": 0.0004997406626202653, "loss": 3.3061723709106445, "step": 2176, "token_acc": 0.26769735288852453 }, { "epoch": 1.276165347405453, "grad_norm": 0.4209302323210968, "learning_rate": 0.0004997395580747969, "loss": 3.298943519592285, "step": 2177, "token_acc": 0.2690348106492787 }, { "epoch": 1.276751685722662, "grad_norm": 0.4617857219841965, "learning_rate": 0.0004997384511833619, "loss": 3.3021743297576904, "step": 2178, "token_acc": 0.26724531749657376 }, { "epoch": 1.277338024039871, "grad_norm": 0.41443708513598293, "learning_rate": 0.0004997373419459707, "loss": 3.288419485092163, "step": 2179, "token_acc": 0.26922466332118017 }, { "epoch": 1.27792436235708, "grad_norm": 0.5068318801664615, "learning_rate": 0.0004997362303626337, "loss": 3.3216655254364014, "step": 2180, "token_acc": 0.2640912369749846 }, { "epoch": 1.278510700674289, "grad_norm": 0.5615175594461506, "learning_rate": 0.0004997351164333612, "loss": 3.3075475692749023, "step": 2181, "token_acc": 0.2668439366562251 }, { "epoch": 1.2790970389914982, "grad_norm": 0.6616765764690321, "learning_rate": 0.0004997340001581639, "loss": 3.342186689376831, "step": 2182, "token_acc": 0.26381982130697645 }, { "epoch": 1.279683377308707, "grad_norm": 0.5792090207835741, "learning_rate": 0.0004997328815370524, "loss": 3.299501657485962, "step": 2183, "token_acc": 0.26922838469685617 }, { "epoch": 1.2802697156259162, "grad_norm": 0.5036005331718914, "learning_rate": 0.0004997317605700366, "loss": 3.2714381217956543, "step": 2184, "token_acc": 0.27095021571896166 }, { "epoch": 1.280856053943125, "grad_norm": 0.5290923695339709, "learning_rate": 0.0004997306372571278, "loss": 3.3222498893737793, "step": 2185, "token_acc": 0.26354727080282764 }, { "epoch": 1.2814423922603342, "grad_norm": 0.5654401740763333, "learning_rate": 0.0004997295115983359, "loss": 3.2666594982147217, "step": 2186, "token_acc": 0.2723970572424703 }, { "epoch": 1.2820287305775433, "grad_norm": 0.4596166410423911, "learning_rate": 0.0004997283835936719, "loss": 3.336409091949463, "step": 2187, "token_acc": 0.2625966165499362 }, { "epoch": 1.2826150688947524, "grad_norm": 0.5382056117059197, "learning_rate": 0.0004997272532431462, "loss": 3.325645923614502, "step": 2188, "token_acc": 0.264492223561876 }, { "epoch": 1.2832014072119613, "grad_norm": 0.5291570545348604, "learning_rate": 0.0004997261205467694, "loss": 3.27892804145813, "step": 2189, "token_acc": 0.27071375428567046 }, { "epoch": 1.2837877455291704, "grad_norm": 0.4862945935625387, "learning_rate": 0.0004997249855045523, "loss": 3.2991814613342285, "step": 2190, "token_acc": 0.2692623266410517 }, { "epoch": 1.2843740838463793, "grad_norm": 0.49025649432895674, "learning_rate": 0.0004997238481165055, "loss": 3.299318790435791, "step": 2191, "token_acc": 0.26810181380684595 }, { "epoch": 1.2849604221635884, "grad_norm": 0.47720798043497037, "learning_rate": 0.0004997227083826396, "loss": 3.3071675300598145, "step": 2192, "token_acc": 0.26961167415106335 }, { "epoch": 1.2855467604807975, "grad_norm": 0.46856350221331255, "learning_rate": 0.0004997215663029654, "loss": 3.3346195220947266, "step": 2193, "token_acc": 0.262274133468022 }, { "epoch": 1.2861330987980064, "grad_norm": 0.444839551729711, "learning_rate": 0.0004997204218774936, "loss": 3.270496368408203, "step": 2194, "token_acc": 0.27275033785707997 }, { "epoch": 1.2867194371152155, "grad_norm": 0.5146535595751268, "learning_rate": 0.0004997192751062349, "loss": 3.30252742767334, "step": 2195, "token_acc": 0.2686239244619755 }, { "epoch": 1.2873057754324244, "grad_norm": 0.5829457858013953, "learning_rate": 0.0004997181259892001, "loss": 3.376220226287842, "step": 2196, "token_acc": 0.258965831284834 }, { "epoch": 1.2878921137496335, "grad_norm": 0.5547005144293703, "learning_rate": 0.0004997169745264, "loss": 3.288825750350952, "step": 2197, "token_acc": 0.2712078192662799 }, { "epoch": 1.2884784520668426, "grad_norm": 0.4379835936649123, "learning_rate": 0.0004997158207178454, "loss": 3.3246407508850098, "step": 2198, "token_acc": 0.26629575356329593 }, { "epoch": 1.2890647903840515, "grad_norm": 0.48753978266355136, "learning_rate": 0.0004997146645635473, "loss": 3.3396501541137695, "step": 2199, "token_acc": 0.2652016704013037 }, { "epoch": 1.2896511287012606, "grad_norm": 0.6188543684437843, "learning_rate": 0.0004997135060635163, "loss": 3.2410683631896973, "step": 2200, "token_acc": 0.2769529270604421 }, { "epoch": 1.2902374670184695, "grad_norm": 0.5105522784304846, "learning_rate": 0.0004997123452177635, "loss": 3.3327670097351074, "step": 2201, "token_acc": 0.2650352519103809 }, { "epoch": 1.2908238053356786, "grad_norm": 0.49442579958638455, "learning_rate": 0.0004997111820262995, "loss": 3.271937608718872, "step": 2202, "token_acc": 0.27052095629759754 }, { "epoch": 1.2914101436528878, "grad_norm": 0.5361497376013924, "learning_rate": 0.0004997100164891356, "loss": 3.231008291244507, "step": 2203, "token_acc": 0.27780742507952455 }, { "epoch": 1.2919964819700969, "grad_norm": 0.4377373573893023, "learning_rate": 0.0004997088486062825, "loss": 3.3321752548217773, "step": 2204, "token_acc": 0.26604221105527637 }, { "epoch": 1.2925828202873058, "grad_norm": 0.46926770006513724, "learning_rate": 0.0004997076783777513, "loss": 3.2693886756896973, "step": 2205, "token_acc": 0.2738393943343761 }, { "epoch": 1.2931691586045149, "grad_norm": 0.4960038827735289, "learning_rate": 0.0004997065058035531, "loss": 3.2350211143493652, "step": 2206, "token_acc": 0.27611864695971167 }, { "epoch": 1.2937554969217238, "grad_norm": 0.5083223173561959, "learning_rate": 0.0004997053308836985, "loss": 3.3048973083496094, "step": 2207, "token_acc": 0.26771323662834756 }, { "epoch": 1.2943418352389329, "grad_norm": 0.4522092033218727, "learning_rate": 0.0004997041536181989, "loss": 3.2984819412231445, "step": 2208, "token_acc": 0.2691531380152969 }, { "epoch": 1.294928173556142, "grad_norm": 0.48258725121531354, "learning_rate": 0.0004997029740070653, "loss": 3.3176679611206055, "step": 2209, "token_acc": 0.26765786536963343 }, { "epoch": 1.2955145118733509, "grad_norm": 0.42293475133752956, "learning_rate": 0.0004997017920503088, "loss": 3.3052749633789062, "step": 2210, "token_acc": 0.2670055158665964 }, { "epoch": 1.29610085019056, "grad_norm": 0.44979381404927116, "learning_rate": 0.0004997006077479402, "loss": 3.3270926475524902, "step": 2211, "token_acc": 0.26639968670536357 }, { "epoch": 1.2966871885077689, "grad_norm": 0.450920512801608, "learning_rate": 0.0004996994210999711, "loss": 3.2868075370788574, "step": 2212, "token_acc": 0.27022886119658035 }, { "epoch": 1.297273526824978, "grad_norm": 0.4264177799939463, "learning_rate": 0.0004996982321064123, "loss": 3.282705307006836, "step": 2213, "token_acc": 0.2722188641424239 }, { "epoch": 1.297859865142187, "grad_norm": 0.4003885566222595, "learning_rate": 0.0004996970407672751, "loss": 3.299790620803833, "step": 2214, "token_acc": 0.2674795253347281 }, { "epoch": 1.2984462034593962, "grad_norm": 0.3771369900651132, "learning_rate": 0.0004996958470825706, "loss": 3.3144707679748535, "step": 2215, "token_acc": 0.26723066436790827 }, { "epoch": 1.299032541776605, "grad_norm": 0.5038678633378196, "learning_rate": 0.0004996946510523102, "loss": 3.293795585632324, "step": 2216, "token_acc": 0.2704631703250542 }, { "epoch": 1.2996188800938142, "grad_norm": 0.4888576301119778, "learning_rate": 0.0004996934526765051, "loss": 3.316286087036133, "step": 2217, "token_acc": 0.26507311601002703 }, { "epoch": 1.300205218411023, "grad_norm": 0.49161036521898716, "learning_rate": 0.0004996922519551663, "loss": 3.3045406341552734, "step": 2218, "token_acc": 0.2670406980069572 }, { "epoch": 1.3007915567282322, "grad_norm": 0.49902279908074704, "learning_rate": 0.0004996910488883053, "loss": 3.2669148445129395, "step": 2219, "token_acc": 0.2716930840395937 }, { "epoch": 1.3013778950454413, "grad_norm": 0.4951241312204706, "learning_rate": 0.0004996898434759334, "loss": 3.271190643310547, "step": 2220, "token_acc": 0.2707422658084419 }, { "epoch": 1.3019642333626502, "grad_norm": 0.47242105791113687, "learning_rate": 0.0004996886357180619, "loss": 3.3263590335845947, "step": 2221, "token_acc": 0.2663918028399651 }, { "epoch": 1.3025505716798593, "grad_norm": 0.4470259326086328, "learning_rate": 0.0004996874256147021, "loss": 3.2767844200134277, "step": 2222, "token_acc": 0.2722967517545494 }, { "epoch": 1.3031369099970682, "grad_norm": 0.4701214307860167, "learning_rate": 0.0004996862131658653, "loss": 3.268568277359009, "step": 2223, "token_acc": 0.27282175690822885 }, { "epoch": 1.3037232483142773, "grad_norm": 0.45613686172146267, "learning_rate": 0.0004996849983715631, "loss": 3.2823128700256348, "step": 2224, "token_acc": 0.2709843818683501 }, { "epoch": 1.3043095866314864, "grad_norm": 0.421122404917672, "learning_rate": 0.0004996837812318068, "loss": 3.2828450202941895, "step": 2225, "token_acc": 0.2702336235453052 }, { "epoch": 1.3048959249486953, "grad_norm": 0.45499029353645193, "learning_rate": 0.0004996825617466078, "loss": 3.3510751724243164, "step": 2226, "token_acc": 0.26293417441912076 }, { "epoch": 1.3054822632659044, "grad_norm": 0.49747402890776315, "learning_rate": 0.0004996813399159776, "loss": 3.3093278408050537, "step": 2227, "token_acc": 0.2676202149372541 }, { "epoch": 1.3060686015831133, "grad_norm": 0.4645288054654548, "learning_rate": 0.0004996801157399277, "loss": 3.2721948623657227, "step": 2228, "token_acc": 0.27333101538600074 }, { "epoch": 1.3066549399003224, "grad_norm": 0.4128075605110155, "learning_rate": 0.0004996788892184694, "loss": 3.2565155029296875, "step": 2229, "token_acc": 0.2749232853374932 }, { "epoch": 1.3072412782175316, "grad_norm": 0.38658078362225445, "learning_rate": 0.0004996776603516146, "loss": 3.2716047763824463, "step": 2230, "token_acc": 0.27161358250796197 }, { "epoch": 1.3078276165347407, "grad_norm": 0.39847331921011103, "learning_rate": 0.0004996764291393744, "loss": 3.283726453781128, "step": 2231, "token_acc": 0.271840397646942 }, { "epoch": 1.3084139548519496, "grad_norm": 0.4277870017027911, "learning_rate": 0.0004996751955817607, "loss": 3.3273580074310303, "step": 2232, "token_acc": 0.26410638319578056 }, { "epoch": 1.3090002931691587, "grad_norm": 0.4483263796533055, "learning_rate": 0.0004996739596787851, "loss": 3.295280694961548, "step": 2233, "token_acc": 0.268735041314783 }, { "epoch": 1.3095866314863676, "grad_norm": 0.4308812073059536, "learning_rate": 0.0004996727214304588, "loss": 3.275662422180176, "step": 2234, "token_acc": 0.2710301776487783 }, { "epoch": 1.3101729698035767, "grad_norm": 0.5177150331109224, "learning_rate": 0.0004996714808367939, "loss": 3.3067426681518555, "step": 2235, "token_acc": 0.2681522664155393 }, { "epoch": 1.3107593081207858, "grad_norm": 0.6754298317599252, "learning_rate": 0.000499670237897802, "loss": 3.338925361633301, "step": 2236, "token_acc": 0.26438869278413474 }, { "epoch": 1.3113456464379947, "grad_norm": 0.6639512376373334, "learning_rate": 0.0004996689926134944, "loss": 3.334921360015869, "step": 2237, "token_acc": 0.26212603537283896 }, { "epoch": 1.3119319847552038, "grad_norm": 0.5436504678072306, "learning_rate": 0.0004996677449838833, "loss": 3.310713052749634, "step": 2238, "token_acc": 0.26560021293585306 }, { "epoch": 1.3125183230724127, "grad_norm": 0.44233419944564484, "learning_rate": 0.0004996664950089799, "loss": 3.325927495956421, "step": 2239, "token_acc": 0.26413510715070126 }, { "epoch": 1.3131046613896218, "grad_norm": 0.4728014412617763, "learning_rate": 0.0004996652426887964, "loss": 3.269191265106201, "step": 2240, "token_acc": 0.2717898837587282 }, { "epoch": 1.313690999706831, "grad_norm": 0.4594942925677933, "learning_rate": 0.0004996639880233443, "loss": 3.245236396789551, "step": 2241, "token_acc": 0.27534943081346774 }, { "epoch": 1.31427733802404, "grad_norm": 0.4047002324070713, "learning_rate": 0.0004996627310126354, "loss": 3.3222436904907227, "step": 2242, "token_acc": 0.26485410842931845 }, { "epoch": 1.314863676341249, "grad_norm": 0.43657293632927474, "learning_rate": 0.0004996614716566817, "loss": 3.2813055515289307, "step": 2243, "token_acc": 0.27142814740460713 }, { "epoch": 1.315450014658458, "grad_norm": 0.4510626863038871, "learning_rate": 0.0004996602099554948, "loss": 3.2938055992126465, "step": 2244, "token_acc": 0.26900252424179355 }, { "epoch": 1.316036352975667, "grad_norm": 0.5363536105881868, "learning_rate": 0.0004996589459090867, "loss": 3.26090669631958, "step": 2245, "token_acc": 0.2737723614673299 }, { "epoch": 1.316622691292876, "grad_norm": 0.5140672080798115, "learning_rate": 0.0004996576795174692, "loss": 3.293609142303467, "step": 2246, "token_acc": 0.26938525775755917 }, { "epoch": 1.3172090296100851, "grad_norm": 0.5974246786366461, "learning_rate": 0.0004996564107806542, "loss": 3.295177459716797, "step": 2247, "token_acc": 0.26836824134277004 }, { "epoch": 1.317795367927294, "grad_norm": 0.5758398581578759, "learning_rate": 0.0004996551396986537, "loss": 3.301840305328369, "step": 2248, "token_acc": 0.2677889599226528 }, { "epoch": 1.3183817062445031, "grad_norm": 0.4620066722988521, "learning_rate": 0.0004996538662714795, "loss": 3.261955499649048, "step": 2249, "token_acc": 0.2727928594311194 }, { "epoch": 1.318968044561712, "grad_norm": 0.5337207741945489, "learning_rate": 0.0004996525904991437, "loss": 3.300217628479004, "step": 2250, "token_acc": 0.26733015965676765 }, { "epoch": 1.3195543828789211, "grad_norm": 0.5346345116942756, "learning_rate": 0.0004996513123816581, "loss": 3.229556083679199, "step": 2251, "token_acc": 0.2780900241338538 }, { "epoch": 1.3201407211961302, "grad_norm": 0.47281419659908236, "learning_rate": 0.000499650031919035, "loss": 3.346834421157837, "step": 2252, "token_acc": 0.2626302328413734 }, { "epoch": 1.3207270595133391, "grad_norm": 0.41286466468372207, "learning_rate": 0.0004996487491112862, "loss": 3.3068716526031494, "step": 2253, "token_acc": 0.2681019883830737 }, { "epoch": 1.3213133978305482, "grad_norm": 0.3552851497697773, "learning_rate": 0.0004996474639584239, "loss": 3.301759719848633, "step": 2254, "token_acc": 0.2693165916568965 }, { "epoch": 1.3218997361477571, "grad_norm": 0.40354750288115926, "learning_rate": 0.0004996461764604598, "loss": 3.307332992553711, "step": 2255, "token_acc": 0.26676426377360957 }, { "epoch": 1.3224860744649662, "grad_norm": 0.4415890002563942, "learning_rate": 0.0004996448866174065, "loss": 3.3165102005004883, "step": 2256, "token_acc": 0.2678464511927396 }, { "epoch": 1.3230724127821754, "grad_norm": 0.4280525055945892, "learning_rate": 0.0004996435944292759, "loss": 3.23978328704834, "step": 2257, "token_acc": 0.27339656430324333 }, { "epoch": 1.3236587510993845, "grad_norm": 0.475638259301756, "learning_rate": 0.00049964229989608, "loss": 3.2623562812805176, "step": 2258, "token_acc": 0.2739527337308803 }, { "epoch": 1.3242450894165934, "grad_norm": 0.4595647603294476, "learning_rate": 0.0004996410030178312, "loss": 3.3211758136749268, "step": 2259, "token_acc": 0.266445414396033 }, { "epoch": 1.3248314277338025, "grad_norm": 0.5053541195600768, "learning_rate": 0.0004996397037945415, "loss": 3.2797911167144775, "step": 2260, "token_acc": 0.2707438290639041 }, { "epoch": 1.3254177660510114, "grad_norm": 0.48673634177852815, "learning_rate": 0.0004996384022262233, "loss": 3.323958396911621, "step": 2261, "token_acc": 0.2645838231370508 }, { "epoch": 1.3260041043682205, "grad_norm": 0.41554370340870184, "learning_rate": 0.0004996370983128885, "loss": 3.2735414505004883, "step": 2262, "token_acc": 0.2699962467673356 }, { "epoch": 1.3265904426854296, "grad_norm": 0.5160196316368776, "learning_rate": 0.0004996357920545497, "loss": 3.2709333896636963, "step": 2263, "token_acc": 0.2716877261049651 }, { "epoch": 1.3271767810026385, "grad_norm": 0.4527932131855336, "learning_rate": 0.0004996344834512189, "loss": 3.258944034576416, "step": 2264, "token_acc": 0.27525545611971075 }, { "epoch": 1.3277631193198476, "grad_norm": 0.4675045985524346, "learning_rate": 0.0004996331725029086, "loss": 3.305373191833496, "step": 2265, "token_acc": 0.26737845288553125 }, { "epoch": 1.3283494576370565, "grad_norm": 0.4629519694568142, "learning_rate": 0.0004996318592096311, "loss": 3.296513319015503, "step": 2266, "token_acc": 0.2673530185100566 }, { "epoch": 1.3289357959542656, "grad_norm": 0.5105431300050036, "learning_rate": 0.0004996305435713985, "loss": 3.27852725982666, "step": 2267, "token_acc": 0.2728006061605707 }, { "epoch": 1.3295221342714747, "grad_norm": 0.47340393460504554, "learning_rate": 0.0004996292255882236, "loss": 3.2675280570983887, "step": 2268, "token_acc": 0.2701743534232806 }, { "epoch": 1.3301084725886836, "grad_norm": 0.5127267217317832, "learning_rate": 0.0004996279052601183, "loss": 3.330883502960205, "step": 2269, "token_acc": 0.2640624258310874 }, { "epoch": 1.3306948109058927, "grad_norm": 0.43257039177282136, "learning_rate": 0.0004996265825870952, "loss": 3.2582836151123047, "step": 2270, "token_acc": 0.2736420648906396 }, { "epoch": 1.3312811492231018, "grad_norm": 0.44015672619747376, "learning_rate": 0.0004996252575691668, "loss": 3.279317617416382, "step": 2271, "token_acc": 0.2696132804421801 }, { "epoch": 1.3318674875403107, "grad_norm": 0.42535056449967384, "learning_rate": 0.0004996239302063454, "loss": 3.2693228721618652, "step": 2272, "token_acc": 0.2711299573822672 }, { "epoch": 1.3324538258575198, "grad_norm": 0.3789332170904118, "learning_rate": 0.0004996226004986436, "loss": 3.3017120361328125, "step": 2273, "token_acc": 0.2673867070668089 }, { "epoch": 1.333040164174729, "grad_norm": 0.38054651342068413, "learning_rate": 0.0004996212684460738, "loss": 3.308743953704834, "step": 2274, "token_acc": 0.2680272828213398 }, { "epoch": 1.3336265024919378, "grad_norm": 0.47217391077928006, "learning_rate": 0.0004996199340486486, "loss": 3.3293802738189697, "step": 2275, "token_acc": 0.26308100618925445 }, { "epoch": 1.334212840809147, "grad_norm": 0.45629975872730644, "learning_rate": 0.0004996185973063805, "loss": 3.3250808715820312, "step": 2276, "token_acc": 0.26548479225069427 }, { "epoch": 1.3347991791263558, "grad_norm": 0.4577758222155869, "learning_rate": 0.000499617258219282, "loss": 3.2994184494018555, "step": 2277, "token_acc": 0.2672851523301088 }, { "epoch": 1.335385517443565, "grad_norm": 0.4226715538233513, "learning_rate": 0.0004996159167873658, "loss": 3.2190985679626465, "step": 2278, "token_acc": 0.2786667582983987 }, { "epoch": 1.335971855760774, "grad_norm": 0.44332977385741973, "learning_rate": 0.0004996145730106443, "loss": 3.303474187850952, "step": 2279, "token_acc": 0.2657028668566365 }, { "epoch": 1.336558194077983, "grad_norm": 0.4584662784562282, "learning_rate": 0.0004996132268891303, "loss": 3.287426710128784, "step": 2280, "token_acc": 0.2704973815030438 }, { "epoch": 1.337144532395192, "grad_norm": 0.42453976240035424, "learning_rate": 0.0004996118784228364, "loss": 3.30275821685791, "step": 2281, "token_acc": 0.26814612387662456 }, { "epoch": 1.337730870712401, "grad_norm": 0.4110682757956792, "learning_rate": 0.0004996105276117753, "loss": 3.2236876487731934, "step": 2282, "token_acc": 0.2783025933040254 }, { "epoch": 1.33831720902961, "grad_norm": 0.586991189478905, "learning_rate": 0.0004996091744559596, "loss": 3.2606353759765625, "step": 2283, "token_acc": 0.2726600103637461 }, { "epoch": 1.3389035473468192, "grad_norm": 0.6280868758459562, "learning_rate": 0.000499607818955402, "loss": 3.308624744415283, "step": 2284, "token_acc": 0.2663674410458432 }, { "epoch": 1.3394898856640283, "grad_norm": 0.5655773034573501, "learning_rate": 0.0004996064611101154, "loss": 3.3003199100494385, "step": 2285, "token_acc": 0.26738473743630425 }, { "epoch": 1.3400762239812372, "grad_norm": 0.5286060506208545, "learning_rate": 0.0004996051009201124, "loss": 3.3941969871520996, "step": 2286, "token_acc": 0.25605105916673454 }, { "epoch": 1.3406625622984463, "grad_norm": 0.4661800299005168, "learning_rate": 0.0004996037383854058, "loss": 3.249300241470337, "step": 2287, "token_acc": 0.2745747967330554 }, { "epoch": 1.3412489006156552, "grad_norm": 0.5251298017337019, "learning_rate": 0.0004996023735060085, "loss": 3.324374198913574, "step": 2288, "token_acc": 0.26312886577630695 }, { "epoch": 1.3418352389328643, "grad_norm": 0.550042515084715, "learning_rate": 0.0004996010062819332, "loss": 3.313662052154541, "step": 2289, "token_acc": 0.2677766580803709 }, { "epoch": 1.3424215772500734, "grad_norm": 0.5165924590640047, "learning_rate": 0.0004995996367131927, "loss": 3.2825393676757812, "step": 2290, "token_acc": 0.26989245332148015 }, { "epoch": 1.3430079155672823, "grad_norm": 0.44091511025345437, "learning_rate": 0.0004995982647998, "loss": 3.2825889587402344, "step": 2291, "token_acc": 0.2711889338944139 }, { "epoch": 1.3435942538844914, "grad_norm": 0.4724264065802057, "learning_rate": 0.0004995968905417681, "loss": 3.2880802154541016, "step": 2292, "token_acc": 0.26786588225697916 }, { "epoch": 1.3441805922017003, "grad_norm": 0.5183179885282589, "learning_rate": 0.0004995955139391095, "loss": 3.313777446746826, "step": 2293, "token_acc": 0.2675678611329499 }, { "epoch": 1.3447669305189094, "grad_norm": 0.4970497442722069, "learning_rate": 0.0004995941349918375, "loss": 3.3378379344940186, "step": 2294, "token_acc": 0.2633278464962806 }, { "epoch": 1.3453532688361185, "grad_norm": 0.4361746270254895, "learning_rate": 0.0004995927536999649, "loss": 3.228069543838501, "step": 2295, "token_acc": 0.27746875521643843 }, { "epoch": 1.3459396071533274, "grad_norm": 0.4867497115962423, "learning_rate": 0.0004995913700635049, "loss": 3.2935121059417725, "step": 2296, "token_acc": 0.26780899203780323 }, { "epoch": 1.3465259454705365, "grad_norm": 0.38188670290680665, "learning_rate": 0.0004995899840824701, "loss": 3.2966370582580566, "step": 2297, "token_acc": 0.2684492923783913 }, { "epoch": 1.3471122837877456, "grad_norm": 0.3774444653300807, "learning_rate": 0.0004995885957568738, "loss": 3.2749247550964355, "step": 2298, "token_acc": 0.27121060549967313 }, { "epoch": 1.3476986221049545, "grad_norm": 0.38471692605844315, "learning_rate": 0.0004995872050867289, "loss": 3.338170051574707, "step": 2299, "token_acc": 0.2624641109445819 }, { "epoch": 1.3482849604221636, "grad_norm": 0.46373422006992837, "learning_rate": 0.0004995858120720486, "loss": 3.276038646697998, "step": 2300, "token_acc": 0.27177721396641297 }, { "epoch": 1.3488712987393727, "grad_norm": 0.3931626896196003, "learning_rate": 0.0004995844167128458, "loss": 3.2484288215637207, "step": 2301, "token_acc": 0.27562802021468885 }, { "epoch": 1.3494576370565816, "grad_norm": 0.3881206083640519, "learning_rate": 0.0004995830190091338, "loss": 3.2473719120025635, "step": 2302, "token_acc": 0.273347781013177 }, { "epoch": 1.3500439753737907, "grad_norm": 0.395370304110828, "learning_rate": 0.0004995816189609258, "loss": 3.2789053916931152, "step": 2303, "token_acc": 0.2699337430857536 }, { "epoch": 1.3506303136909996, "grad_norm": 0.4289588746290127, "learning_rate": 0.0004995802165682346, "loss": 3.2638158798217773, "step": 2304, "token_acc": 0.2714800278416625 }, { "epoch": 1.3512166520082087, "grad_norm": 0.4488728805776394, "learning_rate": 0.0004995788118310737, "loss": 3.324845790863037, "step": 2305, "token_acc": 0.2650002583578773 }, { "epoch": 1.3518029903254178, "grad_norm": 0.43634082580398587, "learning_rate": 0.0004995774047494561, "loss": 3.2578701972961426, "step": 2306, "token_acc": 0.2740929941618015 }, { "epoch": 1.3523893286426267, "grad_norm": 0.4289543631479029, "learning_rate": 0.0004995759953233951, "loss": 3.2873027324676514, "step": 2307, "token_acc": 0.2705378412590828 }, { "epoch": 1.3529756669598358, "grad_norm": 0.43759863031655477, "learning_rate": 0.0004995745835529039, "loss": 3.3710827827453613, "step": 2308, "token_acc": 0.25938648875741843 }, { "epoch": 1.3535620052770447, "grad_norm": 0.42301958076543245, "learning_rate": 0.0004995731694379959, "loss": 3.342059850692749, "step": 2309, "token_acc": 0.2628744530461124 }, { "epoch": 1.3541483435942538, "grad_norm": 0.4192929394802892, "learning_rate": 0.0004995717529786843, "loss": 3.268679618835449, "step": 2310, "token_acc": 0.27265579027703185 }, { "epoch": 1.354734681911463, "grad_norm": 0.4133365334774994, "learning_rate": 0.0004995703341749824, "loss": 3.279419422149658, "step": 2311, "token_acc": 0.2715547606015928 }, { "epoch": 1.355321020228672, "grad_norm": 0.39031094425635504, "learning_rate": 0.0004995689130269034, "loss": 3.2595338821411133, "step": 2312, "token_acc": 0.2730151104341982 }, { "epoch": 1.355907358545881, "grad_norm": 0.388538711182958, "learning_rate": 0.0004995674895344607, "loss": 3.2669413089752197, "step": 2313, "token_acc": 0.2695651244105602 }, { "epoch": 1.35649369686309, "grad_norm": 0.43827821792690513, "learning_rate": 0.0004995660636976678, "loss": 3.3141121864318848, "step": 2314, "token_acc": 0.2647407037946106 }, { "epoch": 1.357080035180299, "grad_norm": 0.403572987995223, "learning_rate": 0.000499564635516538, "loss": 3.2570347785949707, "step": 2315, "token_acc": 0.2736133926288906 }, { "epoch": 1.357666373497508, "grad_norm": 0.38716576904109085, "learning_rate": 0.0004995632049910848, "loss": 3.2765936851501465, "step": 2316, "token_acc": 0.26916978230318456 }, { "epoch": 1.3582527118147172, "grad_norm": 0.35697105799472595, "learning_rate": 0.0004995617721213216, "loss": 3.264394521713257, "step": 2317, "token_acc": 0.2710391629471061 }, { "epoch": 1.358839050131926, "grad_norm": 0.3989666368614984, "learning_rate": 0.0004995603369072618, "loss": 3.278810977935791, "step": 2318, "token_acc": 0.2686326379286158 }, { "epoch": 1.3594253884491352, "grad_norm": 0.449243980301992, "learning_rate": 0.0004995588993489189, "loss": 3.260861396789551, "step": 2319, "token_acc": 0.27392802695466134 }, { "epoch": 1.360011726766344, "grad_norm": 0.4864857035347878, "learning_rate": 0.0004995574594463064, "loss": 3.23079514503479, "step": 2320, "token_acc": 0.27777124997062486 }, { "epoch": 1.3605980650835532, "grad_norm": 0.4936721820918022, "learning_rate": 0.0004995560171994379, "loss": 3.3202638626098633, "step": 2321, "token_acc": 0.26419777882493506 }, { "epoch": 1.3611844034007623, "grad_norm": 0.4254987751710783, "learning_rate": 0.0004995545726083269, "loss": 3.2858686447143555, "step": 2322, "token_acc": 0.2692841411428301 }, { "epoch": 1.3617707417179712, "grad_norm": 0.44402627792273136, "learning_rate": 0.000499553125672987, "loss": 3.297593593597412, "step": 2323, "token_acc": 0.2687846671060291 }, { "epoch": 1.3623570800351803, "grad_norm": 0.4434392199715045, "learning_rate": 0.0004995516763934317, "loss": 3.288408041000366, "step": 2324, "token_acc": 0.26962536631729017 }, { "epoch": 1.3629434183523892, "grad_norm": 0.4126780463451966, "learning_rate": 0.0004995502247696747, "loss": 3.310178756713867, "step": 2325, "token_acc": 0.2682862812208002 }, { "epoch": 1.3635297566695983, "grad_norm": 0.41784952101832085, "learning_rate": 0.0004995487708017297, "loss": 3.311824321746826, "step": 2326, "token_acc": 0.26665920548600264 }, { "epoch": 1.3641160949868074, "grad_norm": 0.4295150243882106, "learning_rate": 0.0004995473144896101, "loss": 3.2648866176605225, "step": 2327, "token_acc": 0.2720135374510627 }, { "epoch": 1.3647024333040165, "grad_norm": 0.41243937113242696, "learning_rate": 0.0004995458558333299, "loss": 3.3204498291015625, "step": 2328, "token_acc": 0.26296529835290516 }, { "epoch": 1.3652887716212254, "grad_norm": 0.39407214413357855, "learning_rate": 0.0004995443948329026, "loss": 3.254815101623535, "step": 2329, "token_acc": 0.2735143806105975 }, { "epoch": 1.3658751099384345, "grad_norm": 0.38047983844665134, "learning_rate": 0.0004995429314883419, "loss": 3.3011932373046875, "step": 2330, "token_acc": 0.2685477973230064 }, { "epoch": 1.3664614482556434, "grad_norm": 0.40643402206745066, "learning_rate": 0.0004995414657996617, "loss": 3.2614247798919678, "step": 2331, "token_acc": 0.27164901088919635 }, { "epoch": 1.3670477865728525, "grad_norm": 0.4471689434073979, "learning_rate": 0.0004995399977668756, "loss": 3.3060593605041504, "step": 2332, "token_acc": 0.26743122032307376 }, { "epoch": 1.3676341248900616, "grad_norm": 0.5243078654852139, "learning_rate": 0.0004995385273899976, "loss": 3.308076858520508, "step": 2333, "token_acc": 0.2677434292834062 }, { "epoch": 1.3682204632072705, "grad_norm": 0.5245984129948157, "learning_rate": 0.0004995370546690414, "loss": 3.2810144424438477, "step": 2334, "token_acc": 0.26962385997337646 }, { "epoch": 1.3688068015244796, "grad_norm": 0.5182262354992822, "learning_rate": 0.0004995355796040208, "loss": 3.255418300628662, "step": 2335, "token_acc": 0.2734937784445481 }, { "epoch": 1.3693931398416885, "grad_norm": 0.5616814596444533, "learning_rate": 0.0004995341021949496, "loss": 3.2526869773864746, "step": 2336, "token_acc": 0.27381642302755227 }, { "epoch": 1.3699794781588976, "grad_norm": 0.4682025185648729, "learning_rate": 0.0004995326224418418, "loss": 3.2829577922821045, "step": 2337, "token_acc": 0.27092491207156566 }, { "epoch": 1.3705658164761068, "grad_norm": 0.48052958982843025, "learning_rate": 0.0004995311403447112, "loss": 3.2882399559020996, "step": 2338, "token_acc": 0.2693247454068776 }, { "epoch": 1.3711521547933159, "grad_norm": 0.41281302953097315, "learning_rate": 0.0004995296559035719, "loss": 3.2644882202148438, "step": 2339, "token_acc": 0.27280497978566387 }, { "epoch": 1.3717384931105248, "grad_norm": 0.4153637311465592, "learning_rate": 0.0004995281691184377, "loss": 3.276925802230835, "step": 2340, "token_acc": 0.27157355089729424 }, { "epoch": 1.3723248314277339, "grad_norm": 0.4207738598214789, "learning_rate": 0.0004995266799893227, "loss": 3.311645746231079, "step": 2341, "token_acc": 0.26852677696318655 }, { "epoch": 1.3729111697449428, "grad_norm": 0.45763595935927626, "learning_rate": 0.0004995251885162406, "loss": 3.304471492767334, "step": 2342, "token_acc": 0.26985741787050227 }, { "epoch": 1.3734975080621519, "grad_norm": 0.4457647483233761, "learning_rate": 0.0004995236946992057, "loss": 3.270564079284668, "step": 2343, "token_acc": 0.27217660851032394 }, { "epoch": 1.374083846379361, "grad_norm": 0.503161367700388, "learning_rate": 0.0004995221985382318, "loss": 3.269784450531006, "step": 2344, "token_acc": 0.2721498195917928 }, { "epoch": 1.3746701846965699, "grad_norm": 0.5223022170174856, "learning_rate": 0.0004995207000333332, "loss": 3.3418283462524414, "step": 2345, "token_acc": 0.26224315851173097 }, { "epoch": 1.375256523013779, "grad_norm": 0.5800666288440978, "learning_rate": 0.0004995191991845238, "loss": 3.2474100589752197, "step": 2346, "token_acc": 0.27470501278144527 }, { "epoch": 1.3758428613309879, "grad_norm": 0.5152070546247417, "learning_rate": 0.0004995176959918179, "loss": 3.2900550365448, "step": 2347, "token_acc": 0.2713010399244459 }, { "epoch": 1.376429199648197, "grad_norm": 0.42299369180228974, "learning_rate": 0.0004995161904552293, "loss": 3.281383991241455, "step": 2348, "token_acc": 0.269463508425241 }, { "epoch": 1.377015537965406, "grad_norm": 0.45482496073797835, "learning_rate": 0.0004995146825747724, "loss": 3.294569969177246, "step": 2349, "token_acc": 0.2676048570655062 }, { "epoch": 1.377601876282615, "grad_norm": 0.4782786042192761, "learning_rate": 0.0004995131723504612, "loss": 3.2235167026519775, "step": 2350, "token_acc": 0.27888023149315744 }, { "epoch": 1.378188214599824, "grad_norm": 0.43237970628425565, "learning_rate": 0.0004995116597823101, "loss": 3.3272016048431396, "step": 2351, "token_acc": 0.2652090954964047 }, { "epoch": 1.378774552917033, "grad_norm": 0.3637994416873148, "learning_rate": 0.0004995101448703331, "loss": 3.293396234512329, "step": 2352, "token_acc": 0.26739950719180505 }, { "epoch": 1.379360891234242, "grad_norm": 0.36937918942289955, "learning_rate": 0.0004995086276145444, "loss": 3.31223726272583, "step": 2353, "token_acc": 0.267059812359802 }, { "epoch": 1.3799472295514512, "grad_norm": 0.41105314800442505, "learning_rate": 0.0004995071080149585, "loss": 3.3013970851898193, "step": 2354, "token_acc": 0.2671748681778832 }, { "epoch": 1.3805335678686603, "grad_norm": 0.3686336889621483, "learning_rate": 0.0004995055860715895, "loss": 3.28922700881958, "step": 2355, "token_acc": 0.27046273575167445 }, { "epoch": 1.3811199061858692, "grad_norm": 0.37225358441161743, "learning_rate": 0.0004995040617844517, "loss": 3.2997946739196777, "step": 2356, "token_acc": 0.26778001504338544 }, { "epoch": 1.3817062445030783, "grad_norm": 0.36752493475498094, "learning_rate": 0.0004995025351535596, "loss": 3.278599262237549, "step": 2357, "token_acc": 0.2707419137712428 }, { "epoch": 1.3822925828202872, "grad_norm": 0.37544761967342893, "learning_rate": 0.0004995010061789272, "loss": 3.2309446334838867, "step": 2358, "token_acc": 0.2764470685880567 }, { "epoch": 1.3828789211374963, "grad_norm": 0.4248999973595112, "learning_rate": 0.0004994994748605691, "loss": 3.2970943450927734, "step": 2359, "token_acc": 0.26786099322110557 }, { "epoch": 1.3834652594547054, "grad_norm": 0.38624077122980804, "learning_rate": 0.0004994979411984997, "loss": 3.2732152938842773, "step": 2360, "token_acc": 0.2685163721046728 }, { "epoch": 1.3840515977719143, "grad_norm": 0.41785692725258833, "learning_rate": 0.0004994964051927333, "loss": 3.324655771255493, "step": 2361, "token_acc": 0.26495019561477845 }, { "epoch": 1.3846379360891234, "grad_norm": 0.39420040668470063, "learning_rate": 0.0004994948668432844, "loss": 3.300771713256836, "step": 2362, "token_acc": 0.2674537960657567 }, { "epoch": 1.3852242744063323, "grad_norm": 0.4647716754742074, "learning_rate": 0.0004994933261501674, "loss": 3.2652158737182617, "step": 2363, "token_acc": 0.2705149971703452 }, { "epoch": 1.3858106127235414, "grad_norm": 0.4829481963299405, "learning_rate": 0.0004994917831133968, "loss": 3.2998428344726562, "step": 2364, "token_acc": 0.26695146904921824 }, { "epoch": 1.3863969510407506, "grad_norm": 0.49291837622427154, "learning_rate": 0.0004994902377329872, "loss": 3.2784509658813477, "step": 2365, "token_acc": 0.26893013020738277 }, { "epoch": 1.3869832893579597, "grad_norm": 0.48098713613840594, "learning_rate": 0.0004994886900089528, "loss": 3.314309597015381, "step": 2366, "token_acc": 0.2659981682831177 }, { "epoch": 1.3875696276751686, "grad_norm": 0.4231703034287215, "learning_rate": 0.0004994871399413085, "loss": 3.2711880207061768, "step": 2367, "token_acc": 0.2706885543471952 }, { "epoch": 1.3881559659923777, "grad_norm": 0.4364650107952657, "learning_rate": 0.0004994855875300687, "loss": 3.315361499786377, "step": 2368, "token_acc": 0.2669514764887567 }, { "epoch": 1.3887423043095866, "grad_norm": 0.4183616060764355, "learning_rate": 0.0004994840327752479, "loss": 3.3013198375701904, "step": 2369, "token_acc": 0.2665252477696106 }, { "epoch": 1.3893286426267957, "grad_norm": 0.40496518381769436, "learning_rate": 0.0004994824756768608, "loss": 3.289513349533081, "step": 2370, "token_acc": 0.26910134548039705 }, { "epoch": 1.3899149809440048, "grad_norm": 0.45478998416071864, "learning_rate": 0.0004994809162349222, "loss": 3.2043423652648926, "step": 2371, "token_acc": 0.28000396109561765 }, { "epoch": 1.3905013192612137, "grad_norm": 0.4449421931056942, "learning_rate": 0.0004994793544494464, "loss": 3.2940096855163574, "step": 2372, "token_acc": 0.2672462263742918 }, { "epoch": 1.3910876575784228, "grad_norm": 0.4158157231666166, "learning_rate": 0.0004994777903204483, "loss": 3.23759126663208, "step": 2373, "token_acc": 0.27440058125454103 }, { "epoch": 1.3916739958956317, "grad_norm": 0.361738811253168, "learning_rate": 0.0004994762238479426, "loss": 3.2651798725128174, "step": 2374, "token_acc": 0.26993871260936014 }, { "epoch": 1.3922603342128408, "grad_norm": 0.4403909832363792, "learning_rate": 0.0004994746550319438, "loss": 3.2999963760375977, "step": 2375, "token_acc": 0.26933310283165907 }, { "epoch": 1.39284667253005, "grad_norm": 0.4768469028439963, "learning_rate": 0.000499473083872467, "loss": 3.269237518310547, "step": 2376, "token_acc": 0.2718961361228744 }, { "epoch": 1.3934330108472588, "grad_norm": 0.43807070948026056, "learning_rate": 0.0004994715103695265, "loss": 3.3203930854797363, "step": 2377, "token_acc": 0.2640108122023426 }, { "epoch": 1.394019349164468, "grad_norm": 0.38994892486738564, "learning_rate": 0.0004994699345231375, "loss": 3.292973756790161, "step": 2378, "token_acc": 0.26975914223153824 }, { "epoch": 1.3946056874816768, "grad_norm": 0.4880771768020366, "learning_rate": 0.0004994683563333145, "loss": 3.267620801925659, "step": 2379, "token_acc": 0.27325219680492785 }, { "epoch": 1.395192025798886, "grad_norm": 0.5543662540661931, "learning_rate": 0.0004994667758000726, "loss": 3.333808422088623, "step": 2380, "token_acc": 0.2645320626895587 }, { "epoch": 1.395778364116095, "grad_norm": 0.5592990187397917, "learning_rate": 0.0004994651929234264, "loss": 3.345183849334717, "step": 2381, "token_acc": 0.26065891219643494 }, { "epoch": 1.3963647024333041, "grad_norm": 0.4286386821841761, "learning_rate": 0.0004994636077033909, "loss": 3.270714521408081, "step": 2382, "token_acc": 0.27153173437668904 }, { "epoch": 1.396951040750513, "grad_norm": 0.42763080389096125, "learning_rate": 0.0004994620201399809, "loss": 3.2965826988220215, "step": 2383, "token_acc": 0.2668332589003364 }, { "epoch": 1.3975373790677221, "grad_norm": 0.4663006871980911, "learning_rate": 0.0004994604302332114, "loss": 3.318204402923584, "step": 2384, "token_acc": 0.2645549366981836 }, { "epoch": 1.398123717384931, "grad_norm": 0.44304979692414576, "learning_rate": 0.0004994588379830975, "loss": 3.2938432693481445, "step": 2385, "token_acc": 0.26944825765575503 }, { "epoch": 1.3987100557021401, "grad_norm": 0.4313757948865563, "learning_rate": 0.0004994572433896537, "loss": 3.3045687675476074, "step": 2386, "token_acc": 0.2670202272523475 }, { "epoch": 1.3992963940193492, "grad_norm": 0.45052168335132975, "learning_rate": 0.0004994556464528953, "loss": 3.3109493255615234, "step": 2387, "token_acc": 0.2658245599975959 }, { "epoch": 1.3998827323365581, "grad_norm": 0.4278445495586633, "learning_rate": 0.0004994540471728373, "loss": 3.2941904067993164, "step": 2388, "token_acc": 0.27051228599425864 }, { "epoch": 1.4004690706537672, "grad_norm": 0.36429457213458977, "learning_rate": 0.0004994524455494947, "loss": 3.2693257331848145, "step": 2389, "token_acc": 0.27019142830111326 }, { "epoch": 1.4010554089709761, "grad_norm": 0.4745060657255523, "learning_rate": 0.0004994508415828826, "loss": 3.272376775741577, "step": 2390, "token_acc": 0.2712222062684608 }, { "epoch": 1.4016417472881852, "grad_norm": 0.4275148747201497, "learning_rate": 0.0004994492352730158, "loss": 3.275245189666748, "step": 2391, "token_acc": 0.2693135361804894 }, { "epoch": 1.4022280856053944, "grad_norm": 0.4060235336079872, "learning_rate": 0.0004994476266199097, "loss": 3.303415298461914, "step": 2392, "token_acc": 0.26930292285172247 }, { "epoch": 1.4028144239226035, "grad_norm": 0.4483337946757233, "learning_rate": 0.0004994460156235792, "loss": 3.2669177055358887, "step": 2393, "token_acc": 0.270843876271965 }, { "epoch": 1.4034007622398124, "grad_norm": 0.3906221992316279, "learning_rate": 0.0004994444022840396, "loss": 3.283518075942993, "step": 2394, "token_acc": 0.27247602816011274 }, { "epoch": 1.4039871005570215, "grad_norm": 0.43270813155875, "learning_rate": 0.0004994427866013058, "loss": 3.249089002609253, "step": 2395, "token_acc": 0.2728800588226467 }, { "epoch": 1.4045734388742304, "grad_norm": 0.40722232658592983, "learning_rate": 0.0004994411685753933, "loss": 3.275559186935425, "step": 2396, "token_acc": 0.26993604655789366 }, { "epoch": 1.4051597771914395, "grad_norm": 0.388132141574891, "learning_rate": 0.0004994395482063171, "loss": 3.2582221031188965, "step": 2397, "token_acc": 0.2734531995054263 }, { "epoch": 1.4057461155086486, "grad_norm": 0.3784741615173718, "learning_rate": 0.0004994379254940925, "loss": 3.256376266479492, "step": 2398, "token_acc": 0.27358913016048947 }, { "epoch": 1.4063324538258575, "grad_norm": 0.4498414540179996, "learning_rate": 0.0004994363004387347, "loss": 3.3234100341796875, "step": 2399, "token_acc": 0.2639112560905962 }, { "epoch": 1.4069187921430666, "grad_norm": 0.6080359310461864, "learning_rate": 0.0004994346730402589, "loss": 3.2941641807556152, "step": 2400, "token_acc": 0.26837289785896473 }, { "epoch": 1.4075051304602755, "grad_norm": 0.6877505090956137, "learning_rate": 0.0004994330432986806, "loss": 3.3409247398376465, "step": 2401, "token_acc": 0.26172977221158844 }, { "epoch": 1.4080914687774846, "grad_norm": 0.5597915550915546, "learning_rate": 0.0004994314112140149, "loss": 3.326296806335449, "step": 2402, "token_acc": 0.2638461885679608 }, { "epoch": 1.4086778070946937, "grad_norm": 0.49333852787937604, "learning_rate": 0.0004994297767862772, "loss": 3.2763686180114746, "step": 2403, "token_acc": 0.270786991929702 }, { "epoch": 1.4092641454119026, "grad_norm": 0.5410737702556037, "learning_rate": 0.0004994281400154828, "loss": 3.2548184394836426, "step": 2404, "token_acc": 0.27506715925446734 }, { "epoch": 1.4098504837291117, "grad_norm": 0.3837311574502268, "learning_rate": 0.0004994265009016473, "loss": 3.266517162322998, "step": 2405, "token_acc": 0.26983625660936283 }, { "epoch": 1.4104368220463206, "grad_norm": 0.46608704494965136, "learning_rate": 0.0004994248594447858, "loss": 3.290151357650757, "step": 2406, "token_acc": 0.2697969477159868 }, { "epoch": 1.4110231603635297, "grad_norm": 0.45837358308905085, "learning_rate": 0.0004994232156449139, "loss": 3.3085005283355713, "step": 2407, "token_acc": 0.2664963659349978 }, { "epoch": 1.4116094986807388, "grad_norm": 0.3732815571359981, "learning_rate": 0.000499421569502047, "loss": 3.2698757648468018, "step": 2408, "token_acc": 0.2725692626641258 }, { "epoch": 1.412195836997948, "grad_norm": 0.4438848972434387, "learning_rate": 0.0004994199210162004, "loss": 3.3200173377990723, "step": 2409, "token_acc": 0.26541926364469753 }, { "epoch": 1.4127821753151568, "grad_norm": 0.45497076739662157, "learning_rate": 0.00049941827018739, "loss": 3.285585880279541, "step": 2410, "token_acc": 0.270281418152502 }, { "epoch": 1.413368513632366, "grad_norm": 0.4449436022453608, "learning_rate": 0.0004994166170156309, "loss": 3.235724449157715, "step": 2411, "token_acc": 0.275249774347163 }, { "epoch": 1.4139548519495748, "grad_norm": 0.39202483593487275, "learning_rate": 0.0004994149615009388, "loss": 3.293283462524414, "step": 2412, "token_acc": 0.2682607661194385 }, { "epoch": 1.414541190266784, "grad_norm": 0.3698871196713657, "learning_rate": 0.0004994133036433292, "loss": 3.2842347621917725, "step": 2413, "token_acc": 0.270109723595409 }, { "epoch": 1.415127528583993, "grad_norm": 0.3930784277401731, "learning_rate": 0.0004994116434428178, "loss": 3.243622064590454, "step": 2414, "token_acc": 0.27539536669132547 }, { "epoch": 1.415713866901202, "grad_norm": 0.36735858394106163, "learning_rate": 0.0004994099808994199, "loss": 3.302119255065918, "step": 2415, "token_acc": 0.266067863230932 }, { "epoch": 1.416300205218411, "grad_norm": 0.35977657501291754, "learning_rate": 0.0004994083160131514, "loss": 3.2615537643432617, "step": 2416, "token_acc": 0.2721136938578218 }, { "epoch": 1.41688654353562, "grad_norm": 0.41114291143599424, "learning_rate": 0.000499406648784028, "loss": 3.2673168182373047, "step": 2417, "token_acc": 0.2721787332571744 }, { "epoch": 1.417472881852829, "grad_norm": 0.45010157542119594, "learning_rate": 0.0004994049792120651, "loss": 3.278304100036621, "step": 2418, "token_acc": 0.2707497179857113 }, { "epoch": 1.4180592201700382, "grad_norm": 0.44480154864729593, "learning_rate": 0.0004994033072972785, "loss": 3.309415340423584, "step": 2419, "token_acc": 0.2653199143290844 }, { "epoch": 1.4186455584872473, "grad_norm": 0.39311308357056846, "learning_rate": 0.0004994016330396838, "loss": 3.301295280456543, "step": 2420, "token_acc": 0.26797842748262124 }, { "epoch": 1.4192318968044562, "grad_norm": 0.42357246587152864, "learning_rate": 0.0004993999564392969, "loss": 3.302398204803467, "step": 2421, "token_acc": 0.2656842115760501 }, { "epoch": 1.4198182351216653, "grad_norm": 0.4386433829983227, "learning_rate": 0.0004993982774961336, "loss": 3.261169672012329, "step": 2422, "token_acc": 0.2721297522224324 }, { "epoch": 1.4204045734388742, "grad_norm": 0.442379950669629, "learning_rate": 0.0004993965962102094, "loss": 3.263920307159424, "step": 2423, "token_acc": 0.2712250122837415 }, { "epoch": 1.4209909117560833, "grad_norm": 0.43185003691438056, "learning_rate": 0.0004993949125815404, "loss": 3.2329049110412598, "step": 2424, "token_acc": 0.27541785401840757 }, { "epoch": 1.4215772500732924, "grad_norm": 0.42617143291573495, "learning_rate": 0.0004993932266101421, "loss": 3.242340564727783, "step": 2425, "token_acc": 0.27574158133642984 }, { "epoch": 1.4221635883905013, "grad_norm": 0.41735024874572524, "learning_rate": 0.0004993915382960305, "loss": 3.2836499214172363, "step": 2426, "token_acc": 0.27084416865438765 }, { "epoch": 1.4227499267077104, "grad_norm": 0.4658328761863938, "learning_rate": 0.0004993898476392215, "loss": 3.2996325492858887, "step": 2427, "token_acc": 0.2684273916485308 }, { "epoch": 1.4233362650249193, "grad_norm": 0.4735765097561413, "learning_rate": 0.0004993881546397311, "loss": 3.339658737182617, "step": 2428, "token_acc": 0.2627487185696412 }, { "epoch": 1.4239226033421284, "grad_norm": 0.5020945496923419, "learning_rate": 0.0004993864592975748, "loss": 3.262714385986328, "step": 2429, "token_acc": 0.27243149127480154 }, { "epoch": 1.4245089416593375, "grad_norm": 0.47196321610361586, "learning_rate": 0.0004993847616127689, "loss": 3.2826731204986572, "step": 2430, "token_acc": 0.2680442607652313 }, { "epoch": 1.4250952799765464, "grad_norm": 0.4224639972912335, "learning_rate": 0.0004993830615853292, "loss": 3.251896858215332, "step": 2431, "token_acc": 0.27464382980991564 }, { "epoch": 1.4256816182937555, "grad_norm": 0.4995368108113014, "learning_rate": 0.0004993813592152716, "loss": 3.245842456817627, "step": 2432, "token_acc": 0.27499923898815865 }, { "epoch": 1.4262679566109644, "grad_norm": 0.5317309844684932, "learning_rate": 0.0004993796545026123, "loss": 3.318612813949585, "step": 2433, "token_acc": 0.26326512799411467 }, { "epoch": 1.4268542949281735, "grad_norm": 0.48420607871156013, "learning_rate": 0.000499377947447367, "loss": 3.2774157524108887, "step": 2434, "token_acc": 0.27021338805227096 }, { "epoch": 1.4274406332453826, "grad_norm": 0.3958515984187191, "learning_rate": 0.0004993762380495521, "loss": 3.2691493034362793, "step": 2435, "token_acc": 0.273173824313031 }, { "epoch": 1.4280269715625917, "grad_norm": 0.45421733969826983, "learning_rate": 0.0004993745263091835, "loss": 3.280101776123047, "step": 2436, "token_acc": 0.2707349903871836 }, { "epoch": 1.4286133098798006, "grad_norm": 0.3955083766544465, "learning_rate": 0.0004993728122262772, "loss": 3.2648422718048096, "step": 2437, "token_acc": 0.2720867881435877 }, { "epoch": 1.4291996481970097, "grad_norm": 0.4069911842909755, "learning_rate": 0.0004993710958008494, "loss": 3.2858872413635254, "step": 2438, "token_acc": 0.2692404690849481 }, { "epoch": 1.4297859865142186, "grad_norm": 0.371692531984093, "learning_rate": 0.0004993693770329161, "loss": 3.2725448608398438, "step": 2439, "token_acc": 0.27045549738219893 }, { "epoch": 1.4303723248314277, "grad_norm": 0.34977394631058667, "learning_rate": 0.0004993676559224935, "loss": 3.2855043411254883, "step": 2440, "token_acc": 0.26936375100875365 }, { "epoch": 1.4309586631486368, "grad_norm": 0.3791605862301265, "learning_rate": 0.0004993659324695979, "loss": 3.2726244926452637, "step": 2441, "token_acc": 0.2693274852259728 }, { "epoch": 1.4315450014658457, "grad_norm": 0.44948477554412364, "learning_rate": 0.0004993642066742454, "loss": 3.2436182498931885, "step": 2442, "token_acc": 0.27496982712948104 }, { "epoch": 1.4321313397830548, "grad_norm": 0.49342786573180974, "learning_rate": 0.0004993624785364523, "loss": 3.269402503967285, "step": 2443, "token_acc": 0.2718052738336714 }, { "epoch": 1.4327176781002637, "grad_norm": 0.5020917570218351, "learning_rate": 0.0004993607480562346, "loss": 3.2819466590881348, "step": 2444, "token_acc": 0.26872154591594055 }, { "epoch": 1.4333040164174728, "grad_norm": 0.46251155185863657, "learning_rate": 0.0004993590152336086, "loss": 3.267725944519043, "step": 2445, "token_acc": 0.27092517382332454 }, { "epoch": 1.433890354734682, "grad_norm": 0.40637519977868697, "learning_rate": 0.0004993572800685908, "loss": 3.2750463485717773, "step": 2446, "token_acc": 0.2708327037182548 }, { "epoch": 1.434476693051891, "grad_norm": 0.3349490013743137, "learning_rate": 0.0004993555425611973, "loss": 3.29209041595459, "step": 2447, "token_acc": 0.269240022384586 }, { "epoch": 1.4350630313691, "grad_norm": 0.3914006868213982, "learning_rate": 0.0004993538027114445, "loss": 3.2564802169799805, "step": 2448, "token_acc": 0.2723435324659011 }, { "epoch": 1.435649369686309, "grad_norm": 0.3795655466018845, "learning_rate": 0.0004993520605193488, "loss": 3.299866199493408, "step": 2449, "token_acc": 0.26683006665976994 }, { "epoch": 1.436235708003518, "grad_norm": 0.39320495729549493, "learning_rate": 0.0004993503159849265, "loss": 3.2362561225891113, "step": 2450, "token_acc": 0.277570010658319 }, { "epoch": 1.436822046320727, "grad_norm": 0.3855139263559048, "learning_rate": 0.0004993485691081938, "loss": 3.2747299671173096, "step": 2451, "token_acc": 0.2712236518508354 }, { "epoch": 1.4374083846379362, "grad_norm": 0.3171450553692962, "learning_rate": 0.0004993468198891674, "loss": 3.2112998962402344, "step": 2452, "token_acc": 0.2782831704248524 }, { "epoch": 1.437994722955145, "grad_norm": 0.343709236642012, "learning_rate": 0.0004993450683278638, "loss": 3.2441320419311523, "step": 2453, "token_acc": 0.275946520190088 }, { "epoch": 1.4385810612723542, "grad_norm": 0.4122018025254066, "learning_rate": 0.0004993433144242991, "loss": 3.3050241470336914, "step": 2454, "token_acc": 0.2652505405302887 }, { "epoch": 1.439167399589563, "grad_norm": 0.4482920361446149, "learning_rate": 0.0004993415581784899, "loss": 3.2310972213745117, "step": 2455, "token_acc": 0.278045961471679 }, { "epoch": 1.4397537379067722, "grad_norm": 0.42084961180158714, "learning_rate": 0.0004993397995904529, "loss": 3.282933473587036, "step": 2456, "token_acc": 0.268900026237022 }, { "epoch": 1.4403400762239813, "grad_norm": 0.4193544433482455, "learning_rate": 0.0004993380386602044, "loss": 3.254255533218384, "step": 2457, "token_acc": 0.27231084229109864 }, { "epoch": 1.4409264145411902, "grad_norm": 0.42854566305269237, "learning_rate": 0.0004993362753877611, "loss": 3.3185832500457764, "step": 2458, "token_acc": 0.2659728533633944 }, { "epoch": 1.4415127528583993, "grad_norm": 0.46673963600122176, "learning_rate": 0.0004993345097731393, "loss": 3.273925542831421, "step": 2459, "token_acc": 0.26833342992564074 }, { "epoch": 1.4420990911756082, "grad_norm": 0.5317865469910968, "learning_rate": 0.0004993327418163559, "loss": 3.2802789211273193, "step": 2460, "token_acc": 0.26909991724496035 }, { "epoch": 1.4426854294928173, "grad_norm": 0.4691112477627041, "learning_rate": 0.0004993309715174274, "loss": 3.2661590576171875, "step": 2461, "token_acc": 0.2712857671484171 }, { "epoch": 1.4432717678100264, "grad_norm": 0.3974951175482918, "learning_rate": 0.0004993291988763703, "loss": 3.2456650733947754, "step": 2462, "token_acc": 0.2727835286441477 }, { "epoch": 1.4438581061272355, "grad_norm": 0.40054505264099066, "learning_rate": 0.0004993274238932014, "loss": 3.2457351684570312, "step": 2463, "token_acc": 0.27604568545297015 }, { "epoch": 1.4444444444444444, "grad_norm": 0.47178570681883564, "learning_rate": 0.0004993256465679373, "loss": 3.2745256423950195, "step": 2464, "token_acc": 0.2701804417444937 }, { "epoch": 1.4450307827616535, "grad_norm": 0.5517408575569929, "learning_rate": 0.0004993238669005947, "loss": 3.3075637817382812, "step": 2465, "token_acc": 0.26637777147318975 }, { "epoch": 1.4456171210788624, "grad_norm": 0.44406072492792575, "learning_rate": 0.0004993220848911904, "loss": 3.31062912940979, "step": 2466, "token_acc": 0.2655964311183318 }, { "epoch": 1.4462034593960715, "grad_norm": 0.4131787890915645, "learning_rate": 0.000499320300539741, "loss": 3.2557196617126465, "step": 2467, "token_acc": 0.2733014187750637 }, { "epoch": 1.4467897977132806, "grad_norm": 0.5057645729067284, "learning_rate": 0.0004993185138462634, "loss": 3.273193120956421, "step": 2468, "token_acc": 0.27069373145076525 }, { "epoch": 1.4473761360304895, "grad_norm": 0.37248904123597704, "learning_rate": 0.0004993167248107744, "loss": 3.324291706085205, "step": 2469, "token_acc": 0.26502664699706285 }, { "epoch": 1.4479624743476986, "grad_norm": 0.4182448605169437, "learning_rate": 0.0004993149334332906, "loss": 3.2704477310180664, "step": 2470, "token_acc": 0.26992079544226405 }, { "epoch": 1.4485488126649075, "grad_norm": 0.41411552948298735, "learning_rate": 0.000499313139713829, "loss": 3.242617130279541, "step": 2471, "token_acc": 0.27521945603683984 }, { "epoch": 1.4491351509821166, "grad_norm": 0.42963211641251975, "learning_rate": 0.0004993113436524063, "loss": 3.315239906311035, "step": 2472, "token_acc": 0.26580921480899783 }, { "epoch": 1.4497214892993258, "grad_norm": 0.4286416688189302, "learning_rate": 0.0004993095452490397, "loss": 3.2252345085144043, "step": 2473, "token_acc": 0.2757206792505042 }, { "epoch": 1.4503078276165349, "grad_norm": 0.47164884619082065, "learning_rate": 0.0004993077445037457, "loss": 3.2793056964874268, "step": 2474, "token_acc": 0.26860372268262617 }, { "epoch": 1.4508941659337438, "grad_norm": 0.5175823965836549, "learning_rate": 0.0004993059414165415, "loss": 3.3221418857574463, "step": 2475, "token_acc": 0.2621854570975704 }, { "epoch": 1.4514805042509529, "grad_norm": 0.5342560600401027, "learning_rate": 0.0004993041359874439, "loss": 3.337784767150879, "step": 2476, "token_acc": 0.25989950375756127 }, { "epoch": 1.4520668425681618, "grad_norm": 0.458328714064463, "learning_rate": 0.0004993023282164698, "loss": 3.2408292293548584, "step": 2477, "token_acc": 0.2749948885708444 }, { "epoch": 1.4526531808853709, "grad_norm": 0.39698258685234666, "learning_rate": 0.0004993005181036363, "loss": 3.320158004760742, "step": 2478, "token_acc": 0.2643404055094533 }, { "epoch": 1.45323951920258, "grad_norm": 0.3765396597582679, "learning_rate": 0.0004992987056489604, "loss": 3.238387107849121, "step": 2479, "token_acc": 0.2754056384956155 }, { "epoch": 1.4538258575197889, "grad_norm": 0.42077219527452947, "learning_rate": 0.0004992968908524591, "loss": 3.274458169937134, "step": 2480, "token_acc": 0.2701064778888132 }, { "epoch": 1.454412195836998, "grad_norm": 0.41389339023504246, "learning_rate": 0.0004992950737141494, "loss": 3.2594246864318848, "step": 2481, "token_acc": 0.27272703628471184 }, { "epoch": 1.4549985341542069, "grad_norm": 0.3405634738360139, "learning_rate": 0.0004992932542340485, "loss": 3.2280430793762207, "step": 2482, "token_acc": 0.2774763759120692 }, { "epoch": 1.455584872471416, "grad_norm": 0.4297230054259209, "learning_rate": 0.0004992914324121732, "loss": 3.2634799480438232, "step": 2483, "token_acc": 0.27272418788582764 }, { "epoch": 1.456171210788625, "grad_norm": 0.4059970885983026, "learning_rate": 0.000499289608248541, "loss": 3.259629726409912, "step": 2484, "token_acc": 0.27341594141371256 }, { "epoch": 1.456757549105834, "grad_norm": 0.4114114119208386, "learning_rate": 0.0004992877817431688, "loss": 3.2147793769836426, "step": 2485, "token_acc": 0.2775868208772663 }, { "epoch": 1.457343887423043, "grad_norm": 0.3621918317386891, "learning_rate": 0.0004992859528960738, "loss": 3.2161388397216797, "step": 2486, "token_acc": 0.27861182671522905 }, { "epoch": 1.457930225740252, "grad_norm": 0.33401741015434977, "learning_rate": 0.0004992841217072733, "loss": 3.24991512298584, "step": 2487, "token_acc": 0.2740352855054979 }, { "epoch": 1.458516564057461, "grad_norm": 0.34819618250013823, "learning_rate": 0.0004992822881767843, "loss": 3.252859354019165, "step": 2488, "token_acc": 0.27454571706649267 }, { "epoch": 1.4591029023746702, "grad_norm": 0.3434519470462143, "learning_rate": 0.0004992804523046242, "loss": 3.252744436264038, "step": 2489, "token_acc": 0.27188809202584674 }, { "epoch": 1.4596892406918793, "grad_norm": 0.3676748689804412, "learning_rate": 0.00049927861409081, "loss": 3.218554973602295, "step": 2490, "token_acc": 0.27627457235345493 }, { "epoch": 1.4602755790090882, "grad_norm": 0.4093801308459334, "learning_rate": 0.0004992767735353591, "loss": 3.230775833129883, "step": 2491, "token_acc": 0.2751333000295656 }, { "epoch": 1.4608619173262973, "grad_norm": 0.47916584493949715, "learning_rate": 0.0004992749306382889, "loss": 3.272024154663086, "step": 2492, "token_acc": 0.2706898827036384 }, { "epoch": 1.4614482556435062, "grad_norm": 0.49056228612230235, "learning_rate": 0.0004992730853996168, "loss": 3.292412519454956, "step": 2493, "token_acc": 0.2688209593017831 }, { "epoch": 1.4620345939607153, "grad_norm": 0.4262599691382597, "learning_rate": 0.0004992712378193598, "loss": 3.2419066429138184, "step": 2494, "token_acc": 0.27397899465375974 }, { "epoch": 1.4626209322779244, "grad_norm": 0.37095447757174566, "learning_rate": 0.0004992693878975354, "loss": 3.273664951324463, "step": 2495, "token_acc": 0.27020208628838516 }, { "epoch": 1.4632072705951333, "grad_norm": 0.38679784066929224, "learning_rate": 0.000499267535634161, "loss": 3.290173292160034, "step": 2496, "token_acc": 0.26697974727456775 }, { "epoch": 1.4637936089123424, "grad_norm": 0.3810999761910147, "learning_rate": 0.000499265681029254, "loss": 3.292463541030884, "step": 2497, "token_acc": 0.2668369650759247 }, { "epoch": 1.4643799472295513, "grad_norm": 0.36770983731819634, "learning_rate": 0.0004992638240828319, "loss": 3.300384521484375, "step": 2498, "token_acc": 0.26715834547625683 }, { "epoch": 1.4649662855467604, "grad_norm": 0.32980598892475077, "learning_rate": 0.0004992619647949119, "loss": 3.2862884998321533, "step": 2499, "token_acc": 0.26934831671995485 }, { "epoch": 1.4655526238639696, "grad_norm": 0.39278428036680624, "learning_rate": 0.0004992601031655117, "loss": 3.3008973598480225, "step": 2500, "token_acc": 0.2664430445194996 }, { "epoch": 1.4661389621811787, "grad_norm": 0.44455873849917044, "learning_rate": 0.0004992582391946488, "loss": 3.2516372203826904, "step": 2501, "token_acc": 0.2744417779632721 }, { "epoch": 1.4667253004983876, "grad_norm": 0.427252733739739, "learning_rate": 0.0004992563728823406, "loss": 3.2190747261047363, "step": 2502, "token_acc": 0.2800599233550749 }, { "epoch": 1.4673116388155967, "grad_norm": 0.4090301357351065, "learning_rate": 0.0004992545042286046, "loss": 3.302269220352173, "step": 2503, "token_acc": 0.2660768422355733 }, { "epoch": 1.4678979771328056, "grad_norm": 0.39886847311459095, "learning_rate": 0.0004992526332334583, "loss": 3.252699613571167, "step": 2504, "token_acc": 0.271145524819142 }, { "epoch": 1.4684843154500147, "grad_norm": 0.4078953172404111, "learning_rate": 0.0004992507598969196, "loss": 3.2592248916625977, "step": 2505, "token_acc": 0.2716038202090196 }, { "epoch": 1.4690706537672238, "grad_norm": 0.43734613408854095, "learning_rate": 0.0004992488842190057, "loss": 3.270545721054077, "step": 2506, "token_acc": 0.27089965351989276 }, { "epoch": 1.4696569920844327, "grad_norm": 0.48848822685530585, "learning_rate": 0.0004992470061997345, "loss": 3.2691125869750977, "step": 2507, "token_acc": 0.27078320308340964 }, { "epoch": 1.4702433304016418, "grad_norm": 0.5420366381466455, "learning_rate": 0.0004992451258391236, "loss": 3.287473678588867, "step": 2508, "token_acc": 0.26925375976639987 }, { "epoch": 1.4708296687188507, "grad_norm": 0.49461804919858526, "learning_rate": 0.0004992432431371905, "loss": 3.262782096862793, "step": 2509, "token_acc": 0.27267549764820787 }, { "epoch": 1.4714160070360598, "grad_norm": 0.4094541211173672, "learning_rate": 0.000499241358093953, "loss": 3.2442383766174316, "step": 2510, "token_acc": 0.27638559380009864 }, { "epoch": 1.472002345353269, "grad_norm": 0.3658091359430242, "learning_rate": 0.0004992394707094289, "loss": 3.259352684020996, "step": 2511, "token_acc": 0.27263883730155725 }, { "epoch": 1.4725886836704778, "grad_norm": 0.4121571918774716, "learning_rate": 0.0004992375809836357, "loss": 3.3123250007629395, "step": 2512, "token_acc": 0.2660374538013083 }, { "epoch": 1.473175021987687, "grad_norm": 0.4282535189365276, "learning_rate": 0.0004992356889165913, "loss": 3.2643938064575195, "step": 2513, "token_acc": 0.2737601321002591 }, { "epoch": 1.4737613603048958, "grad_norm": 0.4411158385694457, "learning_rate": 0.0004992337945083134, "loss": 3.2541675567626953, "step": 2514, "token_acc": 0.2731821033310357 }, { "epoch": 1.474347698622105, "grad_norm": 0.39037770660671206, "learning_rate": 0.0004992318977588199, "loss": 3.266140937805176, "step": 2515, "token_acc": 0.2711929502069963 }, { "epoch": 1.474934036939314, "grad_norm": 0.36433353861365153, "learning_rate": 0.0004992299986681287, "loss": 3.271409034729004, "step": 2516, "token_acc": 0.2706185635718494 }, { "epoch": 1.4755203752565231, "grad_norm": 0.34204519996515037, "learning_rate": 0.0004992280972362573, "loss": 3.2876739501953125, "step": 2517, "token_acc": 0.26861219999690117 }, { "epoch": 1.476106713573732, "grad_norm": 0.3998745624048512, "learning_rate": 0.0004992261934632239, "loss": 3.2723703384399414, "step": 2518, "token_acc": 0.2713411164971211 }, { "epoch": 1.4766930518909411, "grad_norm": 0.3720749801473729, "learning_rate": 0.0004992242873490462, "loss": 3.216571092605591, "step": 2519, "token_acc": 0.27758389054465155 }, { "epoch": 1.47727939020815, "grad_norm": 0.4728686180618035, "learning_rate": 0.0004992223788937421, "loss": 3.281606674194336, "step": 2520, "token_acc": 0.26887030001885287 }, { "epoch": 1.4778657285253591, "grad_norm": 0.5673286721023328, "learning_rate": 0.0004992204680973297, "loss": 3.2853360176086426, "step": 2521, "token_acc": 0.26759280641620764 }, { "epoch": 1.4784520668425682, "grad_norm": 0.4467303957471683, "learning_rate": 0.0004992185549598267, "loss": 3.2037525177001953, "step": 2522, "token_acc": 0.2789331040054205 }, { "epoch": 1.4790384051597771, "grad_norm": 0.4201350550856369, "learning_rate": 0.0004992166394812513, "loss": 3.320878505706787, "step": 2523, "token_acc": 0.26204837693495775 }, { "epoch": 1.4796247434769862, "grad_norm": 0.37489777082272246, "learning_rate": 0.0004992147216616214, "loss": 3.279184341430664, "step": 2524, "token_acc": 0.2688679245283019 }, { "epoch": 1.4802110817941951, "grad_norm": 0.3445151654199398, "learning_rate": 0.000499212801500955, "loss": 3.2414145469665527, "step": 2525, "token_acc": 0.2742514907526343 }, { "epoch": 1.4807974201114043, "grad_norm": 0.4057399445518621, "learning_rate": 0.0004992108789992701, "loss": 3.288478136062622, "step": 2526, "token_acc": 0.2704312093789201 }, { "epoch": 1.4813837584286134, "grad_norm": 0.46686801053346844, "learning_rate": 0.0004992089541565848, "loss": 3.2893803119659424, "step": 2527, "token_acc": 0.2675993793804505 }, { "epoch": 1.4819700967458225, "grad_norm": 0.4408039850018635, "learning_rate": 0.0004992070269729173, "loss": 3.2801411151885986, "step": 2528, "token_acc": 0.2698625561978163 }, { "epoch": 1.4825564350630314, "grad_norm": 0.431038949250353, "learning_rate": 0.0004992050974482855, "loss": 3.286717176437378, "step": 2529, "token_acc": 0.26773762792559697 }, { "epoch": 1.4831427733802405, "grad_norm": 0.3576258072283714, "learning_rate": 0.0004992031655827076, "loss": 3.2499890327453613, "step": 2530, "token_acc": 0.2736560310486608 }, { "epoch": 1.4837291116974494, "grad_norm": 0.4057627696885976, "learning_rate": 0.0004992012313762017, "loss": 3.2701375484466553, "step": 2531, "token_acc": 0.2716592678376118 }, { "epoch": 1.4843154500146585, "grad_norm": 0.39603020271321543, "learning_rate": 0.0004991992948287863, "loss": 3.2669310569763184, "step": 2532, "token_acc": 0.2701963593784906 }, { "epoch": 1.4849017883318676, "grad_norm": 0.38895173695480456, "learning_rate": 0.0004991973559404791, "loss": 3.25887393951416, "step": 2533, "token_acc": 0.27116314402802866 }, { "epoch": 1.4854881266490765, "grad_norm": 0.43479576544405085, "learning_rate": 0.0004991954147112986, "loss": 3.2750566005706787, "step": 2534, "token_acc": 0.2705086670277928 }, { "epoch": 1.4860744649662856, "grad_norm": 0.38123934526684516, "learning_rate": 0.0004991934711412629, "loss": 3.253174304962158, "step": 2535, "token_acc": 0.27202693987218346 }, { "epoch": 1.4866608032834945, "grad_norm": 0.371384013724577, "learning_rate": 0.0004991915252303905, "loss": 3.285108804702759, "step": 2536, "token_acc": 0.268993812393922 }, { "epoch": 1.4872471416007036, "grad_norm": 0.36054233736719843, "learning_rate": 0.0004991895769786993, "loss": 3.26125431060791, "step": 2537, "token_acc": 0.27457485201299425 }, { "epoch": 1.4878334799179127, "grad_norm": 0.3459758782275917, "learning_rate": 0.000499187626386208, "loss": 3.2527108192443848, "step": 2538, "token_acc": 0.27326024872370946 }, { "epoch": 1.4884198182351216, "grad_norm": 0.38426839468638996, "learning_rate": 0.0004991856734529347, "loss": 3.2523908615112305, "step": 2539, "token_acc": 0.27540276577580874 }, { "epoch": 1.4890061565523307, "grad_norm": 0.36496578914511524, "learning_rate": 0.0004991837181788977, "loss": 3.3056159019470215, "step": 2540, "token_acc": 0.2653258627926925 }, { "epoch": 1.4895924948695396, "grad_norm": 0.35812148403270494, "learning_rate": 0.0004991817605641155, "loss": 3.2906641960144043, "step": 2541, "token_acc": 0.268473345334239 }, { "epoch": 1.4901788331867487, "grad_norm": 0.393978117896672, "learning_rate": 0.0004991798006086063, "loss": 3.243887424468994, "step": 2542, "token_acc": 0.27553603882801586 }, { "epoch": 1.4907651715039578, "grad_norm": 0.3454127374929976, "learning_rate": 0.0004991778383123889, "loss": 3.2879605293273926, "step": 2543, "token_acc": 0.2684119714778691 }, { "epoch": 1.491351509821167, "grad_norm": 0.34927344356638507, "learning_rate": 0.0004991758736754814, "loss": 3.2568609714508057, "step": 2544, "token_acc": 0.2726638246009693 }, { "epoch": 1.4919378481383758, "grad_norm": 0.30426900052763595, "learning_rate": 0.0004991739066979022, "loss": 3.2414450645446777, "step": 2545, "token_acc": 0.2743923916872138 }, { "epoch": 1.492524186455585, "grad_norm": 0.367144824183199, "learning_rate": 0.00049917193737967, "loss": 3.2983627319335938, "step": 2546, "token_acc": 0.2667659673743338 }, { "epoch": 1.4931105247727938, "grad_norm": 0.4167837345012218, "learning_rate": 0.0004991699657208032, "loss": 3.2750966548919678, "step": 2547, "token_acc": 0.2689232817083218 }, { "epoch": 1.493696863090003, "grad_norm": 0.38857331549122276, "learning_rate": 0.0004991679917213203, "loss": 3.2825686931610107, "step": 2548, "token_acc": 0.268260187053966 }, { "epoch": 1.494283201407212, "grad_norm": 0.44692810512357894, "learning_rate": 0.00049916601538124, "loss": 3.2607336044311523, "step": 2549, "token_acc": 0.2724953679370995 }, { "epoch": 1.494869539724421, "grad_norm": 0.5810174130057221, "learning_rate": 0.0004991640367005806, "loss": 3.252685308456421, "step": 2550, "token_acc": 0.26933843367237076 }, { "epoch": 1.49545587804163, "grad_norm": 0.6489466846847916, "learning_rate": 0.0004991620556793609, "loss": 3.2388062477111816, "step": 2551, "token_acc": 0.2745364469032945 }, { "epoch": 1.496042216358839, "grad_norm": 0.5430146023631852, "learning_rate": 0.0004991600723175994, "loss": 3.3004467487335205, "step": 2552, "token_acc": 0.26614094360679574 }, { "epoch": 1.496628554676048, "grad_norm": 0.4988531391079722, "learning_rate": 0.0004991580866153148, "loss": 3.2348427772521973, "step": 2553, "token_acc": 0.27469680556923287 }, { "epoch": 1.4972148929932572, "grad_norm": 0.5388112096588197, "learning_rate": 0.0004991560985725257, "loss": 3.233935832977295, "step": 2554, "token_acc": 0.27664990529141625 }, { "epoch": 1.4978012313104663, "grad_norm": 0.4819893143576697, "learning_rate": 0.0004991541081892508, "loss": 3.2734124660491943, "step": 2555, "token_acc": 0.2714117184427841 }, { "epoch": 1.4983875696276752, "grad_norm": 0.4377724039456534, "learning_rate": 0.0004991521154655088, "loss": 3.2604904174804688, "step": 2556, "token_acc": 0.272985248498017 }, { "epoch": 1.4989739079448843, "grad_norm": 0.4873880855879326, "learning_rate": 0.0004991501204013184, "loss": 3.314098596572876, "step": 2557, "token_acc": 0.265938466946088 }, { "epoch": 1.4995602462620932, "grad_norm": 0.39701673753830447, "learning_rate": 0.0004991481229966985, "loss": 3.2702064514160156, "step": 2558, "token_acc": 0.27099487517905446 }, { "epoch": 1.5001465845793023, "grad_norm": 0.374282002245181, "learning_rate": 0.0004991461232516675, "loss": 3.2452337741851807, "step": 2559, "token_acc": 0.2735383869460141 }, { "epoch": 1.5007329228965114, "grad_norm": 0.34440348717032154, "learning_rate": 0.0004991441211662444, "loss": 3.24863862991333, "step": 2560, "token_acc": 0.27339911646917964 }, { "epoch": 1.5013192612137203, "grad_norm": 0.3448228171012622, "learning_rate": 0.0004991421167404482, "loss": 3.243621587753296, "step": 2561, "token_acc": 0.27413607873412116 }, { "epoch": 1.5019055995309294, "grad_norm": 0.3850168564546283, "learning_rate": 0.0004991401099742974, "loss": 3.319021224975586, "step": 2562, "token_acc": 0.26408151786817213 }, { "epoch": 1.5024919378481383, "grad_norm": 0.3385927206564174, "learning_rate": 0.000499138100867811, "loss": 3.2335896492004395, "step": 2563, "token_acc": 0.27524289628341597 }, { "epoch": 1.5030782761653474, "grad_norm": 0.362794282544457, "learning_rate": 0.0004991360894210079, "loss": 3.262423515319824, "step": 2564, "token_acc": 0.27464957787621574 }, { "epoch": 1.5036646144825565, "grad_norm": 0.4273206532090922, "learning_rate": 0.0004991340756339069, "loss": 3.2533841133117676, "step": 2565, "token_acc": 0.2719156340067206 }, { "epoch": 1.5042509527997656, "grad_norm": 0.4682393230211044, "learning_rate": 0.000499132059506527, "loss": 3.2540810108184814, "step": 2566, "token_acc": 0.27131067377240375 }, { "epoch": 1.5048372911169745, "grad_norm": 0.3674835501977162, "learning_rate": 0.0004991300410388871, "loss": 3.2532877922058105, "step": 2567, "token_acc": 0.2734548079228056 }, { "epoch": 1.5054236294341834, "grad_norm": 0.36979177350080983, "learning_rate": 0.0004991280202310062, "loss": 3.263533592224121, "step": 2568, "token_acc": 0.27035336847680785 }, { "epoch": 1.5060099677513925, "grad_norm": 0.3643822037132622, "learning_rate": 0.0004991259970829032, "loss": 3.2781331539154053, "step": 2569, "token_acc": 0.26862745098039215 }, { "epoch": 1.5065963060686016, "grad_norm": 0.4476152904867317, "learning_rate": 0.0004991239715945972, "loss": 3.23483943939209, "step": 2570, "token_acc": 0.2737736123824582 }, { "epoch": 1.5071826443858107, "grad_norm": 0.5344595048723654, "learning_rate": 0.0004991219437661072, "loss": 3.2789430618286133, "step": 2571, "token_acc": 0.26990771932746394 }, { "epoch": 1.5077689827030196, "grad_norm": 0.4592028198727494, "learning_rate": 0.0004991199135974522, "loss": 3.2721993923187256, "step": 2572, "token_acc": 0.27100821431785166 }, { "epoch": 1.5083553210202285, "grad_norm": 0.37682639727390393, "learning_rate": 0.0004991178810886514, "loss": 3.248673439025879, "step": 2573, "token_acc": 0.27404235118766573 }, { "epoch": 1.5089416593374376, "grad_norm": 0.40615964776211, "learning_rate": 0.0004991158462397236, "loss": 3.289583683013916, "step": 2574, "token_acc": 0.268455007796814 }, { "epoch": 1.5095279976546467, "grad_norm": 0.3691587872426139, "learning_rate": 0.0004991138090506882, "loss": 3.28464937210083, "step": 2575, "token_acc": 0.2700779782571575 }, { "epoch": 1.5101143359718558, "grad_norm": 0.35995416533838553, "learning_rate": 0.0004991117695215643, "loss": 3.2437875270843506, "step": 2576, "token_acc": 0.2777024414138029 }, { "epoch": 1.5107006742890647, "grad_norm": 0.4020323682739989, "learning_rate": 0.000499109727652371, "loss": 3.2557735443115234, "step": 2577, "token_acc": 0.27351970942303905 }, { "epoch": 1.5112870126062738, "grad_norm": 0.40012308842668476, "learning_rate": 0.0004991076834431275, "loss": 3.312852382659912, "step": 2578, "token_acc": 0.2641404185260604 }, { "epoch": 1.5118733509234827, "grad_norm": 0.38016669868628233, "learning_rate": 0.000499105636893853, "loss": 3.2731266021728516, "step": 2579, "token_acc": 0.27011038843863516 }, { "epoch": 1.5124596892406919, "grad_norm": 0.3536723847367996, "learning_rate": 0.0004991035880045667, "loss": 3.257784366607666, "step": 2580, "token_acc": 0.27153840979764166 }, { "epoch": 1.513046027557901, "grad_norm": 0.34200990305898926, "learning_rate": 0.0004991015367752878, "loss": 3.3083670139312744, "step": 2581, "token_acc": 0.2667002219467596 }, { "epoch": 1.51363236587511, "grad_norm": 0.4032844574981755, "learning_rate": 0.0004990994832060356, "loss": 3.3118104934692383, "step": 2582, "token_acc": 0.2641626375675668 }, { "epoch": 1.514218704192319, "grad_norm": 0.4728202127626407, "learning_rate": 0.0004990974272968295, "loss": 3.2963128089904785, "step": 2583, "token_acc": 0.26776409346843894 }, { "epoch": 1.5148050425095279, "grad_norm": 0.45323267559923375, "learning_rate": 0.0004990953690476887, "loss": 3.2584738731384277, "step": 2584, "token_acc": 0.2733442967647285 }, { "epoch": 1.515391380826737, "grad_norm": 0.41946261004931057, "learning_rate": 0.0004990933084586327, "loss": 3.2761378288269043, "step": 2585, "token_acc": 0.27130436160185034 }, { "epoch": 1.515977719143946, "grad_norm": 0.475321890835984, "learning_rate": 0.0004990912455296806, "loss": 3.2937917709350586, "step": 2586, "token_acc": 0.26510702183784696 }, { "epoch": 1.5165640574611552, "grad_norm": 0.41653501663177966, "learning_rate": 0.0004990891802608519, "loss": 3.2514376640319824, "step": 2587, "token_acc": 0.2732557036153398 }, { "epoch": 1.517150395778364, "grad_norm": 0.40933631326340525, "learning_rate": 0.000499087112652166, "loss": 3.2598648071289062, "step": 2588, "token_acc": 0.2699195041631175 }, { "epoch": 1.5177367340955732, "grad_norm": 0.43205113531703115, "learning_rate": 0.0004990850427036424, "loss": 3.259249448776245, "step": 2589, "token_acc": 0.2716156976668106 }, { "epoch": 1.518323072412782, "grad_norm": 0.34824819515019817, "learning_rate": 0.0004990829704153004, "loss": 3.2567336559295654, "step": 2590, "token_acc": 0.273110723354454 }, { "epoch": 1.5189094107299912, "grad_norm": 0.3757132671888345, "learning_rate": 0.0004990808957871596, "loss": 3.251190662384033, "step": 2591, "token_acc": 0.27156952068289575 }, { "epoch": 1.5194957490472003, "grad_norm": 0.3800848425365529, "learning_rate": 0.0004990788188192393, "loss": 3.2771501541137695, "step": 2592, "token_acc": 0.2690651998853508 }, { "epoch": 1.5200820873644094, "grad_norm": 0.40526590172302934, "learning_rate": 0.0004990767395115593, "loss": 3.2796850204467773, "step": 2593, "token_acc": 0.269747386489365 }, { "epoch": 1.5206684256816183, "grad_norm": 0.4112730647827353, "learning_rate": 0.0004990746578641389, "loss": 3.26863169670105, "step": 2594, "token_acc": 0.2725010098904106 }, { "epoch": 1.5212547639988272, "grad_norm": 0.3656664415754131, "learning_rate": 0.0004990725738769977, "loss": 3.2911901473999023, "step": 2595, "token_acc": 0.27179157626952116 }, { "epoch": 1.5218411023160363, "grad_norm": 0.36439374169814104, "learning_rate": 0.0004990704875501553, "loss": 3.2122573852539062, "step": 2596, "token_acc": 0.27790043551899346 }, { "epoch": 1.5224274406332454, "grad_norm": 0.4497139664839275, "learning_rate": 0.0004990683988836313, "loss": 3.280503749847412, "step": 2597, "token_acc": 0.2700311026926968 }, { "epoch": 1.5230137789504545, "grad_norm": 0.4490913930307062, "learning_rate": 0.0004990663078774453, "loss": 3.254506826400757, "step": 2598, "token_acc": 0.27037192228849005 }, { "epoch": 1.5236001172676634, "grad_norm": 0.45112041187206764, "learning_rate": 0.000499064214531617, "loss": 3.2496957778930664, "step": 2599, "token_acc": 0.272075400642249 }, { "epoch": 1.5241864555848723, "grad_norm": 0.4492685229374902, "learning_rate": 0.000499062118846166, "loss": 3.301370143890381, "step": 2600, "token_acc": 0.26543296642634984 }, { "epoch": 1.5247727939020814, "grad_norm": 0.3904869025112164, "learning_rate": 0.000499060020821112, "loss": 3.2064359188079834, "step": 2601, "token_acc": 0.27939304968878625 }, { "epoch": 1.5253591322192905, "grad_norm": 0.3499647392180847, "learning_rate": 0.0004990579204564747, "loss": 3.21273136138916, "step": 2602, "token_acc": 0.2794000506661026 }, { "epoch": 1.5259454705364996, "grad_norm": 0.32554426093460603, "learning_rate": 0.0004990558177522739, "loss": 3.23994779586792, "step": 2603, "token_acc": 0.2735700223299816 }, { "epoch": 1.5265318088537085, "grad_norm": 0.3338394247619128, "learning_rate": 0.0004990537127085292, "loss": 3.250572919845581, "step": 2604, "token_acc": 0.2724101746695889 }, { "epoch": 1.5271181471709177, "grad_norm": 0.312988608011108, "learning_rate": 0.0004990516053252606, "loss": 3.2569456100463867, "step": 2605, "token_acc": 0.2736684404684555 }, { "epoch": 1.5277044854881265, "grad_norm": 0.3299606179099335, "learning_rate": 0.0004990494956024877, "loss": 3.246515989303589, "step": 2606, "token_acc": 0.27203160816807215 }, { "epoch": 1.5282908238053357, "grad_norm": 0.35524688646385216, "learning_rate": 0.0004990473835402304, "loss": 3.288687229156494, "step": 2607, "token_acc": 0.2672541716687477 }, { "epoch": 1.5288771621225448, "grad_norm": 0.3938440317275084, "learning_rate": 0.0004990452691385086, "loss": 3.254031181335449, "step": 2608, "token_acc": 0.27265844660661687 }, { "epoch": 1.5294635004397539, "grad_norm": 0.377281942458023, "learning_rate": 0.0004990431523973419, "loss": 3.2196178436279297, "step": 2609, "token_acc": 0.27718507618259297 }, { "epoch": 1.5300498387569628, "grad_norm": 0.3156695530810945, "learning_rate": 0.0004990410333167506, "loss": 3.2919702529907227, "step": 2610, "token_acc": 0.2675626432002836 }, { "epoch": 1.5306361770741717, "grad_norm": 0.4069045520187358, "learning_rate": 0.0004990389118967542, "loss": 3.269207715988159, "step": 2611, "token_acc": 0.27138172380029735 }, { "epoch": 1.5312225153913808, "grad_norm": 0.36361656979757545, "learning_rate": 0.0004990367881373729, "loss": 3.267333745956421, "step": 2612, "token_acc": 0.2701189304329584 }, { "epoch": 1.5318088537085899, "grad_norm": 0.34883269174316006, "learning_rate": 0.0004990346620386265, "loss": 3.2118911743164062, "step": 2613, "token_acc": 0.27726341705631646 }, { "epoch": 1.532395192025799, "grad_norm": 0.3691702258654956, "learning_rate": 0.0004990325336005351, "loss": 3.2957334518432617, "step": 2614, "token_acc": 0.2689772037284975 }, { "epoch": 1.5329815303430079, "grad_norm": 0.5018513952070723, "learning_rate": 0.0004990304028231185, "loss": 3.213566780090332, "step": 2615, "token_acc": 0.2770729532700477 }, { "epoch": 1.533567868660217, "grad_norm": 0.5738740323802886, "learning_rate": 0.000499028269706397, "loss": 3.246070146560669, "step": 2616, "token_acc": 0.2735984004509367 }, { "epoch": 1.5341542069774259, "grad_norm": 0.49099425737924635, "learning_rate": 0.0004990261342503904, "loss": 3.2986602783203125, "step": 2617, "token_acc": 0.2683126997153783 }, { "epoch": 1.534740545294635, "grad_norm": 0.42632125699657863, "learning_rate": 0.0004990239964551189, "loss": 3.301894426345825, "step": 2618, "token_acc": 0.26685611879160265 }, { "epoch": 1.535326883611844, "grad_norm": 0.41861919970031386, "learning_rate": 0.0004990218563206024, "loss": 3.2553257942199707, "step": 2619, "token_acc": 0.2733750003264756 }, { "epoch": 1.5359132219290532, "grad_norm": 0.4023218351430975, "learning_rate": 0.0004990197138468611, "loss": 3.239457130432129, "step": 2620, "token_acc": 0.2743222015433615 }, { "epoch": 1.536499560246262, "grad_norm": 0.41425083272992663, "learning_rate": 0.0004990175690339153, "loss": 3.2661590576171875, "step": 2621, "token_acc": 0.2712879760376551 }, { "epoch": 1.537085898563471, "grad_norm": 0.38063663540170106, "learning_rate": 0.0004990154218817848, "loss": 3.2548868656158447, "step": 2622, "token_acc": 0.2719864788389261 }, { "epoch": 1.53767223688068, "grad_norm": 0.4004431423583892, "learning_rate": 0.0004990132723904901, "loss": 3.282156229019165, "step": 2623, "token_acc": 0.27060004344341493 }, { "epoch": 1.5382585751978892, "grad_norm": 0.4125367425518996, "learning_rate": 0.0004990111205600513, "loss": 3.320084571838379, "step": 2624, "token_acc": 0.2650111486539911 }, { "epoch": 1.5388449135150983, "grad_norm": 0.31626041136147365, "learning_rate": 0.0004990089663904885, "loss": 3.254483938217163, "step": 2625, "token_acc": 0.2709560046683319 }, { "epoch": 1.5394312518323072, "grad_norm": 0.42809743032355846, "learning_rate": 0.0004990068098818221, "loss": 3.2869207859039307, "step": 2626, "token_acc": 0.2681024355436903 }, { "epoch": 1.5400175901495161, "grad_norm": 0.3893687379457032, "learning_rate": 0.0004990046510340721, "loss": 3.240131139755249, "step": 2627, "token_acc": 0.2725396923457095 }, { "epoch": 1.5406039284667252, "grad_norm": 0.42652289423361567, "learning_rate": 0.0004990024898472589, "loss": 3.2188005447387695, "step": 2628, "token_acc": 0.27687323768171684 }, { "epoch": 1.5411902667839343, "grad_norm": 0.4646790209668725, "learning_rate": 0.000499000326321403, "loss": 3.2517547607421875, "step": 2629, "token_acc": 0.27302447524499224 }, { "epoch": 1.5417766051011434, "grad_norm": 0.3876984049842635, "learning_rate": 0.0004989981604565245, "loss": 3.2105846405029297, "step": 2630, "token_acc": 0.2779264921080737 }, { "epoch": 1.5423629434183523, "grad_norm": 0.3619991010331251, "learning_rate": 0.0004989959922526439, "loss": 3.2419073581695557, "step": 2631, "token_acc": 0.27580839208071256 }, { "epoch": 1.5429492817355615, "grad_norm": 0.3797860874387506, "learning_rate": 0.0004989938217097814, "loss": 3.2080209255218506, "step": 2632, "token_acc": 0.279506476037134 }, { "epoch": 1.5435356200527703, "grad_norm": 0.34234491666687183, "learning_rate": 0.0004989916488279575, "loss": 3.2258660793304443, "step": 2633, "token_acc": 0.27551688777455047 }, { "epoch": 1.5441219583699795, "grad_norm": 0.37945457042923514, "learning_rate": 0.0004989894736071924, "loss": 3.2722809314727783, "step": 2634, "token_acc": 0.2710098930110495 }, { "epoch": 1.5447082966871886, "grad_norm": 0.46145950945105835, "learning_rate": 0.0004989872960475069, "loss": 3.2760467529296875, "step": 2635, "token_acc": 0.27053734844731175 }, { "epoch": 1.5452946350043977, "grad_norm": 0.4835128210450808, "learning_rate": 0.0004989851161489213, "loss": 3.214796304702759, "step": 2636, "token_acc": 0.27867832096539663 }, { "epoch": 1.5458809733216066, "grad_norm": 0.42130780625521774, "learning_rate": 0.000498982933911456, "loss": 3.2687814235687256, "step": 2637, "token_acc": 0.2706117797914617 }, { "epoch": 1.5464673116388155, "grad_norm": 0.4222852347317549, "learning_rate": 0.0004989807493351315, "loss": 3.2881879806518555, "step": 2638, "token_acc": 0.2678677588556217 }, { "epoch": 1.5470536499560246, "grad_norm": 0.3856519286332186, "learning_rate": 0.0004989785624199684, "loss": 3.2356760501861572, "step": 2639, "token_acc": 0.2743609357908349 }, { "epoch": 1.5476399882732337, "grad_norm": 0.37731430873539445, "learning_rate": 0.0004989763731659872, "loss": 3.2439823150634766, "step": 2640, "token_acc": 0.27453879995563607 }, { "epoch": 1.5482263265904428, "grad_norm": 0.3596363141636464, "learning_rate": 0.0004989741815732085, "loss": 3.3185830116271973, "step": 2641, "token_acc": 0.2656785517042737 }, { "epoch": 1.5488126649076517, "grad_norm": 0.3487001734746019, "learning_rate": 0.0004989719876416529, "loss": 3.2326765060424805, "step": 2642, "token_acc": 0.275534034491769 }, { "epoch": 1.5493990032248608, "grad_norm": 0.3300309678475686, "learning_rate": 0.0004989697913713409, "loss": 3.257556676864624, "step": 2643, "token_acc": 0.27281272866138623 }, { "epoch": 1.5499853415420697, "grad_norm": 0.3838529287168988, "learning_rate": 0.0004989675927622931, "loss": 3.249099016189575, "step": 2644, "token_acc": 0.273306884335702 }, { "epoch": 1.5505716798592788, "grad_norm": 0.4143647705928201, "learning_rate": 0.0004989653918145305, "loss": 3.2324624061584473, "step": 2645, "token_acc": 0.27393156542676983 }, { "epoch": 1.551158018176488, "grad_norm": 0.49725611499121625, "learning_rate": 0.0004989631885280733, "loss": 3.285338878631592, "step": 2646, "token_acc": 0.2691531522135892 }, { "epoch": 1.551744356493697, "grad_norm": 0.4105517857631105, "learning_rate": 0.0004989609829029425, "loss": 3.234142303466797, "step": 2647, "token_acc": 0.2753726028725965 }, { "epoch": 1.552330694810906, "grad_norm": 0.39416348603598894, "learning_rate": 0.0004989587749391587, "loss": 3.231597661972046, "step": 2648, "token_acc": 0.27749008038784956 }, { "epoch": 1.5529170331281148, "grad_norm": 0.36722345168408743, "learning_rate": 0.0004989565646367429, "loss": 3.2738986015319824, "step": 2649, "token_acc": 0.2704959227015547 }, { "epoch": 1.553503371445324, "grad_norm": 0.354552313381656, "learning_rate": 0.0004989543519957155, "loss": 3.2706987857818604, "step": 2650, "token_acc": 0.2703417754396329 }, { "epoch": 1.554089709762533, "grad_norm": 0.40294080366790597, "learning_rate": 0.0004989521370160974, "loss": 3.3017120361328125, "step": 2651, "token_acc": 0.2673277202602611 }, { "epoch": 1.5546760480797421, "grad_norm": 0.4025411834888025, "learning_rate": 0.0004989499196979095, "loss": 3.239588499069214, "step": 2652, "token_acc": 0.27497293263115846 }, { "epoch": 1.555262386396951, "grad_norm": 0.3589102076574963, "learning_rate": 0.0004989477000411725, "loss": 3.27361798286438, "step": 2653, "token_acc": 0.2719704171062665 }, { "epoch": 1.55584872471416, "grad_norm": 0.36026880012163814, "learning_rate": 0.0004989454780459073, "loss": 3.2707529067993164, "step": 2654, "token_acc": 0.2706217817986251 }, { "epoch": 1.556435063031369, "grad_norm": 0.3769613499464902, "learning_rate": 0.0004989432537121349, "loss": 3.3027758598327637, "step": 2655, "token_acc": 0.26701522582039744 }, { "epoch": 1.5570214013485781, "grad_norm": 0.342672851494411, "learning_rate": 0.000498941027039876, "loss": 3.2814202308654785, "step": 2656, "token_acc": 0.2692951669510899 }, { "epoch": 1.5576077396657872, "grad_norm": 0.3574098534377979, "learning_rate": 0.0004989387980291516, "loss": 3.284370183944702, "step": 2657, "token_acc": 0.2699602483481128 }, { "epoch": 1.5581940779829961, "grad_norm": 0.3514926295971717, "learning_rate": 0.0004989365666799827, "loss": 3.252963066101074, "step": 2658, "token_acc": 0.2714648020371283 }, { "epoch": 1.5587804163002053, "grad_norm": 0.3417768728615734, "learning_rate": 0.0004989343329923902, "loss": 3.240387439727783, "step": 2659, "token_acc": 0.2740051459534683 }, { "epoch": 1.5593667546174141, "grad_norm": 0.3844651396566197, "learning_rate": 0.0004989320969663951, "loss": 3.254425525665283, "step": 2660, "token_acc": 0.27187097959933837 }, { "epoch": 1.5599530929346233, "grad_norm": 0.3846254546361974, "learning_rate": 0.0004989298586020183, "loss": 3.2569668292999268, "step": 2661, "token_acc": 0.2706875966362179 }, { "epoch": 1.5605394312518324, "grad_norm": 0.37931114930598936, "learning_rate": 0.000498927617899281, "loss": 3.238466262817383, "step": 2662, "token_acc": 0.27485740144387616 }, { "epoch": 1.5611257695690415, "grad_norm": 0.4426821670641154, "learning_rate": 0.0004989253748582042, "loss": 3.2260851860046387, "step": 2663, "token_acc": 0.2744047588311534 }, { "epoch": 1.5617121078862504, "grad_norm": 0.5233817670180233, "learning_rate": 0.0004989231294788088, "loss": 3.240323305130005, "step": 2664, "token_acc": 0.27272104719850054 }, { "epoch": 1.5622984462034593, "grad_norm": 0.4995519929215957, "learning_rate": 0.0004989208817611162, "loss": 3.2973456382751465, "step": 2665, "token_acc": 0.2683416875085024 }, { "epoch": 1.5628847845206684, "grad_norm": 0.42641943666751, "learning_rate": 0.0004989186317051472, "loss": 3.241018295288086, "step": 2666, "token_acc": 0.2734630072488855 }, { "epoch": 1.5634711228378775, "grad_norm": 0.39996113411284107, "learning_rate": 0.0004989163793109231, "loss": 3.2816903591156006, "step": 2667, "token_acc": 0.2705359529357132 }, { "epoch": 1.5640574611550866, "grad_norm": 0.5530718271321012, "learning_rate": 0.0004989141245784651, "loss": 3.33862566947937, "step": 2668, "token_acc": 0.26283321746168453 }, { "epoch": 1.5646437994722955, "grad_norm": 0.5325390424675902, "learning_rate": 0.0004989118675077942, "loss": 3.280628204345703, "step": 2669, "token_acc": 0.2695132902781736 }, { "epoch": 1.5652301377895046, "grad_norm": 0.3847569319623661, "learning_rate": 0.0004989096080989318, "loss": 3.2433745861053467, "step": 2670, "token_acc": 0.2722702003980999 }, { "epoch": 1.5658164761067135, "grad_norm": 0.4375873969562279, "learning_rate": 0.000498907346351899, "loss": 3.3132450580596924, "step": 2671, "token_acc": 0.26583004508834446 }, { "epoch": 1.5664028144239226, "grad_norm": 0.45026986045211215, "learning_rate": 0.0004989050822667172, "loss": 3.226424217224121, "step": 2672, "token_acc": 0.2761323136224173 }, { "epoch": 1.5669891527411317, "grad_norm": 0.39761124993926966, "learning_rate": 0.0004989028158434074, "loss": 3.2660775184631348, "step": 2673, "token_acc": 0.2715718426388341 }, { "epoch": 1.5675754910583408, "grad_norm": 0.4366372416828852, "learning_rate": 0.0004989005470819912, "loss": 3.227046012878418, "step": 2674, "token_acc": 0.27720152228072237 }, { "epoch": 1.5681618293755497, "grad_norm": 0.39021958849509253, "learning_rate": 0.0004988982759824896, "loss": 3.2357258796691895, "step": 2675, "token_acc": 0.2747005064381357 }, { "epoch": 1.5687481676927586, "grad_norm": 0.3262171587001197, "learning_rate": 0.0004988960025449242, "loss": 3.249258041381836, "step": 2676, "token_acc": 0.27259877481365447 }, { "epoch": 1.5693345060099677, "grad_norm": 0.34448535523871937, "learning_rate": 0.0004988937267693162, "loss": 3.2756948471069336, "step": 2677, "token_acc": 0.2692577021559959 }, { "epoch": 1.5699208443271768, "grad_norm": 0.37032248885205776, "learning_rate": 0.000498891448655687, "loss": 3.2353405952453613, "step": 2678, "token_acc": 0.27351220045178387 }, { "epoch": 1.570507182644386, "grad_norm": 0.35453190717742655, "learning_rate": 0.0004988891682040581, "loss": 3.2334859371185303, "step": 2679, "token_acc": 0.2767216447768983 }, { "epoch": 1.5710935209615948, "grad_norm": 0.318483802055312, "learning_rate": 0.0004988868854144508, "loss": 3.2155065536499023, "step": 2680, "token_acc": 0.2768662561793556 }, { "epoch": 1.5716798592788037, "grad_norm": 0.32327002418678236, "learning_rate": 0.0004988846002868866, "loss": 3.2319586277008057, "step": 2681, "token_acc": 0.2771636545383973 }, { "epoch": 1.5722661975960128, "grad_norm": 0.3441250186577668, "learning_rate": 0.0004988823128213869, "loss": 3.2407686710357666, "step": 2682, "token_acc": 0.27392532821678733 }, { "epoch": 1.572852535913222, "grad_norm": 0.3601102533657783, "learning_rate": 0.0004988800230179733, "loss": 3.2567479610443115, "step": 2683, "token_acc": 0.27164362550407833 }, { "epoch": 1.573438874230431, "grad_norm": 0.33662251526197795, "learning_rate": 0.0004988777308766673, "loss": 3.2499845027923584, "step": 2684, "token_acc": 0.27264931329736003 }, { "epoch": 1.57402521254764, "grad_norm": 0.3247345631168005, "learning_rate": 0.0004988754363974904, "loss": 3.2635385990142822, "step": 2685, "token_acc": 0.2718904268455264 }, { "epoch": 1.574611550864849, "grad_norm": 0.3777905980359913, "learning_rate": 0.0004988731395804641, "loss": 3.2611401081085205, "step": 2686, "token_acc": 0.27217976724684084 }, { "epoch": 1.575197889182058, "grad_norm": 0.33663718256597874, "learning_rate": 0.00049887084042561, "loss": 3.279417037963867, "step": 2687, "token_acc": 0.2686788139760509 }, { "epoch": 1.575784227499267, "grad_norm": 0.3362316721317457, "learning_rate": 0.0004988685389329497, "loss": 3.1999971866607666, "step": 2688, "token_acc": 0.2798969002604483 }, { "epoch": 1.5763705658164762, "grad_norm": 0.3399648099293411, "learning_rate": 0.0004988662351025048, "loss": 3.2987563610076904, "step": 2689, "token_acc": 0.26550862017274235 }, { "epoch": 1.5769569041336853, "grad_norm": 0.422309117473261, "learning_rate": 0.000498863928934297, "loss": 3.259262800216675, "step": 2690, "token_acc": 0.27012342520943916 }, { "epoch": 1.5775432424508942, "grad_norm": 0.3905374891803225, "learning_rate": 0.0004988616204283479, "loss": 3.2702717781066895, "step": 2691, "token_acc": 0.27083267394951027 }, { "epoch": 1.578129580768103, "grad_norm": 0.3503140145756829, "learning_rate": 0.0004988593095846793, "loss": 3.215207815170288, "step": 2692, "token_acc": 0.276515849039701 }, { "epoch": 1.5787159190853122, "grad_norm": 0.377567862143921, "learning_rate": 0.0004988569964033128, "loss": 3.2278919219970703, "step": 2693, "token_acc": 0.27492802039028946 }, { "epoch": 1.5793022574025213, "grad_norm": 0.4117813332740536, "learning_rate": 0.0004988546808842703, "loss": 3.2277774810791016, "step": 2694, "token_acc": 0.2758324741994112 }, { "epoch": 1.5798885957197304, "grad_norm": 0.3942180648872112, "learning_rate": 0.0004988523630275731, "loss": 3.2668137550354004, "step": 2695, "token_acc": 0.2705968817260749 }, { "epoch": 1.5804749340369393, "grad_norm": 0.31324215549712725, "learning_rate": 0.0004988500428332435, "loss": 3.226727247238159, "step": 2696, "token_acc": 0.2740891668032819 }, { "epoch": 1.5810612723541484, "grad_norm": 0.38645085509564414, "learning_rate": 0.0004988477203013029, "loss": 3.2715234756469727, "step": 2697, "token_acc": 0.26994020646340916 }, { "epoch": 1.5816476106713573, "grad_norm": 0.4015884618559776, "learning_rate": 0.0004988453954317735, "loss": 3.248443126678467, "step": 2698, "token_acc": 0.27439067448138754 }, { "epoch": 1.5822339489885664, "grad_norm": 0.4374823655905406, "learning_rate": 0.0004988430682246769, "loss": 3.2270560264587402, "step": 2699, "token_acc": 0.27576205715656893 }, { "epoch": 1.5828202873057755, "grad_norm": 0.4099083312504755, "learning_rate": 0.000498840738680035, "loss": 3.27923846244812, "step": 2700, "token_acc": 0.26916692589407315 }, { "epoch": 1.5834066256229846, "grad_norm": 0.35450487951341336, "learning_rate": 0.0004988384067978695, "loss": 3.264312505722046, "step": 2701, "token_acc": 0.27171067775107194 }, { "epoch": 1.5839929639401935, "grad_norm": 0.3603862045876232, "learning_rate": 0.0004988360725782027, "loss": 3.249986171722412, "step": 2702, "token_acc": 0.27329845143373055 }, { "epoch": 1.5845793022574024, "grad_norm": 0.38290899483458135, "learning_rate": 0.0004988337360210562, "loss": 3.2628514766693115, "step": 2703, "token_acc": 0.2723530338529093 }, { "epoch": 1.5851656405746115, "grad_norm": 0.34659374894191103, "learning_rate": 0.000498831397126452, "loss": 3.2192413806915283, "step": 2704, "token_acc": 0.2758640701247975 }, { "epoch": 1.5857519788918206, "grad_norm": 0.3868393656960508, "learning_rate": 0.0004988290558944123, "loss": 3.2475297451019287, "step": 2705, "token_acc": 0.27241009689148576 }, { "epoch": 1.5863383172090297, "grad_norm": 0.38793105455210936, "learning_rate": 0.0004988267123249588, "loss": 3.228393316268921, "step": 2706, "token_acc": 0.2742931815943769 }, { "epoch": 1.5869246555262386, "grad_norm": 0.40405986606024225, "learning_rate": 0.0004988243664181137, "loss": 3.2677111625671387, "step": 2707, "token_acc": 0.27193307879780915 }, { "epoch": 1.5875109938434475, "grad_norm": 0.4490691144683981, "learning_rate": 0.0004988220181738988, "loss": 3.2014970779418945, "step": 2708, "token_acc": 0.2791681670867843 }, { "epoch": 1.5880973321606566, "grad_norm": 0.37508884935344244, "learning_rate": 0.0004988196675923365, "loss": 3.2355289459228516, "step": 2709, "token_acc": 0.2740727508675516 }, { "epoch": 1.5886836704778657, "grad_norm": 0.3778067038215156, "learning_rate": 0.0004988173146734487, "loss": 3.240598678588867, "step": 2710, "token_acc": 0.27501648344255014 }, { "epoch": 1.5892700087950749, "grad_norm": 0.41313150686608957, "learning_rate": 0.0004988149594172574, "loss": 3.2623720169067383, "step": 2711, "token_acc": 0.2718314091170674 }, { "epoch": 1.5898563471122837, "grad_norm": 0.3556962425691541, "learning_rate": 0.000498812601823785, "loss": 3.250446319580078, "step": 2712, "token_acc": 0.27298510394994685 }, { "epoch": 1.5904426854294929, "grad_norm": 0.3608077178922655, "learning_rate": 0.0004988102418930534, "loss": 3.193519115447998, "step": 2713, "token_acc": 0.27913297807178733 }, { "epoch": 1.5910290237467017, "grad_norm": 0.3297838350918935, "learning_rate": 0.0004988078796250849, "loss": 3.2485578060150146, "step": 2714, "token_acc": 0.27300142067209937 }, { "epoch": 1.5916153620639109, "grad_norm": 0.411991775655341, "learning_rate": 0.0004988055150199016, "loss": 3.302521228790283, "step": 2715, "token_acc": 0.26687487740446464 }, { "epoch": 1.59220170038112, "grad_norm": 0.43107094714592736, "learning_rate": 0.0004988031480775257, "loss": 3.2885003089904785, "step": 2716, "token_acc": 0.26872974385781495 }, { "epoch": 1.592788038698329, "grad_norm": 0.45608371945595877, "learning_rate": 0.0004988007787979795, "loss": 3.2895407676696777, "step": 2717, "token_acc": 0.2688575466809904 }, { "epoch": 1.593374377015538, "grad_norm": 0.4670488762531189, "learning_rate": 0.0004987984071812854, "loss": 3.234917640686035, "step": 2718, "token_acc": 0.27583820488186156 }, { "epoch": 1.5939607153327469, "grad_norm": 0.45755183631185503, "learning_rate": 0.0004987960332274654, "loss": 3.232421398162842, "step": 2719, "token_acc": 0.2742818600222187 }, { "epoch": 1.594547053649956, "grad_norm": 0.4227019373376549, "learning_rate": 0.000498793656936542, "loss": 3.2531185150146484, "step": 2720, "token_acc": 0.2722035918647155 }, { "epoch": 1.595133391967165, "grad_norm": 0.4215712774910508, "learning_rate": 0.0004987912783085374, "loss": 3.2416720390319824, "step": 2721, "token_acc": 0.2717643540053012 }, { "epoch": 1.5957197302843742, "grad_norm": 0.3689758269420926, "learning_rate": 0.000498788897343474, "loss": 3.2418856620788574, "step": 2722, "token_acc": 0.27507259976920245 }, { "epoch": 1.596306068601583, "grad_norm": 0.39639354177563874, "learning_rate": 0.0004987865140413741, "loss": 3.255746603012085, "step": 2723, "token_acc": 0.2711629428472031 }, { "epoch": 1.5968924069187922, "grad_norm": 0.38263030782121593, "learning_rate": 0.0004987841284022601, "loss": 3.267214775085449, "step": 2724, "token_acc": 0.26980066605531156 }, { "epoch": 1.597478745236001, "grad_norm": 0.36578567798962536, "learning_rate": 0.0004987817404261546, "loss": 3.220471143722534, "step": 2725, "token_acc": 0.2765835089783396 }, { "epoch": 1.5980650835532102, "grad_norm": 0.3485440905052892, "learning_rate": 0.0004987793501130799, "loss": 3.2583389282226562, "step": 2726, "token_acc": 0.27033725767902983 }, { "epoch": 1.5986514218704193, "grad_norm": 0.35846008610783775, "learning_rate": 0.0004987769574630583, "loss": 3.2682206630706787, "step": 2727, "token_acc": 0.27180805795416807 }, { "epoch": 1.5992377601876284, "grad_norm": 0.36281373255912586, "learning_rate": 0.0004987745624761126, "loss": 3.234234094619751, "step": 2728, "token_acc": 0.27501035558068104 }, { "epoch": 1.5998240985048373, "grad_norm": 0.3523650325238099, "learning_rate": 0.0004987721651522649, "loss": 3.348665714263916, "step": 2729, "token_acc": 0.25943302682742536 }, { "epoch": 1.6004104368220462, "grad_norm": 0.3648963457155009, "learning_rate": 0.000498769765491538, "loss": 3.20805287361145, "step": 2730, "token_acc": 0.2779875951854581 }, { "epoch": 1.6009967751392553, "grad_norm": 0.377782425628979, "learning_rate": 0.0004987673634939544, "loss": 3.2456960678100586, "step": 2731, "token_acc": 0.2727531354251376 }, { "epoch": 1.6015831134564644, "grad_norm": 0.45546514642939634, "learning_rate": 0.0004987649591595367, "loss": 3.280743360519409, "step": 2732, "token_acc": 0.26956021593291946 }, { "epoch": 1.6021694517736735, "grad_norm": 0.35707631745038704, "learning_rate": 0.0004987625524883074, "loss": 3.235963821411133, "step": 2733, "token_acc": 0.2742844321606503 }, { "epoch": 1.6027557900908824, "grad_norm": 0.3895146299921404, "learning_rate": 0.0004987601434802891, "loss": 3.269639492034912, "step": 2734, "token_acc": 0.27049018039007217 }, { "epoch": 1.6033421284080913, "grad_norm": 0.4028401675283581, "learning_rate": 0.0004987577321355044, "loss": 3.288886547088623, "step": 2735, "token_acc": 0.2656581424415137 }, { "epoch": 1.6039284667253004, "grad_norm": 0.42007054227776724, "learning_rate": 0.0004987553184539761, "loss": 3.2761054039001465, "step": 2736, "token_acc": 0.2700375653310105 }, { "epoch": 1.6045148050425095, "grad_norm": 0.37746187433885026, "learning_rate": 0.0004987529024357267, "loss": 3.3051414489746094, "step": 2737, "token_acc": 0.2657674825487854 }, { "epoch": 1.6051011433597187, "grad_norm": 0.3651236854749246, "learning_rate": 0.0004987504840807791, "loss": 3.233194589614868, "step": 2738, "token_acc": 0.27516466075915885 }, { "epoch": 1.6056874816769275, "grad_norm": 0.3821426670517067, "learning_rate": 0.0004987480633891558, "loss": 3.2217910289764404, "step": 2739, "token_acc": 0.2749110320284697 }, { "epoch": 1.6062738199941367, "grad_norm": 0.49580605035281067, "learning_rate": 0.0004987456403608796, "loss": 3.2270307540893555, "step": 2740, "token_acc": 0.274558846799757 }, { "epoch": 1.6068601583113455, "grad_norm": 0.4330358816269615, "learning_rate": 0.0004987432149959734, "loss": 3.235473155975342, "step": 2741, "token_acc": 0.2763001323584215 }, { "epoch": 1.6074464966285547, "grad_norm": 0.347032543621862, "learning_rate": 0.0004987407872944599, "loss": 3.2172203063964844, "step": 2742, "token_acc": 0.27568816703393334 }, { "epoch": 1.6080328349457638, "grad_norm": 0.31771705082755164, "learning_rate": 0.0004987383572563618, "loss": 3.2275846004486084, "step": 2743, "token_acc": 0.27543459854124636 }, { "epoch": 1.6086191732629729, "grad_norm": 0.35758684373540345, "learning_rate": 0.0004987359248817021, "loss": 3.177854299545288, "step": 2744, "token_acc": 0.2824852786943278 }, { "epoch": 1.6092055115801818, "grad_norm": 0.35291716826327824, "learning_rate": 0.0004987334901705035, "loss": 3.2149596214294434, "step": 2745, "token_acc": 0.2776302978572342 }, { "epoch": 1.6097918498973907, "grad_norm": 0.3538669298207957, "learning_rate": 0.000498731053122789, "loss": 3.253016471862793, "step": 2746, "token_acc": 0.27146569784401603 }, { "epoch": 1.6103781882145998, "grad_norm": 0.3451118309842075, "learning_rate": 0.0004987286137385813, "loss": 3.2773189544677734, "step": 2747, "token_acc": 0.26837553141363346 }, { "epoch": 1.6109645265318089, "grad_norm": 0.3179749163014257, "learning_rate": 0.0004987261720179035, "loss": 3.2556238174438477, "step": 2748, "token_acc": 0.27126350917934017 }, { "epoch": 1.611550864849018, "grad_norm": 0.32816561709366504, "learning_rate": 0.0004987237279607784, "loss": 3.279360771179199, "step": 2749, "token_acc": 0.2685902832967442 }, { "epoch": 1.6121372031662269, "grad_norm": 0.32513850302848574, "learning_rate": 0.0004987212815672292, "loss": 3.2462167739868164, "step": 2750, "token_acc": 0.27311525000713455 }, { "epoch": 1.612723541483436, "grad_norm": 0.3411678929209327, "learning_rate": 0.0004987188328372787, "loss": 3.206583261489868, "step": 2751, "token_acc": 0.27804876753504354 }, { "epoch": 1.6133098798006449, "grad_norm": 0.3983071952813173, "learning_rate": 0.0004987163817709498, "loss": 3.218691349029541, "step": 2752, "token_acc": 0.2780238851862712 }, { "epoch": 1.613896218117854, "grad_norm": 0.3862351281946069, "learning_rate": 0.0004987139283682656, "loss": 3.2317378520965576, "step": 2753, "token_acc": 0.2751936244543791 }, { "epoch": 1.614482556435063, "grad_norm": 0.348458635267046, "learning_rate": 0.0004987114726292494, "loss": 3.221101760864258, "step": 2754, "token_acc": 0.27583929116207506 }, { "epoch": 1.6150688947522722, "grad_norm": 0.3118101786582252, "learning_rate": 0.0004987090145539239, "loss": 3.2600440979003906, "step": 2755, "token_acc": 0.27148637727068087 }, { "epoch": 1.6156552330694811, "grad_norm": 0.4016303275087847, "learning_rate": 0.0004987065541423125, "loss": 3.255066394805908, "step": 2756, "token_acc": 0.27424011040025165 }, { "epoch": 1.61624157138669, "grad_norm": 0.44224944029872704, "learning_rate": 0.0004987040913944379, "loss": 3.2731754779815674, "step": 2757, "token_acc": 0.2719338951054114 }, { "epoch": 1.6168279097038991, "grad_norm": 0.4151930126060737, "learning_rate": 0.0004987016263103237, "loss": 3.2479019165039062, "step": 2758, "token_acc": 0.2716210286573963 }, { "epoch": 1.6174142480211082, "grad_norm": 0.382939023349043, "learning_rate": 0.0004986991588899929, "loss": 3.286262035369873, "step": 2759, "token_acc": 0.26529028844417196 }, { "epoch": 1.6180005863383173, "grad_norm": 0.3490235209365785, "learning_rate": 0.0004986966891334685, "loss": 3.2657227516174316, "step": 2760, "token_acc": 0.2724756398994954 }, { "epoch": 1.6185869246555262, "grad_norm": 0.3520265759538571, "learning_rate": 0.0004986942170407737, "loss": 3.2647459506988525, "step": 2761, "token_acc": 0.2707282224129629 }, { "epoch": 1.6191732629727351, "grad_norm": 0.36733780809683125, "learning_rate": 0.000498691742611932, "loss": 3.1752400398254395, "step": 2762, "token_acc": 0.2830303266243625 }, { "epoch": 1.6197596012899442, "grad_norm": 0.41888606951507035, "learning_rate": 0.0004986892658469666, "loss": 3.212538003921509, "step": 2763, "token_acc": 0.27658080325125506 }, { "epoch": 1.6203459396071533, "grad_norm": 0.39855172916085413, "learning_rate": 0.0004986867867459006, "loss": 3.2517294883728027, "step": 2764, "token_acc": 0.2713157017161327 }, { "epoch": 1.6209322779243625, "grad_norm": 0.41027245762118103, "learning_rate": 0.0004986843053087573, "loss": 3.1951522827148438, "step": 2765, "token_acc": 0.27912448728309114 }, { "epoch": 1.6215186162415713, "grad_norm": 0.4367539404422904, "learning_rate": 0.00049868182153556, "loss": 3.228646755218506, "step": 2766, "token_acc": 0.27499007051904223 }, { "epoch": 1.6221049545587805, "grad_norm": 0.37799934714026406, "learning_rate": 0.0004986793354263321, "loss": 3.238917350769043, "step": 2767, "token_acc": 0.2760871295534743 }, { "epoch": 1.6226912928759893, "grad_norm": 0.3280972892430306, "learning_rate": 0.000498676846981097, "loss": 3.210571765899658, "step": 2768, "token_acc": 0.2788220950092525 }, { "epoch": 1.6232776311931985, "grad_norm": 0.40600094055703184, "learning_rate": 0.0004986743561998781, "loss": 3.27011775970459, "step": 2769, "token_acc": 0.27040760195165886 }, { "epoch": 1.6238639695104076, "grad_norm": 0.41423823374294383, "learning_rate": 0.0004986718630826986, "loss": 3.23716402053833, "step": 2770, "token_acc": 0.274798722353706 }, { "epoch": 1.6244503078276167, "grad_norm": 0.3864477557920523, "learning_rate": 0.0004986693676295821, "loss": 3.2373876571655273, "step": 2771, "token_acc": 0.27323684067870113 }, { "epoch": 1.6250366461448256, "grad_norm": 0.3541719735878081, "learning_rate": 0.000498666869840552, "loss": 3.240128755569458, "step": 2772, "token_acc": 0.27542786247654444 }, { "epoch": 1.6256229844620345, "grad_norm": 0.3871081603780341, "learning_rate": 0.0004986643697156317, "loss": 3.269397497177124, "step": 2773, "token_acc": 0.26959298371580914 }, { "epoch": 1.6262093227792436, "grad_norm": 0.36603598396570886, "learning_rate": 0.0004986618672548446, "loss": 3.3137803077697754, "step": 2774, "token_acc": 0.26363959837659817 }, { "epoch": 1.6267956610964527, "grad_norm": 0.36939061875417595, "learning_rate": 0.0004986593624582145, "loss": 3.262991428375244, "step": 2775, "token_acc": 0.27099446291297113 }, { "epoch": 1.6273819994136618, "grad_norm": 0.32479646239203375, "learning_rate": 0.0004986568553257646, "loss": 3.2764062881469727, "step": 2776, "token_acc": 0.2709072985998944 }, { "epoch": 1.6279683377308707, "grad_norm": 0.32947646142705234, "learning_rate": 0.0004986543458575188, "loss": 3.2155604362487793, "step": 2777, "token_acc": 0.27678465421921217 }, { "epoch": 1.6285546760480798, "grad_norm": 0.4162345838071443, "learning_rate": 0.0004986518340535004, "loss": 3.2170333862304688, "step": 2778, "token_acc": 0.2774165684506852 }, { "epoch": 1.6291410143652887, "grad_norm": 0.41759363110660364, "learning_rate": 0.0004986493199137331, "loss": 3.2335448265075684, "step": 2779, "token_acc": 0.2744798825195583 }, { "epoch": 1.6297273526824978, "grad_norm": 0.397037175693642, "learning_rate": 0.0004986468034382406, "loss": 3.2837014198303223, "step": 2780, "token_acc": 0.2685570906166738 }, { "epoch": 1.630313690999707, "grad_norm": 0.355904633245635, "learning_rate": 0.0004986442846270462, "loss": 3.2292261123657227, "step": 2781, "token_acc": 0.2747922522693035 }, { "epoch": 1.630900029316916, "grad_norm": 0.3392622183786451, "learning_rate": 0.000498641763480174, "loss": 3.243370294570923, "step": 2782, "token_acc": 0.27472683999388803 }, { "epoch": 1.631486367634125, "grad_norm": 0.34813422302345937, "learning_rate": 0.0004986392399976473, "loss": 3.199794292449951, "step": 2783, "token_acc": 0.2797840826805345 }, { "epoch": 1.6320727059513338, "grad_norm": 0.35138959495079725, "learning_rate": 0.0004986367141794902, "loss": 3.2339978218078613, "step": 2784, "token_acc": 0.27494765303401164 }, { "epoch": 1.632659044268543, "grad_norm": 0.3719351952658503, "learning_rate": 0.000498634186025726, "loss": 3.2448782920837402, "step": 2785, "token_acc": 0.27485918010954674 }, { "epoch": 1.633245382585752, "grad_norm": 0.40982051502724537, "learning_rate": 0.0004986316555363788, "loss": 3.20831298828125, "step": 2786, "token_acc": 0.2756497096163377 }, { "epoch": 1.6338317209029611, "grad_norm": 0.4679634136903241, "learning_rate": 0.0004986291227114722, "loss": 3.227557897567749, "step": 2787, "token_acc": 0.276445668895063 }, { "epoch": 1.63441805922017, "grad_norm": 0.4286460626684321, "learning_rate": 0.00049862658755103, "loss": 3.2810144424438477, "step": 2788, "token_acc": 0.2693951901613107 }, { "epoch": 1.635004397537379, "grad_norm": 0.34445129120444495, "learning_rate": 0.0004986240500550761, "loss": 3.2818071842193604, "step": 2789, "token_acc": 0.269373519689753 }, { "epoch": 1.635590735854588, "grad_norm": 0.3267231925692841, "learning_rate": 0.0004986215102236343, "loss": 3.261897087097168, "step": 2790, "token_acc": 0.27020795172933953 }, { "epoch": 1.6361770741717971, "grad_norm": 0.3392819320475654, "learning_rate": 0.0004986189680567282, "loss": 3.2720112800598145, "step": 2791, "token_acc": 0.2692128260127343 }, { "epoch": 1.6367634124890063, "grad_norm": 0.3615260712749753, "learning_rate": 0.0004986164235543822, "loss": 3.2113118171691895, "step": 2792, "token_acc": 0.27501873744852096 }, { "epoch": 1.6373497508062151, "grad_norm": 0.3677819809166616, "learning_rate": 0.0004986138767166197, "loss": 3.328761577606201, "step": 2793, "token_acc": 0.26214651052439786 }, { "epoch": 1.6379360891234243, "grad_norm": 0.4351661442243823, "learning_rate": 0.000498611327543465, "loss": 3.2705330848693848, "step": 2794, "token_acc": 0.2708117461631617 }, { "epoch": 1.6385224274406331, "grad_norm": 0.39804657522399217, "learning_rate": 0.0004986087760349418, "loss": 3.2207818031311035, "step": 2795, "token_acc": 0.2756710054281475 }, { "epoch": 1.6391087657578423, "grad_norm": 0.4218043965329263, "learning_rate": 0.0004986062221910742, "loss": 3.2590789794921875, "step": 2796, "token_acc": 0.27254624435274205 }, { "epoch": 1.6396951040750514, "grad_norm": 0.4540068057156381, "learning_rate": 0.0004986036660118861, "loss": 3.297433853149414, "step": 2797, "token_acc": 0.266402219283918 }, { "epoch": 1.6402814423922605, "grad_norm": 0.4469006842733171, "learning_rate": 0.0004986011074974016, "loss": 3.2957658767700195, "step": 2798, "token_acc": 0.26661584984184655 }, { "epoch": 1.6408677807094694, "grad_norm": 0.41208797298991495, "learning_rate": 0.0004985985466476446, "loss": 3.2869277000427246, "step": 2799, "token_acc": 0.26692779521131194 }, { "epoch": 1.6414541190266783, "grad_norm": 0.3633945274867081, "learning_rate": 0.0004985959834626393, "loss": 3.3038723468780518, "step": 2800, "token_acc": 0.2656008985786308 }, { "epoch": 1.6420404573438874, "grad_norm": 0.3462139546365612, "learning_rate": 0.0004985934179424097, "loss": 3.236398696899414, "step": 2801, "token_acc": 0.273619067934341 }, { "epoch": 1.6426267956610965, "grad_norm": 0.3487087292807739, "learning_rate": 0.00049859085008698, "loss": 3.1826324462890625, "step": 2802, "token_acc": 0.2807146708697997 }, { "epoch": 1.6432131339783056, "grad_norm": 0.36891589999050367, "learning_rate": 0.0004985882798963742, "loss": 3.2357091903686523, "step": 2803, "token_acc": 0.2752817395002323 }, { "epoch": 1.6437994722955145, "grad_norm": 0.3732750470174466, "learning_rate": 0.0004985857073706165, "loss": 3.257347583770752, "step": 2804, "token_acc": 0.27214495466133143 }, { "epoch": 1.6443858106127234, "grad_norm": 0.3239709473222964, "learning_rate": 0.0004985831325097311, "loss": 3.2477762699127197, "step": 2805, "token_acc": 0.27246390782320606 }, { "epoch": 1.6449721489299325, "grad_norm": 0.35725519351593177, "learning_rate": 0.000498580555313742, "loss": 3.2600955963134766, "step": 2806, "token_acc": 0.27240646532294416 }, { "epoch": 1.6455584872471416, "grad_norm": 0.3540916620430091, "learning_rate": 0.0004985779757826737, "loss": 3.236013650894165, "step": 2807, "token_acc": 0.27411103375364226 }, { "epoch": 1.6461448255643507, "grad_norm": 0.37591701849603665, "learning_rate": 0.0004985753939165501, "loss": 3.245718240737915, "step": 2808, "token_acc": 0.27278551349937896 }, { "epoch": 1.6467311638815598, "grad_norm": 0.39541450209991336, "learning_rate": 0.0004985728097153958, "loss": 3.2262916564941406, "step": 2809, "token_acc": 0.2763185232124492 }, { "epoch": 1.6473175021987687, "grad_norm": 0.40034914685238143, "learning_rate": 0.0004985702231792348, "loss": 3.2422690391540527, "step": 2810, "token_acc": 0.27297902778743655 }, { "epoch": 1.6479038405159776, "grad_norm": 0.36400651519381383, "learning_rate": 0.0004985676343080916, "loss": 3.1679704189300537, "step": 2811, "token_acc": 0.2842027605263511 }, { "epoch": 1.6484901788331867, "grad_norm": 0.2991063437106015, "learning_rate": 0.0004985650431019904, "loss": 3.206399440765381, "step": 2812, "token_acc": 0.27931457435191465 }, { "epoch": 1.6490765171503958, "grad_norm": 0.3490048272867048, "learning_rate": 0.0004985624495609555, "loss": 3.2703442573547363, "step": 2813, "token_acc": 0.2727640631323351 }, { "epoch": 1.649662855467605, "grad_norm": 0.32726425220451977, "learning_rate": 0.0004985598536850114, "loss": 3.2183618545532227, "step": 2814, "token_acc": 0.27437523027862126 }, { "epoch": 1.6502491937848138, "grad_norm": 0.3625428919090589, "learning_rate": 0.0004985572554741824, "loss": 3.267106771469116, "step": 2815, "token_acc": 0.27121543310952506 }, { "epoch": 1.6508355321020227, "grad_norm": 0.3722076074199333, "learning_rate": 0.0004985546549284929, "loss": 3.2349417209625244, "step": 2816, "token_acc": 0.27441717213625916 }, { "epoch": 1.6514218704192318, "grad_norm": 0.4025791854353653, "learning_rate": 0.0004985520520479674, "loss": 3.1869289875030518, "step": 2817, "token_acc": 0.28000790019083577 }, { "epoch": 1.652008208736441, "grad_norm": 0.4031437414739885, "learning_rate": 0.0004985494468326302, "loss": 3.2438113689422607, "step": 2818, "token_acc": 0.27342399566820086 }, { "epoch": 1.65259454705365, "grad_norm": 0.3459326240470866, "learning_rate": 0.0004985468392825059, "loss": 3.1798017024993896, "step": 2819, "token_acc": 0.28222709959958675 }, { "epoch": 1.653180885370859, "grad_norm": 0.3680654138185959, "learning_rate": 0.000498544229397619, "loss": 3.2882046699523926, "step": 2820, "token_acc": 0.26705050590576185 }, { "epoch": 1.653767223688068, "grad_norm": 0.42710970747055693, "learning_rate": 0.0004985416171779941, "loss": 3.250947952270508, "step": 2821, "token_acc": 0.27014820080492796 }, { "epoch": 1.654353562005277, "grad_norm": 0.36030137173458404, "learning_rate": 0.0004985390026236554, "loss": 3.1986308097839355, "step": 2822, "token_acc": 0.27964537168792286 }, { "epoch": 1.654939900322486, "grad_norm": 0.3300176141682994, "learning_rate": 0.0004985363857346278, "loss": 3.215203046798706, "step": 2823, "token_acc": 0.27901802559024896 }, { "epoch": 1.6555262386396952, "grad_norm": 0.38077522645917034, "learning_rate": 0.0004985337665109358, "loss": 3.2363369464874268, "step": 2824, "token_acc": 0.27359952461573533 }, { "epoch": 1.6561125769569043, "grad_norm": 0.3954765443524495, "learning_rate": 0.0004985311449526038, "loss": 3.253981828689575, "step": 2825, "token_acc": 0.27064094382709863 }, { "epoch": 1.6566989152741132, "grad_norm": 0.36542662594144765, "learning_rate": 0.0004985285210596567, "loss": 3.22945499420166, "step": 2826, "token_acc": 0.27326493819794817 }, { "epoch": 1.657285253591322, "grad_norm": 0.37026509008563846, "learning_rate": 0.0004985258948321191, "loss": 3.240661859512329, "step": 2827, "token_acc": 0.27355614204986856 }, { "epoch": 1.6578715919085312, "grad_norm": 0.4419391598562821, "learning_rate": 0.0004985232662700155, "loss": 3.2287344932556152, "step": 2828, "token_acc": 0.27558495220315404 }, { "epoch": 1.6584579302257403, "grad_norm": 0.48779050943975094, "learning_rate": 0.0004985206353733708, "loss": 3.234668493270874, "step": 2829, "token_acc": 0.27590292314394704 }, { "epoch": 1.6590442685429494, "grad_norm": 0.4094397266582616, "learning_rate": 0.0004985180021422096, "loss": 3.264362096786499, "step": 2830, "token_acc": 0.2688967548144556 }, { "epoch": 1.6596306068601583, "grad_norm": 0.30673402009883666, "learning_rate": 0.0004985153665765566, "loss": 3.1701488494873047, "step": 2831, "token_acc": 0.28299677380810134 }, { "epoch": 1.6602169451773672, "grad_norm": 0.38757061478866, "learning_rate": 0.0004985127286764366, "loss": 3.2231578826904297, "step": 2832, "token_acc": 0.27622282088275474 }, { "epoch": 1.6608032834945763, "grad_norm": 0.33669139817449567, "learning_rate": 0.0004985100884418745, "loss": 3.231654167175293, "step": 2833, "token_acc": 0.2753767747767405 }, { "epoch": 1.6613896218117854, "grad_norm": 0.3440032753977391, "learning_rate": 0.0004985074458728948, "loss": 3.224477767944336, "step": 2834, "token_acc": 0.2746972328475345 }, { "epoch": 1.6619759601289945, "grad_norm": 0.3616120900262278, "learning_rate": 0.0004985048009695227, "loss": 3.253448486328125, "step": 2835, "token_acc": 0.2716456210077358 }, { "epoch": 1.6625622984462036, "grad_norm": 0.4099868481129341, "learning_rate": 0.0004985021537317828, "loss": 3.2238569259643555, "step": 2836, "token_acc": 0.2756548516019131 }, { "epoch": 1.6631486367634125, "grad_norm": 0.3803184341745219, "learning_rate": 0.0004984995041596999, "loss": 3.222700357437134, "step": 2837, "token_acc": 0.27487946610154124 }, { "epoch": 1.6637349750806214, "grad_norm": 0.3184454224279368, "learning_rate": 0.0004984968522532991, "loss": 3.2895102500915527, "step": 2838, "token_acc": 0.2685154473400007 }, { "epoch": 1.6643213133978305, "grad_norm": 0.3601900947363005, "learning_rate": 0.0004984941980126053, "loss": 3.2527172565460205, "step": 2839, "token_acc": 0.2745359943668937 }, { "epoch": 1.6649076517150396, "grad_norm": 0.45627374355477374, "learning_rate": 0.0004984915414376433, "loss": 3.236271381378174, "step": 2840, "token_acc": 0.27445176567486873 }, { "epoch": 1.6654939900322487, "grad_norm": 0.40581273245134936, "learning_rate": 0.0004984888825284382, "loss": 3.1966490745544434, "step": 2841, "token_acc": 0.2786169680233678 }, { "epoch": 1.6660803283494576, "grad_norm": 0.4110783404223848, "learning_rate": 0.0004984862212850148, "loss": 3.243350028991699, "step": 2842, "token_acc": 0.27296050339432204 }, { "epoch": 1.6666666666666665, "grad_norm": 0.5153383702538445, "learning_rate": 0.0004984835577073981, "loss": 3.256732940673828, "step": 2843, "token_acc": 0.2729319812124157 }, { "epoch": 1.6672530049838756, "grad_norm": 0.44880704978525005, "learning_rate": 0.0004984808917956134, "loss": 3.260125160217285, "step": 2844, "token_acc": 0.271819140508182 }, { "epoch": 1.6678393433010847, "grad_norm": 0.3197319716077354, "learning_rate": 0.0004984782235496855, "loss": 3.2926900386810303, "step": 2845, "token_acc": 0.26584011908777144 }, { "epoch": 1.6684256816182939, "grad_norm": 0.3943195370218462, "learning_rate": 0.0004984755529696394, "loss": 3.2412266731262207, "step": 2846, "token_acc": 0.27476188352687814 }, { "epoch": 1.6690120199355027, "grad_norm": 0.3860838342042475, "learning_rate": 0.0004984728800555005, "loss": 3.2655344009399414, "step": 2847, "token_acc": 0.27119640914182447 }, { "epoch": 1.6695983582527119, "grad_norm": 0.34465659528537185, "learning_rate": 0.0004984702048072935, "loss": 3.213186264038086, "step": 2848, "token_acc": 0.2785841205256216 }, { "epoch": 1.6701846965699207, "grad_norm": 0.35769725979815153, "learning_rate": 0.000498467527225044, "loss": 3.217963218688965, "step": 2849, "token_acc": 0.27643066235131625 }, { "epoch": 1.6707710348871299, "grad_norm": 0.36363495994466405, "learning_rate": 0.0004984648473087767, "loss": 3.209224224090576, "step": 2850, "token_acc": 0.2760898827728124 }, { "epoch": 1.671357373204339, "grad_norm": 0.31830103344088206, "learning_rate": 0.0004984621650585171, "loss": 3.242711067199707, "step": 2851, "token_acc": 0.2721768417792575 }, { "epoch": 1.671943711521548, "grad_norm": 0.3471702983054354, "learning_rate": 0.0004984594804742902, "loss": 3.3064746856689453, "step": 2852, "token_acc": 0.2639758738372062 }, { "epoch": 1.672530049838757, "grad_norm": 0.34798627988646263, "learning_rate": 0.0004984567935561212, "loss": 3.2588958740234375, "step": 2853, "token_acc": 0.27207120831281906 }, { "epoch": 1.6731163881559659, "grad_norm": 0.3607793185234463, "learning_rate": 0.0004984541043040355, "loss": 3.2446255683898926, "step": 2854, "token_acc": 0.2737564517630509 }, { "epoch": 1.673702726473175, "grad_norm": 0.32926655624091755, "learning_rate": 0.0004984514127180583, "loss": 3.2151713371276855, "step": 2855, "token_acc": 0.27725342696058114 }, { "epoch": 1.674289064790384, "grad_norm": 0.37611181097852886, "learning_rate": 0.0004984487187982148, "loss": 3.2627339363098145, "step": 2856, "token_acc": 0.2722053333118302 }, { "epoch": 1.6748754031075932, "grad_norm": 0.40896488647581897, "learning_rate": 0.0004984460225445304, "loss": 3.279529333114624, "step": 2857, "token_acc": 0.26890280522682836 }, { "epoch": 1.675461741424802, "grad_norm": 0.39281409288986535, "learning_rate": 0.0004984433239570305, "loss": 3.2456796169281006, "step": 2858, "token_acc": 0.27267336915456303 }, { "epoch": 1.676048079742011, "grad_norm": 0.34413726305781994, "learning_rate": 0.0004984406230357402, "loss": 3.2264256477355957, "step": 2859, "token_acc": 0.27582265936361117 }, { "epoch": 1.67663441805922, "grad_norm": 0.3467504554488161, "learning_rate": 0.0004984379197806851, "loss": 3.2215394973754883, "step": 2860, "token_acc": 0.2748563608605428 }, { "epoch": 1.6772207563764292, "grad_norm": 0.33984180174820466, "learning_rate": 0.0004984352141918906, "loss": 3.214142084121704, "step": 2861, "token_acc": 0.2768340173021261 }, { "epoch": 1.6778070946936383, "grad_norm": 0.3615594616040768, "learning_rate": 0.0004984325062693819, "loss": 3.2550578117370605, "step": 2862, "token_acc": 0.27170446199161835 }, { "epoch": 1.6783934330108474, "grad_norm": 0.35220268672586635, "learning_rate": 0.0004984297960131846, "loss": 3.2225451469421387, "step": 2863, "token_acc": 0.27553557369576925 }, { "epoch": 1.6789797713280563, "grad_norm": 0.33769575037759203, "learning_rate": 0.0004984270834233242, "loss": 3.267636299133301, "step": 2864, "token_acc": 0.2687341807786704 }, { "epoch": 1.6795661096452652, "grad_norm": 0.35845810691304725, "learning_rate": 0.0004984243684998261, "loss": 3.2455272674560547, "step": 2865, "token_acc": 0.2753273940566806 }, { "epoch": 1.6801524479624743, "grad_norm": 0.38971212381741577, "learning_rate": 0.0004984216512427158, "loss": 3.2231504917144775, "step": 2866, "token_acc": 0.27462370305421596 }, { "epoch": 1.6807387862796834, "grad_norm": 0.39681164654289813, "learning_rate": 0.0004984189316520188, "loss": 3.2895255088806152, "step": 2867, "token_acc": 0.2676954160060288 }, { "epoch": 1.6813251245968925, "grad_norm": 0.374844709777505, "learning_rate": 0.0004984162097277607, "loss": 3.2543487548828125, "step": 2868, "token_acc": 0.27301719430143845 }, { "epoch": 1.6819114629141014, "grad_norm": 0.29416803244766027, "learning_rate": 0.0004984134854699671, "loss": 3.1950864791870117, "step": 2869, "token_acc": 0.2782429499955767 }, { "epoch": 1.6824978012313103, "grad_norm": 0.3436114704202493, "learning_rate": 0.0004984107588786635, "loss": 3.2870800495147705, "step": 2870, "token_acc": 0.2666457753926907 }, { "epoch": 1.6830841395485194, "grad_norm": 0.4192258914010484, "learning_rate": 0.0004984080299538755, "loss": 3.2338109016418457, "step": 2871, "token_acc": 0.27302191221024447 }, { "epoch": 1.6836704778657285, "grad_norm": 0.4003790402565155, "learning_rate": 0.0004984052986956289, "loss": 3.2391676902770996, "step": 2872, "token_acc": 0.2728091141692902 }, { "epoch": 1.6842568161829377, "grad_norm": 0.33582620360864984, "learning_rate": 0.0004984025651039492, "loss": 3.2198894023895264, "step": 2873, "token_acc": 0.2753551616138871 }, { "epoch": 1.6848431545001465, "grad_norm": 0.33429753447902155, "learning_rate": 0.0004983998291788621, "loss": 3.2407405376434326, "step": 2874, "token_acc": 0.2734838889221222 }, { "epoch": 1.6854294928173557, "grad_norm": 0.35421971099328325, "learning_rate": 0.0004983970909203934, "loss": 3.2357354164123535, "step": 2875, "token_acc": 0.2748795792433671 }, { "epoch": 1.6860158311345645, "grad_norm": 0.3741384256079026, "learning_rate": 0.0004983943503285688, "loss": 3.2567801475524902, "step": 2876, "token_acc": 0.2721441445223035 }, { "epoch": 1.6866021694517737, "grad_norm": 0.345836872200717, "learning_rate": 0.0004983916074034139, "loss": 3.2966365814208984, "step": 2877, "token_acc": 0.2660323042888673 }, { "epoch": 1.6871885077689828, "grad_norm": 0.39063554246123605, "learning_rate": 0.0004983888621449545, "loss": 3.21757173538208, "step": 2878, "token_acc": 0.27558786163104726 }, { "epoch": 1.6877748460861919, "grad_norm": 0.3869949530362038, "learning_rate": 0.0004983861145532166, "loss": 3.2078969478607178, "step": 2879, "token_acc": 0.2780600263619055 }, { "epoch": 1.6883611844034008, "grad_norm": 0.3434902100940193, "learning_rate": 0.0004983833646282258, "loss": 3.2380006313323975, "step": 2880, "token_acc": 0.27356789356629063 }, { "epoch": 1.6889475227206097, "grad_norm": 0.34636998172161854, "learning_rate": 0.0004983806123700079, "loss": 3.211334466934204, "step": 2881, "token_acc": 0.2796932242270688 }, { "epoch": 1.6895338610378188, "grad_norm": 0.3428730656032943, "learning_rate": 0.0004983778577785889, "loss": 3.261359691619873, "step": 2882, "token_acc": 0.269963419803545 }, { "epoch": 1.6901201993550279, "grad_norm": 0.36185207007541964, "learning_rate": 0.0004983751008539947, "loss": 3.2325234413146973, "step": 2883, "token_acc": 0.27488725337473763 }, { "epoch": 1.690706537672237, "grad_norm": 0.3608162432825475, "learning_rate": 0.000498372341596251, "loss": 3.224308490753174, "step": 2884, "token_acc": 0.27561507073902775 }, { "epoch": 1.6912928759894459, "grad_norm": 0.35638662663854925, "learning_rate": 0.0004983695800053839, "loss": 3.2613353729248047, "step": 2885, "token_acc": 0.2692588136643698 }, { "epoch": 1.6918792143066548, "grad_norm": 0.34155490699521035, "learning_rate": 0.0004983668160814192, "loss": 3.2296810150146484, "step": 2886, "token_acc": 0.2742266999571903 }, { "epoch": 1.692465552623864, "grad_norm": 0.30582027308419557, "learning_rate": 0.0004983640498243831, "loss": 3.2086734771728516, "step": 2887, "token_acc": 0.2779119010037504 }, { "epoch": 1.693051890941073, "grad_norm": 0.32889832737058994, "learning_rate": 0.0004983612812343013, "loss": 3.1901867389678955, "step": 2888, "token_acc": 0.28109710358217244 }, { "epoch": 1.6936382292582821, "grad_norm": 0.33600176577089796, "learning_rate": 0.0004983585103111999, "loss": 3.1642379760742188, "step": 2889, "token_acc": 0.28245248825745367 }, { "epoch": 1.6942245675754912, "grad_norm": 0.32020247114454153, "learning_rate": 0.000498355737055105, "loss": 3.222043991088867, "step": 2890, "token_acc": 0.27524997846105503 }, { "epoch": 1.6948109058927001, "grad_norm": 0.32750785473876326, "learning_rate": 0.0004983529614660427, "loss": 3.2486190795898438, "step": 2891, "token_acc": 0.2723835636022016 }, { "epoch": 1.695397244209909, "grad_norm": 0.34933460689597867, "learning_rate": 0.000498350183544039, "loss": 3.2640299797058105, "step": 2892, "token_acc": 0.26989428365454904 }, { "epoch": 1.6959835825271181, "grad_norm": 0.3347309150446478, "learning_rate": 0.0004983474032891199, "loss": 3.252535581588745, "step": 2893, "token_acc": 0.2716911890707674 }, { "epoch": 1.6965699208443272, "grad_norm": 0.34390645727999336, "learning_rate": 0.0004983446207013115, "loss": 3.2394938468933105, "step": 2894, "token_acc": 0.2718806748513868 }, { "epoch": 1.6971562591615363, "grad_norm": 0.34901035942175995, "learning_rate": 0.0004983418357806401, "loss": 3.2158308029174805, "step": 2895, "token_acc": 0.2752899284739736 }, { "epoch": 1.6977425974787452, "grad_norm": 0.3581554556331172, "learning_rate": 0.000498339048527132, "loss": 3.2208313941955566, "step": 2896, "token_acc": 0.2750399093671147 }, { "epoch": 1.6983289357959541, "grad_norm": 0.34400339725625534, "learning_rate": 0.000498336258940813, "loss": 3.24052095413208, "step": 2897, "token_acc": 0.27316235772398834 }, { "epoch": 1.6989152741131632, "grad_norm": 0.36663961983743665, "learning_rate": 0.0004983334670217095, "loss": 3.206697940826416, "step": 2898, "token_acc": 0.2770434728223196 }, { "epoch": 1.6995016124303723, "grad_norm": 0.3349017606432719, "learning_rate": 0.0004983306727698477, "loss": 3.2583377361297607, "step": 2899, "token_acc": 0.27023326572008116 }, { "epoch": 1.7000879507475815, "grad_norm": 0.32712479401379124, "learning_rate": 0.0004983278761852539, "loss": 3.256592273712158, "step": 2900, "token_acc": 0.2711067633303405 }, { "epoch": 1.7006742890647903, "grad_norm": 0.3252553592693444, "learning_rate": 0.0004983250772679543, "loss": 3.2417705059051514, "step": 2901, "token_acc": 0.2739805809960307 }, { "epoch": 1.7012606273819995, "grad_norm": 0.3632553249171078, "learning_rate": 0.0004983222760179752, "loss": 3.2737178802490234, "step": 2902, "token_acc": 0.26884821130056835 }, { "epoch": 1.7018469656992083, "grad_norm": 0.36038204512469485, "learning_rate": 0.0004983194724353429, "loss": 3.238574743270874, "step": 2903, "token_acc": 0.2736429770565193 }, { "epoch": 1.7024333040164175, "grad_norm": 0.39232407339359915, "learning_rate": 0.0004983166665200839, "loss": 3.2138049602508545, "step": 2904, "token_acc": 0.2777297283087317 }, { "epoch": 1.7030196423336266, "grad_norm": 0.4113127586839885, "learning_rate": 0.0004983138582722244, "loss": 3.244720220565796, "step": 2905, "token_acc": 0.2715484521717429 }, { "epoch": 1.7036059806508357, "grad_norm": 0.4365462595276291, "learning_rate": 0.0004983110476917907, "loss": 3.21602725982666, "step": 2906, "token_acc": 0.2763317254484328 }, { "epoch": 1.7041923189680446, "grad_norm": 0.4333985172809448, "learning_rate": 0.0004983082347788094, "loss": 3.2125048637390137, "step": 2907, "token_acc": 0.27890444074534293 }, { "epoch": 1.7047786572852535, "grad_norm": 0.40934218359409935, "learning_rate": 0.0004983054195333069, "loss": 3.1973676681518555, "step": 2908, "token_acc": 0.2788486970873581 }, { "epoch": 1.7053649956024626, "grad_norm": 0.40428112562326424, "learning_rate": 0.0004983026019553094, "loss": 3.2668399810791016, "step": 2909, "token_acc": 0.2690622508338653 }, { "epoch": 1.7059513339196717, "grad_norm": 0.4082594051392893, "learning_rate": 0.0004982997820448437, "loss": 3.2588162422180176, "step": 2910, "token_acc": 0.2698203913761608 }, { "epoch": 1.7065376722368808, "grad_norm": 0.4212207923262204, "learning_rate": 0.000498296959801936, "loss": 3.293275833129883, "step": 2911, "token_acc": 0.26636096986153984 }, { "epoch": 1.7071240105540897, "grad_norm": 0.4365212188736934, "learning_rate": 0.000498294135226613, "loss": 3.215772867202759, "step": 2912, "token_acc": 0.27624439479481155 }, { "epoch": 1.7077103488712986, "grad_norm": 0.47002232476399813, "learning_rate": 0.0004982913083189012, "loss": 3.2346603870391846, "step": 2913, "token_acc": 0.2745080654694737 }, { "epoch": 1.7082966871885077, "grad_norm": 0.31572224687400335, "learning_rate": 0.0004982884790788272, "loss": 3.2538657188415527, "step": 2914, "token_acc": 0.27191922587965833 }, { "epoch": 1.7088830255057168, "grad_norm": 0.3306971530735801, "learning_rate": 0.0004982856475064175, "loss": 3.21561598777771, "step": 2915, "token_acc": 0.2763440403318328 }, { "epoch": 1.709469363822926, "grad_norm": 0.3174128371929221, "learning_rate": 0.0004982828136016986, "loss": 3.207119941711426, "step": 2916, "token_acc": 0.27671113243503315 }, { "epoch": 1.7100557021401348, "grad_norm": 0.3066164469585881, "learning_rate": 0.0004982799773646973, "loss": 3.215477705001831, "step": 2917, "token_acc": 0.27665513150855575 }, { "epoch": 1.710642040457344, "grad_norm": 0.3430531821664205, "learning_rate": 0.0004982771387954402, "loss": 3.231034278869629, "step": 2918, "token_acc": 0.2755734179162055 }, { "epoch": 1.7112283787745528, "grad_norm": 0.3402987833114422, "learning_rate": 0.0004982742978939538, "loss": 3.2693495750427246, "step": 2919, "token_acc": 0.26985224112613027 }, { "epoch": 1.711814717091762, "grad_norm": 0.3591023537472934, "learning_rate": 0.0004982714546602652, "loss": 3.2044625282287598, "step": 2920, "token_acc": 0.2784588620548506 }, { "epoch": 1.712401055408971, "grad_norm": 0.36471490933823236, "learning_rate": 0.0004982686090944006, "loss": 3.245622158050537, "step": 2921, "token_acc": 0.2715842654949556 }, { "epoch": 1.7129873937261801, "grad_norm": 0.3080611201451875, "learning_rate": 0.000498265761196387, "loss": 3.2527220249176025, "step": 2922, "token_acc": 0.27139268046310444 }, { "epoch": 1.713573732043389, "grad_norm": 0.30586315159749394, "learning_rate": 0.0004982629109662512, "loss": 3.2204384803771973, "step": 2923, "token_acc": 0.2754876591049403 }, { "epoch": 1.714160070360598, "grad_norm": 0.3648844362231272, "learning_rate": 0.0004982600584040197, "loss": 3.247915744781494, "step": 2924, "token_acc": 0.2734142984110468 }, { "epoch": 1.714746408677807, "grad_norm": 0.36170905249156393, "learning_rate": 0.0004982572035097197, "loss": 3.240469455718994, "step": 2925, "token_acc": 0.276552030747295 }, { "epoch": 1.7153327469950161, "grad_norm": 0.297762001161725, "learning_rate": 0.0004982543462833777, "loss": 3.193209648132324, "step": 2926, "token_acc": 0.2809385873822667 }, { "epoch": 1.7159190853122253, "grad_norm": 0.30717979976635335, "learning_rate": 0.0004982514867250206, "loss": 3.195533275604248, "step": 2927, "token_acc": 0.27891429301315623 }, { "epoch": 1.7165054236294341, "grad_norm": 0.31617972081210305, "learning_rate": 0.0004982486248346755, "loss": 3.278057098388672, "step": 2928, "token_acc": 0.26764444491188955 }, { "epoch": 1.7170917619466433, "grad_norm": 0.3616189517132931, "learning_rate": 0.0004982457606123689, "loss": 3.2243564128875732, "step": 2929, "token_acc": 0.2732860823090627 }, { "epoch": 1.7176781002638521, "grad_norm": 0.33887948444764937, "learning_rate": 0.000498242894058128, "loss": 3.2172412872314453, "step": 2930, "token_acc": 0.277730658318812 }, { "epoch": 1.7182644385810613, "grad_norm": 0.3084703335401283, "learning_rate": 0.0004982400251719796, "loss": 3.211422920227051, "step": 2931, "token_acc": 0.2781537131683846 }, { "epoch": 1.7188507768982704, "grad_norm": 0.3502064730269053, "learning_rate": 0.0004982371539539506, "loss": 3.256725788116455, "step": 2932, "token_acc": 0.2708511389632389 }, { "epoch": 1.7194371152154795, "grad_norm": 0.3503760175185083, "learning_rate": 0.0004982342804040681, "loss": 3.260897159576416, "step": 2933, "token_acc": 0.27022671050236136 }, { "epoch": 1.7200234535326884, "grad_norm": 0.2953753550243247, "learning_rate": 0.000498231404522359, "loss": 3.278717517852783, "step": 2934, "token_acc": 0.26924270599551137 }, { "epoch": 1.7206097918498973, "grad_norm": 0.3074354124223109, "learning_rate": 0.0004982285263088504, "loss": 3.2559940814971924, "step": 2935, "token_acc": 0.27261939233489835 }, { "epoch": 1.7211961301671064, "grad_norm": 0.29029054434071005, "learning_rate": 0.0004982256457635693, "loss": 3.1864242553710938, "step": 2936, "token_acc": 0.2801218977187265 }, { "epoch": 1.7217824684843155, "grad_norm": 0.31093496511605156, "learning_rate": 0.0004982227628865427, "loss": 3.1821141242980957, "step": 2937, "token_acc": 0.2805667303566554 }, { "epoch": 1.7223688068015246, "grad_norm": 0.35310803622015535, "learning_rate": 0.0004982198776777978, "loss": 3.2258875370025635, "step": 2938, "token_acc": 0.2781626981789755 }, { "epoch": 1.7229551451187335, "grad_norm": 0.396046557807798, "learning_rate": 0.0004982169901373615, "loss": 3.237213611602783, "step": 2939, "token_acc": 0.27376153710996043 }, { "epoch": 1.7235414834359424, "grad_norm": 0.45557161486600006, "learning_rate": 0.0004982141002652611, "loss": 3.2402782440185547, "step": 2940, "token_acc": 0.2728206814352061 }, { "epoch": 1.7241278217531515, "grad_norm": 0.43608211406194136, "learning_rate": 0.0004982112080615238, "loss": 3.2449405193328857, "step": 2941, "token_acc": 0.2716437274809101 }, { "epoch": 1.7247141600703606, "grad_norm": 0.40080783312829255, "learning_rate": 0.0004982083135261765, "loss": 3.2063965797424316, "step": 2942, "token_acc": 0.27758305941375455 }, { "epoch": 1.7253004983875697, "grad_norm": 0.374390420989693, "learning_rate": 0.0004982054166592466, "loss": 3.284343719482422, "step": 2943, "token_acc": 0.2683870367546922 }, { "epoch": 1.7258868367047786, "grad_norm": 0.39281511774799016, "learning_rate": 0.0004982025174607614, "loss": 3.2126054763793945, "step": 2944, "token_acc": 0.27706377895746587 }, { "epoch": 1.7264731750219877, "grad_norm": 0.37342900560415854, "learning_rate": 0.0004981996159307479, "loss": 3.201507091522217, "step": 2945, "token_acc": 0.27888671336022813 }, { "epoch": 1.7270595133391966, "grad_norm": 0.36786987732230925, "learning_rate": 0.0004981967120692335, "loss": 3.1971688270568848, "step": 2946, "token_acc": 0.2782797977162704 }, { "epoch": 1.7276458516564057, "grad_norm": 0.35235560316059217, "learning_rate": 0.0004981938058762454, "loss": 3.272752046585083, "step": 2947, "token_acc": 0.27078533756658185 }, { "epoch": 1.7282321899736148, "grad_norm": 0.38353194333142954, "learning_rate": 0.000498190897351811, "loss": 3.263674259185791, "step": 2948, "token_acc": 0.27082498008237826 }, { "epoch": 1.728818528290824, "grad_norm": 0.37371185207299473, "learning_rate": 0.0004981879864959575, "loss": 3.252302646636963, "step": 2949, "token_acc": 0.27062768789678476 }, { "epoch": 1.7294048666080328, "grad_norm": 0.3575118930210213, "learning_rate": 0.0004981850733087123, "loss": 3.224151134490967, "step": 2950, "token_acc": 0.27643006497388256 }, { "epoch": 1.7299912049252417, "grad_norm": 0.4125809343804859, "learning_rate": 0.0004981821577901029, "loss": 3.2398011684417725, "step": 2951, "token_acc": 0.27276146131805157 }, { "epoch": 1.7305775432424508, "grad_norm": 0.3574589938657644, "learning_rate": 0.0004981792399401565, "loss": 3.2361512184143066, "step": 2952, "token_acc": 0.2736039976340966 }, { "epoch": 1.73116388155966, "grad_norm": 0.387037460078306, "learning_rate": 0.0004981763197589005, "loss": 3.2521414756774902, "step": 2953, "token_acc": 0.2719322226369663 }, { "epoch": 1.731750219876869, "grad_norm": 0.4017064388643273, "learning_rate": 0.0004981733972463624, "loss": 3.2117247581481934, "step": 2954, "token_acc": 0.27719028239444254 }, { "epoch": 1.732336558194078, "grad_norm": 0.4020798108864234, "learning_rate": 0.0004981704724025697, "loss": 3.240279197692871, "step": 2955, "token_acc": 0.273643557138758 }, { "epoch": 1.732922896511287, "grad_norm": 0.3243453294327697, "learning_rate": 0.0004981675452275497, "loss": 3.2274320125579834, "step": 2956, "token_acc": 0.2757550009965279 }, { "epoch": 1.733509234828496, "grad_norm": 0.3814305758346807, "learning_rate": 0.0004981646157213302, "loss": 3.219722270965576, "step": 2957, "token_acc": 0.276751677137685 }, { "epoch": 1.734095573145705, "grad_norm": 0.37190985656625297, "learning_rate": 0.0004981616838839384, "loss": 3.2010815143585205, "step": 2958, "token_acc": 0.276917454184723 }, { "epoch": 1.7346819114629142, "grad_norm": 0.40406374133025846, "learning_rate": 0.0004981587497154021, "loss": 3.27656888961792, "step": 2959, "token_acc": 0.2693105400443379 }, { "epoch": 1.7352682497801233, "grad_norm": 0.39288688110412295, "learning_rate": 0.0004981558132157487, "loss": 3.265738010406494, "step": 2960, "token_acc": 0.2691838116513033 }, { "epoch": 1.7358545880973322, "grad_norm": 0.33378428899074625, "learning_rate": 0.0004981528743850058, "loss": 3.243781805038452, "step": 2961, "token_acc": 0.27264371766531537 }, { "epoch": 1.736440926414541, "grad_norm": 0.31061086929213516, "learning_rate": 0.0004981499332232011, "loss": 3.236665725708008, "step": 2962, "token_acc": 0.27372398743945925 }, { "epoch": 1.7370272647317502, "grad_norm": 0.3939558479365216, "learning_rate": 0.0004981469897303621, "loss": 3.209806442260742, "step": 2963, "token_acc": 0.27713399578282877 }, { "epoch": 1.7376136030489593, "grad_norm": 0.3813587229703237, "learning_rate": 0.0004981440439065165, "loss": 3.2112414836883545, "step": 2964, "token_acc": 0.2780221216954737 }, { "epoch": 1.7381999413661684, "grad_norm": 0.4030733387875492, "learning_rate": 0.0004981410957516921, "loss": 3.304614543914795, "step": 2965, "token_acc": 0.2643179317492161 }, { "epoch": 1.7387862796833773, "grad_norm": 0.4816950232488938, "learning_rate": 0.0004981381452659163, "loss": 3.291616678237915, "step": 2966, "token_acc": 0.2653495496010455 }, { "epoch": 1.7393726180005862, "grad_norm": 0.41934844139271665, "learning_rate": 0.0004981351924492171, "loss": 3.292178153991699, "step": 2967, "token_acc": 0.26680586422438224 }, { "epoch": 1.7399589563177953, "grad_norm": 0.38681918418696587, "learning_rate": 0.0004981322373016221, "loss": 3.202153205871582, "step": 2968, "token_acc": 0.27886285302054664 }, { "epoch": 1.7405452946350044, "grad_norm": 0.422892783665272, "learning_rate": 0.0004981292798231592, "loss": 3.2289347648620605, "step": 2969, "token_acc": 0.27432715331463364 }, { "epoch": 1.7411316329522135, "grad_norm": 0.3980869388031106, "learning_rate": 0.000498126320013856, "loss": 3.238144874572754, "step": 2970, "token_acc": 0.2726922395925552 }, { "epoch": 1.7417179712694224, "grad_norm": 0.3626910506076907, "learning_rate": 0.0004981233578737404, "loss": 3.2238550186157227, "step": 2971, "token_acc": 0.27277440188178975 }, { "epoch": 1.7423043095866315, "grad_norm": 0.3875155033807427, "learning_rate": 0.0004981203934028402, "loss": 3.222712278366089, "step": 2972, "token_acc": 0.27390961846493184 }, { "epoch": 1.7428906479038404, "grad_norm": 0.3623234780438531, "learning_rate": 0.0004981174266011832, "loss": 3.2737631797790527, "step": 2973, "token_acc": 0.26902172447818656 }, { "epoch": 1.7434769862210495, "grad_norm": 0.3612056195844169, "learning_rate": 0.0004981144574687973, "loss": 3.1621432304382324, "step": 2974, "token_acc": 0.28211334396376647 }, { "epoch": 1.7440633245382586, "grad_norm": 0.3463241596483429, "learning_rate": 0.0004981114860057105, "loss": 3.1605350971221924, "step": 2975, "token_acc": 0.2859252504002777 }, { "epoch": 1.7446496628554677, "grad_norm": 0.3170790506923805, "learning_rate": 0.0004981085122119505, "loss": 3.2323050498962402, "step": 2976, "token_acc": 0.27584826644184657 }, { "epoch": 1.7452360011726766, "grad_norm": 0.3320462012680574, "learning_rate": 0.0004981055360875455, "loss": 3.220576763153076, "step": 2977, "token_acc": 0.27720228954348736 }, { "epoch": 1.7458223394898855, "grad_norm": 0.2688471085552832, "learning_rate": 0.0004981025576325232, "loss": 3.2152085304260254, "step": 2978, "token_acc": 0.27587546946946434 }, { "epoch": 1.7464086778070946, "grad_norm": 0.31908683841767665, "learning_rate": 0.0004980995768469117, "loss": 3.237415313720703, "step": 2979, "token_acc": 0.274964384913529 }, { "epoch": 1.7469950161243037, "grad_norm": 0.3256698652534432, "learning_rate": 0.0004980965937307391, "loss": 3.2539477348327637, "step": 2980, "token_acc": 0.27067472111391583 }, { "epoch": 1.7475813544415129, "grad_norm": 0.29343056246799526, "learning_rate": 0.0004980936082840333, "loss": 3.243265390396118, "step": 2981, "token_acc": 0.27319400426922785 }, { "epoch": 1.7481676927587217, "grad_norm": 0.3086384803980287, "learning_rate": 0.0004980906205068223, "loss": 3.183326244354248, "step": 2982, "token_acc": 0.2786131631103447 }, { "epoch": 1.7487540310759309, "grad_norm": 0.280483968244348, "learning_rate": 0.0004980876303991343, "loss": 3.205498218536377, "step": 2983, "token_acc": 0.27788064416294034 }, { "epoch": 1.7493403693931397, "grad_norm": 0.313437214406856, "learning_rate": 0.0004980846379609972, "loss": 3.239558219909668, "step": 2984, "token_acc": 0.2710759877775039 }, { "epoch": 1.7499267077103489, "grad_norm": 0.2894197397514832, "learning_rate": 0.0004980816431924392, "loss": 3.2059102058410645, "step": 2985, "token_acc": 0.2779870493742606 }, { "epoch": 1.750513046027558, "grad_norm": 0.3634947237804513, "learning_rate": 0.0004980786460934886, "loss": 3.2123279571533203, "step": 2986, "token_acc": 0.2768251674401801 }, { "epoch": 1.751099384344767, "grad_norm": 0.41487629488616956, "learning_rate": 0.0004980756466641733, "loss": 3.17323637008667, "step": 2987, "token_acc": 0.2805083987858191 }, { "epoch": 1.751685722661976, "grad_norm": 0.37602651674794163, "learning_rate": 0.0004980726449045217, "loss": 3.246419668197632, "step": 2988, "token_acc": 0.27217933467983363 }, { "epoch": 1.7522720609791849, "grad_norm": 0.3649613881184053, "learning_rate": 0.0004980696408145619, "loss": 3.2143547534942627, "step": 2989, "token_acc": 0.27642830410262365 }, { "epoch": 1.752858399296394, "grad_norm": 0.40451866823687505, "learning_rate": 0.0004980666343943219, "loss": 3.160172939300537, "step": 2990, "token_acc": 0.28651241209950623 }, { "epoch": 1.753444737613603, "grad_norm": 0.3543439709519719, "learning_rate": 0.0004980636256438303, "loss": 3.200169324874878, "step": 2991, "token_acc": 0.27783205782423115 }, { "epoch": 1.7540310759308122, "grad_norm": 0.34117552345337554, "learning_rate": 0.0004980606145631152, "loss": 3.196709632873535, "step": 2992, "token_acc": 0.27759981695458186 }, { "epoch": 1.754617414248021, "grad_norm": 0.3493708947884874, "learning_rate": 0.0004980576011522048, "loss": 3.2362403869628906, "step": 2993, "token_acc": 0.2749046391684164 }, { "epoch": 1.75520375256523, "grad_norm": 0.3373444048842763, "learning_rate": 0.0004980545854111276, "loss": 3.245910167694092, "step": 2994, "token_acc": 0.27103813615159583 }, { "epoch": 1.755790090882439, "grad_norm": 0.3148879311900756, "learning_rate": 0.0004980515673399117, "loss": 3.1721384525299072, "step": 2995, "token_acc": 0.28068246088104065 }, { "epoch": 1.7563764291996482, "grad_norm": 0.3455036237755556, "learning_rate": 0.0004980485469385857, "loss": 3.218928575515747, "step": 2996, "token_acc": 0.27588237101051577 }, { "epoch": 1.7569627675168573, "grad_norm": 0.33901707011842386, "learning_rate": 0.0004980455242071779, "loss": 3.2165982723236084, "step": 2997, "token_acc": 0.2759785650705049 }, { "epoch": 1.7575491058340662, "grad_norm": 0.3530099508895064, "learning_rate": 0.0004980424991457165, "loss": 3.284792423248291, "step": 2998, "token_acc": 0.2666016133229248 }, { "epoch": 1.7581354441512753, "grad_norm": 0.3011842054682417, "learning_rate": 0.0004980394717542301, "loss": 3.198941707611084, "step": 2999, "token_acc": 0.2764731844491242 }, { "epoch": 1.7587217824684842, "grad_norm": 0.32958241487595, "learning_rate": 0.0004980364420327472, "loss": 3.2457873821258545, "step": 3000, "token_acc": 0.2729881117425108 }, { "epoch": 1.7593081207856933, "grad_norm": 0.3342327695110001, "learning_rate": 0.0004980334099812961, "loss": 3.2389214038848877, "step": 3001, "token_acc": 0.2731218617613377 }, { "epoch": 1.7598944591029024, "grad_norm": 0.4149505862224289, "learning_rate": 0.0004980303755999053, "loss": 3.22603440284729, "step": 3002, "token_acc": 0.2764770088497469 }, { "epoch": 1.7604807974201115, "grad_norm": 0.5174176115005527, "learning_rate": 0.0004980273388886034, "loss": 3.2259535789489746, "step": 3003, "token_acc": 0.27584888683155934 }, { "epoch": 1.7610671357373204, "grad_norm": 0.5027779996337693, "learning_rate": 0.0004980242998474188, "loss": 3.2213692665100098, "step": 3004, "token_acc": 0.2749432845990985 }, { "epoch": 1.7616534740545293, "grad_norm": 0.3558314802372436, "learning_rate": 0.0004980212584763802, "loss": 3.221322536468506, "step": 3005, "token_acc": 0.27349832889563763 }, { "epoch": 1.7622398123717384, "grad_norm": 0.2938893391343505, "learning_rate": 0.0004980182147755161, "loss": 3.193880558013916, "step": 3006, "token_acc": 0.2796691907772814 }, { "epoch": 1.7628261506889475, "grad_norm": 0.37599480835141597, "learning_rate": 0.000498015168744855, "loss": 3.234499931335449, "step": 3007, "token_acc": 0.2731092993617072 }, { "epoch": 1.7634124890061567, "grad_norm": 0.32374380043964024, "learning_rate": 0.0004980121203844257, "loss": 3.1899890899658203, "step": 3008, "token_acc": 0.279889653576397 }, { "epoch": 1.7639988273233655, "grad_norm": 0.2894269404528567, "learning_rate": 0.0004980090696942567, "loss": 3.2286553382873535, "step": 3009, "token_acc": 0.2744495114006515 }, { "epoch": 1.7645851656405747, "grad_norm": 0.27614261824945624, "learning_rate": 0.0004980060166743766, "loss": 3.2384986877441406, "step": 3010, "token_acc": 0.27345347194266184 }, { "epoch": 1.7651715039577835, "grad_norm": 0.36059398725152203, "learning_rate": 0.0004980029613248141, "loss": 3.2385268211364746, "step": 3011, "token_acc": 0.27411050991965896 }, { "epoch": 1.7657578422749927, "grad_norm": 0.3075915475900472, "learning_rate": 0.0004979999036455982, "loss": 3.2044291496276855, "step": 3012, "token_acc": 0.2773535847599434 }, { "epoch": 1.7663441805922018, "grad_norm": 0.2954681841014092, "learning_rate": 0.0004979968436367572, "loss": 3.2330384254455566, "step": 3013, "token_acc": 0.2748736885571433 }, { "epoch": 1.7669305189094109, "grad_norm": 0.2984382566031914, "learning_rate": 0.0004979937812983202, "loss": 3.2375760078430176, "step": 3014, "token_acc": 0.27324742537508495 }, { "epoch": 1.7675168572266198, "grad_norm": 0.31673112672210957, "learning_rate": 0.0004979907166303157, "loss": 3.211059331893921, "step": 3015, "token_acc": 0.2781554879241415 }, { "epoch": 1.7681031955438287, "grad_norm": 0.41317290345659796, "learning_rate": 0.0004979876496327726, "loss": 3.2291154861450195, "step": 3016, "token_acc": 0.2761533581074957 }, { "epoch": 1.7686895338610378, "grad_norm": 0.39161828254612224, "learning_rate": 0.0004979845803057196, "loss": 3.2371697425842285, "step": 3017, "token_acc": 0.27153889764463024 }, { "epoch": 1.7692758721782469, "grad_norm": 0.32476241901676794, "learning_rate": 0.0004979815086491858, "loss": 3.21490478515625, "step": 3018, "token_acc": 0.27660157132791835 }, { "epoch": 1.769862210495456, "grad_norm": 0.34729100067838015, "learning_rate": 0.0004979784346631998, "loss": 3.2134461402893066, "step": 3019, "token_acc": 0.2780134174637004 }, { "epoch": 1.770448548812665, "grad_norm": 0.39552937848305697, "learning_rate": 0.0004979753583477905, "loss": 3.262941837310791, "step": 3020, "token_acc": 0.26955587835331934 }, { "epoch": 1.7710348871298738, "grad_norm": 0.47807493147682856, "learning_rate": 0.000497972279702987, "loss": 3.257229804992676, "step": 3021, "token_acc": 0.2712900262533068 }, { "epoch": 1.771621225447083, "grad_norm": 0.41934966795898143, "learning_rate": 0.0004979691987288179, "loss": 3.2018563747406006, "step": 3022, "token_acc": 0.2767373146928282 }, { "epoch": 1.772207563764292, "grad_norm": 0.3828129179671491, "learning_rate": 0.0004979661154253125, "loss": 3.2349252700805664, "step": 3023, "token_acc": 0.2732313073422437 }, { "epoch": 1.7727939020815011, "grad_norm": 0.38065321810916636, "learning_rate": 0.0004979630297924996, "loss": 3.2119650840759277, "step": 3024, "token_acc": 0.2760106165781952 }, { "epoch": 1.77338024039871, "grad_norm": 0.3645631535819651, "learning_rate": 0.000497959941830408, "loss": 3.207237958908081, "step": 3025, "token_acc": 0.2778072800396645 }, { "epoch": 1.7739665787159191, "grad_norm": 0.35671996851641835, "learning_rate": 0.000497956851539067, "loss": 3.261927843093872, "step": 3026, "token_acc": 0.2695882606904834 }, { "epoch": 1.774552917033128, "grad_norm": 0.3504982534444864, "learning_rate": 0.0004979537589185055, "loss": 3.21873140335083, "step": 3027, "token_acc": 0.27596293852068343 }, { "epoch": 1.7751392553503371, "grad_norm": 0.37743710258961743, "learning_rate": 0.0004979506639687524, "loss": 3.236753463745117, "step": 3028, "token_acc": 0.27391937578539466 }, { "epoch": 1.7757255936675462, "grad_norm": 0.3644401203938981, "learning_rate": 0.0004979475666898371, "loss": 3.169583320617676, "step": 3029, "token_acc": 0.2842915085699333 }, { "epoch": 1.7763119319847553, "grad_norm": 0.347407136563413, "learning_rate": 0.0004979444670817886, "loss": 3.225637435913086, "step": 3030, "token_acc": 0.27409282119569967 }, { "epoch": 1.7768982703019642, "grad_norm": 0.38292462153665086, "learning_rate": 0.0004979413651446357, "loss": 3.220942497253418, "step": 3031, "token_acc": 0.2744512871971779 }, { "epoch": 1.7774846086191731, "grad_norm": 0.34667861122472304, "learning_rate": 0.0004979382608784079, "loss": 3.2380077838897705, "step": 3032, "token_acc": 0.27279422405060677 }, { "epoch": 1.7780709469363822, "grad_norm": 0.38666134115973627, "learning_rate": 0.0004979351542831343, "loss": 3.1864466667175293, "step": 3033, "token_acc": 0.2786576478558085 }, { "epoch": 1.7786572852535913, "grad_norm": 0.42196850420933596, "learning_rate": 0.0004979320453588439, "loss": 3.2147109508514404, "step": 3034, "token_acc": 0.27672610711062673 }, { "epoch": 1.7792436235708005, "grad_norm": 0.4313685385812127, "learning_rate": 0.0004979289341055661, "loss": 3.253695011138916, "step": 3035, "token_acc": 0.2722565425652201 }, { "epoch": 1.7798299618880093, "grad_norm": 0.3222914467112649, "learning_rate": 0.00049792582052333, "loss": 3.211650848388672, "step": 3036, "token_acc": 0.27660722683504096 }, { "epoch": 1.7804163002052185, "grad_norm": 0.34281298595103366, "learning_rate": 0.000497922704612165, "loss": 3.200486421585083, "step": 3037, "token_acc": 0.27955332169890557 }, { "epoch": 1.7810026385224274, "grad_norm": 0.4052020324178503, "learning_rate": 0.0004979195863721002, "loss": 3.238013505935669, "step": 3038, "token_acc": 0.2738013424344672 }, { "epoch": 1.7815889768396365, "grad_norm": 0.35748763093628994, "learning_rate": 0.000497916465803165, "loss": 3.2003164291381836, "step": 3039, "token_acc": 0.2768867172283616 }, { "epoch": 1.7821753151568456, "grad_norm": 0.3014288961182022, "learning_rate": 0.0004979133429053885, "loss": 3.2241785526275635, "step": 3040, "token_acc": 0.27481671377993505 }, { "epoch": 1.7827616534740547, "grad_norm": 0.3653977046753094, "learning_rate": 0.0004979102176788005, "loss": 3.227954864501953, "step": 3041, "token_acc": 0.2754926751156627 }, { "epoch": 1.7833479917912636, "grad_norm": 0.35387790311194056, "learning_rate": 0.0004979070901234299, "loss": 3.2072231769561768, "step": 3042, "token_acc": 0.27862468550612235 }, { "epoch": 1.7839343301084725, "grad_norm": 0.3134796758440765, "learning_rate": 0.0004979039602393063, "loss": 3.264094591140747, "step": 3043, "token_acc": 0.27295852928601966 }, { "epoch": 1.7845206684256816, "grad_norm": 0.3143294474850163, "learning_rate": 0.0004979008280264589, "loss": 3.2129220962524414, "step": 3044, "token_acc": 0.2758149070902107 }, { "epoch": 1.7851070067428907, "grad_norm": 0.35473493844366777, "learning_rate": 0.0004978976934849176, "loss": 3.2005584239959717, "step": 3045, "token_acc": 0.2785615246340467 }, { "epoch": 1.7856933450600998, "grad_norm": 0.4002865901052259, "learning_rate": 0.0004978945566147112, "loss": 3.233079433441162, "step": 3046, "token_acc": 0.27495956751226297 }, { "epoch": 1.7862796833773087, "grad_norm": 0.30202097569773967, "learning_rate": 0.0004978914174158696, "loss": 3.2649312019348145, "step": 3047, "token_acc": 0.27042915309661697 }, { "epoch": 1.7868660216945176, "grad_norm": 0.38121081176551214, "learning_rate": 0.0004978882758884222, "loss": 3.256307601928711, "step": 3048, "token_acc": 0.2716579923283834 }, { "epoch": 1.7874523600117267, "grad_norm": 0.4238585904359199, "learning_rate": 0.0004978851320323984, "loss": 3.185225009918213, "step": 3049, "token_acc": 0.28061716981084023 }, { "epoch": 1.7880386983289358, "grad_norm": 0.3923459511441045, "learning_rate": 0.0004978819858478279, "loss": 3.2211811542510986, "step": 3050, "token_acc": 0.2754796617163871 }, { "epoch": 1.788625036646145, "grad_norm": 0.3425226437079169, "learning_rate": 0.0004978788373347401, "loss": 3.2133331298828125, "step": 3051, "token_acc": 0.277002285614862 }, { "epoch": 1.7892113749633538, "grad_norm": 0.3931444647559363, "learning_rate": 0.0004978756864931647, "loss": 3.2010340690612793, "step": 3052, "token_acc": 0.2777529413047508 }, { "epoch": 1.789797713280563, "grad_norm": 0.3919612233516763, "learning_rate": 0.0004978725333231312, "loss": 3.1898512840270996, "step": 3053, "token_acc": 0.28037795831861534 }, { "epoch": 1.7903840515977718, "grad_norm": 0.29107398335687057, "learning_rate": 0.0004978693778246692, "loss": 3.254596710205078, "step": 3054, "token_acc": 0.26990290753697266 }, { "epoch": 1.790970389914981, "grad_norm": 0.37187333483605906, "learning_rate": 0.0004978662199978086, "loss": 3.1810340881347656, "step": 3055, "token_acc": 0.28070395173389684 }, { "epoch": 1.79155672823219, "grad_norm": 0.35712438029223637, "learning_rate": 0.0004978630598425787, "loss": 3.241107225418091, "step": 3056, "token_acc": 0.27391042928287107 }, { "epoch": 1.7921430665493991, "grad_norm": 0.332295773026044, "learning_rate": 0.0004978598973590094, "loss": 3.180257797241211, "step": 3057, "token_acc": 0.28189725880132616 }, { "epoch": 1.792729404866608, "grad_norm": 0.3312439495567886, "learning_rate": 0.0004978567325471303, "loss": 3.1745986938476562, "step": 3058, "token_acc": 0.28338573932590283 }, { "epoch": 1.793315743183817, "grad_norm": 0.3739503612058253, "learning_rate": 0.0004978535654069713, "loss": 3.2107272148132324, "step": 3059, "token_acc": 0.27865870035625423 }, { "epoch": 1.793902081501026, "grad_norm": 0.36391710654284615, "learning_rate": 0.000497850395938562, "loss": 3.268104076385498, "step": 3060, "token_acc": 0.26943851676041575 }, { "epoch": 1.7944884198182351, "grad_norm": 0.3584543997716403, "learning_rate": 0.0004978472241419322, "loss": 3.2370619773864746, "step": 3061, "token_acc": 0.273876679695154 }, { "epoch": 1.7950747581354443, "grad_norm": 0.36340880898925376, "learning_rate": 0.0004978440500171117, "loss": 3.2221646308898926, "step": 3062, "token_acc": 0.2748442656311163 }, { "epoch": 1.7956610964526531, "grad_norm": 0.3873339414519027, "learning_rate": 0.0004978408735641304, "loss": 3.2930383682250977, "step": 3063, "token_acc": 0.263939074022676 }, { "epoch": 1.7962474347698623, "grad_norm": 0.41482617303419167, "learning_rate": 0.0004978376947830179, "loss": 3.2683162689208984, "step": 3064, "token_acc": 0.271600435604504 }, { "epoch": 1.7968337730870712, "grad_norm": 0.34954121883263045, "learning_rate": 0.0004978345136738043, "loss": 3.1729214191436768, "step": 3065, "token_acc": 0.2827161301293707 }, { "epoch": 1.7974201114042803, "grad_norm": 0.3103495483398344, "learning_rate": 0.0004978313302365195, "loss": 3.2378652095794678, "step": 3066, "token_acc": 0.2730559608617286 }, { "epoch": 1.7980064497214894, "grad_norm": 0.3955304867874588, "learning_rate": 0.0004978281444711932, "loss": 3.225979804992676, "step": 3067, "token_acc": 0.27478845509383376 }, { "epoch": 1.7985927880386985, "grad_norm": 0.38769879984135847, "learning_rate": 0.0004978249563778555, "loss": 3.263969898223877, "step": 3068, "token_acc": 0.27092229488245784 }, { "epoch": 1.7991791263559074, "grad_norm": 0.3427366015288133, "learning_rate": 0.0004978217659565362, "loss": 3.2790961265563965, "step": 3069, "token_acc": 0.2684529919147174 }, { "epoch": 1.7997654646731163, "grad_norm": 0.3509933173153281, "learning_rate": 0.0004978185732072654, "loss": 3.202017307281494, "step": 3070, "token_acc": 0.2771232904632819 }, { "epoch": 1.8003518029903254, "grad_norm": 0.36994098563812267, "learning_rate": 0.000497815378130073, "loss": 3.286817789077759, "step": 3071, "token_acc": 0.26782564353396016 }, { "epoch": 1.8009381413075345, "grad_norm": 0.3485177362244053, "learning_rate": 0.0004978121807249892, "loss": 3.222672700881958, "step": 3072, "token_acc": 0.2750010011880765 }, { "epoch": 1.8015244796247436, "grad_norm": 0.3779147757934785, "learning_rate": 0.0004978089809920438, "loss": 3.2124924659729004, "step": 3073, "token_acc": 0.2767968756168104 }, { "epoch": 1.8021108179419525, "grad_norm": 0.3769631519507005, "learning_rate": 0.0004978057789312669, "loss": 3.2211883068084717, "step": 3074, "token_acc": 0.27653093732759715 }, { "epoch": 1.8026971562591614, "grad_norm": 0.32871657841052576, "learning_rate": 0.0004978025745426887, "loss": 3.204349994659424, "step": 3075, "token_acc": 0.2775408474106896 }, { "epoch": 1.8032834945763705, "grad_norm": 0.3693189279777911, "learning_rate": 0.0004977993678263392, "loss": 3.2038631439208984, "step": 3076, "token_acc": 0.2768474685782168 }, { "epoch": 1.8038698328935796, "grad_norm": 0.3616413701092867, "learning_rate": 0.0004977961587822486, "loss": 3.261770248413086, "step": 3077, "token_acc": 0.26977999392347146 }, { "epoch": 1.8044561712107887, "grad_norm": 0.37639487686938766, "learning_rate": 0.0004977929474104469, "loss": 3.207293748855591, "step": 3078, "token_acc": 0.27639445032550386 }, { "epoch": 1.8050425095279976, "grad_norm": 0.38854222186616394, "learning_rate": 0.0004977897337109645, "loss": 3.2256155014038086, "step": 3079, "token_acc": 0.2758401944304665 }, { "epoch": 1.8056288478452067, "grad_norm": 0.3590370406285352, "learning_rate": 0.0004977865176838313, "loss": 3.2482213973999023, "step": 3080, "token_acc": 0.27238748885571246 }, { "epoch": 1.8062151861624156, "grad_norm": 0.3213123242580486, "learning_rate": 0.0004977832993290777, "loss": 3.232754707336426, "step": 3081, "token_acc": 0.27341134579097937 }, { "epoch": 1.8068015244796247, "grad_norm": 0.33814738594025034, "learning_rate": 0.0004977800786467341, "loss": 3.2238521575927734, "step": 3082, "token_acc": 0.27429833512005475 }, { "epoch": 1.8073878627968338, "grad_norm": 0.32725126925089004, "learning_rate": 0.0004977768556368303, "loss": 3.2200348377227783, "step": 3083, "token_acc": 0.275874671539662 }, { "epoch": 1.807974201114043, "grad_norm": 0.31065859550059544, "learning_rate": 0.000497773630299397, "loss": 3.1588001251220703, "step": 3084, "token_acc": 0.2832840854511663 }, { "epoch": 1.8085605394312518, "grad_norm": 0.29210071321745845, "learning_rate": 0.0004977704026344642, "loss": 3.2345504760742188, "step": 3085, "token_acc": 0.27349048414023375 }, { "epoch": 1.8091468777484607, "grad_norm": 0.3601742266796131, "learning_rate": 0.0004977671726420623, "loss": 3.2008392810821533, "step": 3086, "token_acc": 0.27847882000944835 }, { "epoch": 1.8097332160656698, "grad_norm": 0.359623803216631, "learning_rate": 0.0004977639403222217, "loss": 3.1807808876037598, "step": 3087, "token_acc": 0.2804770665482459 }, { "epoch": 1.810319554382879, "grad_norm": 0.30111071184533417, "learning_rate": 0.0004977607056749729, "loss": 3.2510619163513184, "step": 3088, "token_acc": 0.27215787192977325 }, { "epoch": 1.810905892700088, "grad_norm": 0.3122434450489425, "learning_rate": 0.000497757468700346, "loss": 3.160186767578125, "step": 3089, "token_acc": 0.283549929843818 }, { "epoch": 1.811492231017297, "grad_norm": 0.33903808851879147, "learning_rate": 0.0004977542293983716, "loss": 3.199387788772583, "step": 3090, "token_acc": 0.2782477213610506 }, { "epoch": 1.812078569334506, "grad_norm": 0.3210425491487077, "learning_rate": 0.00049775098776908, "loss": 3.266819953918457, "step": 3091, "token_acc": 0.26917340043320925 }, { "epoch": 1.812664907651715, "grad_norm": 0.28165244100059494, "learning_rate": 0.0004977477438125018, "loss": 3.2057886123657227, "step": 3092, "token_acc": 0.27883147912303047 }, { "epoch": 1.813251245968924, "grad_norm": 0.28346057851784295, "learning_rate": 0.0004977444975286674, "loss": 3.156052589416504, "step": 3093, "token_acc": 0.2830984019928746 }, { "epoch": 1.8138375842861332, "grad_norm": 0.2946439922573313, "learning_rate": 0.0004977412489176072, "loss": 3.204800844192505, "step": 3094, "token_acc": 0.278671615486449 }, { "epoch": 1.8144239226033423, "grad_norm": 0.3002826924190573, "learning_rate": 0.0004977379979793518, "loss": 3.2153425216674805, "step": 3095, "token_acc": 0.2761555647718303 }, { "epoch": 1.8150102609205512, "grad_norm": 0.28175738293552754, "learning_rate": 0.0004977347447139318, "loss": 3.2161903381347656, "step": 3096, "token_acc": 0.27703055419427697 }, { "epoch": 1.81559659923776, "grad_norm": 0.3002849633352072, "learning_rate": 0.0004977314891213777, "loss": 3.184248685836792, "step": 3097, "token_acc": 0.28247722675375586 }, { "epoch": 1.8161829375549692, "grad_norm": 0.32384403742729295, "learning_rate": 0.00049772823120172, "loss": 3.185739278793335, "step": 3098, "token_acc": 0.2805444039329762 }, { "epoch": 1.8167692758721783, "grad_norm": 0.36971157761720524, "learning_rate": 0.0004977249709549894, "loss": 3.251883029937744, "step": 3099, "token_acc": 0.2707999269026544 }, { "epoch": 1.8173556141893874, "grad_norm": 0.4353494644545547, "learning_rate": 0.0004977217083812167, "loss": 3.2433829307556152, "step": 3100, "token_acc": 0.2732780866571816 }, { "epoch": 1.8179419525065963, "grad_norm": 0.49033673586899357, "learning_rate": 0.0004977184434804321, "loss": 3.263723611831665, "step": 3101, "token_acc": 0.2704251105476036 }, { "epoch": 1.8185282908238052, "grad_norm": 0.454816597428356, "learning_rate": 0.0004977151762526667, "loss": 3.226797342300415, "step": 3102, "token_acc": 0.2740893792649163 }, { "epoch": 1.8191146291410143, "grad_norm": 0.39294971706698373, "learning_rate": 0.000497711906697951, "loss": 3.2254514694213867, "step": 3103, "token_acc": 0.27457616346505237 }, { "epoch": 1.8197009674582234, "grad_norm": 0.37092518686667825, "learning_rate": 0.0004977086348163156, "loss": 3.216805934906006, "step": 3104, "token_acc": 0.2755943793147852 }, { "epoch": 1.8202873057754325, "grad_norm": 0.3251317748581824, "learning_rate": 0.0004977053606077914, "loss": 3.179370164871216, "step": 3105, "token_acc": 0.2816592108406263 }, { "epoch": 1.8208736440926414, "grad_norm": 0.3048378568310232, "learning_rate": 0.0004977020840724093, "loss": 3.2183425426483154, "step": 3106, "token_acc": 0.2763232101813229 }, { "epoch": 1.8214599824098505, "grad_norm": 0.32228526010036945, "learning_rate": 0.0004976988052101998, "loss": 3.226909875869751, "step": 3107, "token_acc": 0.27474221079802924 }, { "epoch": 1.8220463207270594, "grad_norm": 0.36196240028810733, "learning_rate": 0.0004976955240211938, "loss": 3.168827772140503, "step": 3108, "token_acc": 0.28185230126703725 }, { "epoch": 1.8226326590442685, "grad_norm": 0.2974113209380026, "learning_rate": 0.0004976922405054221, "loss": 3.19718861579895, "step": 3109, "token_acc": 0.27764419006333185 }, { "epoch": 1.8232189973614776, "grad_norm": 0.3524302770314917, "learning_rate": 0.0004976889546629156, "loss": 3.2423512935638428, "step": 3110, "token_acc": 0.27441172564816285 }, { "epoch": 1.8238053356786867, "grad_norm": 0.40329508811407877, "learning_rate": 0.0004976856664937052, "loss": 3.1913347244262695, "step": 3111, "token_acc": 0.2788196788052154 }, { "epoch": 1.8243916739958956, "grad_norm": 0.4083376644570348, "learning_rate": 0.0004976823759978216, "loss": 3.2298972606658936, "step": 3112, "token_acc": 0.27421645931270333 }, { "epoch": 1.8249780123131045, "grad_norm": 0.33399227257877573, "learning_rate": 0.0004976790831752959, "loss": 3.223026752471924, "step": 3113, "token_acc": 0.2787739365326194 }, { "epoch": 1.8255643506303136, "grad_norm": 0.3135540725941951, "learning_rate": 0.0004976757880261589, "loss": 3.1948225498199463, "step": 3114, "token_acc": 0.27814315253120386 }, { "epoch": 1.8261506889475227, "grad_norm": 0.3157399998244445, "learning_rate": 0.0004976724905504417, "loss": 3.209144353866577, "step": 3115, "token_acc": 0.2771497244097653 }, { "epoch": 1.8267370272647319, "grad_norm": 0.37116510360062466, "learning_rate": 0.0004976691907481751, "loss": 3.211979389190674, "step": 3116, "token_acc": 0.27812151386191797 }, { "epoch": 1.8273233655819408, "grad_norm": 0.33255809348491844, "learning_rate": 0.0004976658886193903, "loss": 3.221564292907715, "step": 3117, "token_acc": 0.27540251236204577 }, { "epoch": 1.8279097038991499, "grad_norm": 0.3577638023527115, "learning_rate": 0.0004976625841641182, "loss": 3.183070182800293, "step": 3118, "token_acc": 0.2795035128068625 }, { "epoch": 1.8284960422163588, "grad_norm": 0.32327820005131086, "learning_rate": 0.0004976592773823899, "loss": 3.1644632816314697, "step": 3119, "token_acc": 0.28111199613491245 }, { "epoch": 1.8290823805335679, "grad_norm": 0.31164091685798206, "learning_rate": 0.0004976559682742362, "loss": 3.2378828525543213, "step": 3120, "token_acc": 0.27225829698367626 }, { "epoch": 1.829668718850777, "grad_norm": 0.3598986517359024, "learning_rate": 0.0004976526568396886, "loss": 3.190952777862549, "step": 3121, "token_acc": 0.2796077458581615 }, { "epoch": 1.830255057167986, "grad_norm": 0.3565411530991297, "learning_rate": 0.0004976493430787778, "loss": 3.2498016357421875, "step": 3122, "token_acc": 0.2700043661766846 }, { "epoch": 1.830841395485195, "grad_norm": 0.34174331138259756, "learning_rate": 0.0004976460269915353, "loss": 3.2153537273406982, "step": 3123, "token_acc": 0.27680852064082406 }, { "epoch": 1.8314277338024039, "grad_norm": 0.3360686714554351, "learning_rate": 0.0004976427085779921, "loss": 3.138901710510254, "step": 3124, "token_acc": 0.2855392757387171 }, { "epoch": 1.832014072119613, "grad_norm": 0.3742577982378195, "learning_rate": 0.0004976393878381793, "loss": 3.2174904346466064, "step": 3125, "token_acc": 0.27462398884525874 }, { "epoch": 1.832600410436822, "grad_norm": 0.3871094526962964, "learning_rate": 0.0004976360647721282, "loss": 3.229483127593994, "step": 3126, "token_acc": 0.2748356186667926 }, { "epoch": 1.8331867487540312, "grad_norm": 0.3828959085048492, "learning_rate": 0.0004976327393798699, "loss": 3.2417168617248535, "step": 3127, "token_acc": 0.27355023194734757 }, { "epoch": 1.83377308707124, "grad_norm": 0.3533370353116019, "learning_rate": 0.0004976294116614357, "loss": 3.26116943359375, "step": 3128, "token_acc": 0.26840127678014647 }, { "epoch": 1.834359425388449, "grad_norm": 0.3433605238772967, "learning_rate": 0.0004976260816168569, "loss": 3.19572377204895, "step": 3129, "token_acc": 0.27880398424143316 }, { "epoch": 1.834945763705658, "grad_norm": 0.36283551681434834, "learning_rate": 0.0004976227492461648, "loss": 3.1815576553344727, "step": 3130, "token_acc": 0.278300563063353 }, { "epoch": 1.8355321020228672, "grad_norm": 0.36029785390642, "learning_rate": 0.0004976194145493905, "loss": 3.2210793495178223, "step": 3131, "token_acc": 0.2748400469320961 }, { "epoch": 1.8361184403400763, "grad_norm": 0.329458886445964, "learning_rate": 0.0004976160775265656, "loss": 3.2189884185791016, "step": 3132, "token_acc": 0.2786865449084976 }, { "epoch": 1.8367047786572852, "grad_norm": 0.29352646232593516, "learning_rate": 0.0004976127381777212, "loss": 3.2372422218322754, "step": 3133, "token_acc": 0.2733540275459099 }, { "epoch": 1.8372911169744943, "grad_norm": 0.394873313097862, "learning_rate": 0.0004976093965028889, "loss": 3.2360429763793945, "step": 3134, "token_acc": 0.2746922700686788 }, { "epoch": 1.8378774552917032, "grad_norm": 0.345928833865739, "learning_rate": 0.0004976060525020999, "loss": 3.2294840812683105, "step": 3135, "token_acc": 0.2746953815746591 }, { "epoch": 1.8384637936089123, "grad_norm": 0.40877328884882363, "learning_rate": 0.0004976027061753857, "loss": 3.214244842529297, "step": 3136, "token_acc": 0.27532783852667925 }, { "epoch": 1.8390501319261214, "grad_norm": 0.39616700982536857, "learning_rate": 0.0004975993575227777, "loss": 3.2372498512268066, "step": 3137, "token_acc": 0.273489540568023 }, { "epoch": 1.8396364702433305, "grad_norm": 0.36848350483044756, "learning_rate": 0.0004975960065443075, "loss": 3.203279495239258, "step": 3138, "token_acc": 0.2778041495546429 }, { "epoch": 1.8402228085605394, "grad_norm": 0.336260306085189, "learning_rate": 0.0004975926532400064, "loss": 3.210517406463623, "step": 3139, "token_acc": 0.277127392560726 }, { "epoch": 1.8408091468777483, "grad_norm": 0.36663515411519815, "learning_rate": 0.0004975892976099059, "loss": 3.1943471431732178, "step": 3140, "token_acc": 0.28032813416145114 }, { "epoch": 1.8413954851949574, "grad_norm": 0.35390681764015564, "learning_rate": 0.0004975859396540377, "loss": 3.2373294830322266, "step": 3141, "token_acc": 0.27316110367576485 }, { "epoch": 1.8419818235121665, "grad_norm": 0.32382782853981074, "learning_rate": 0.0004975825793724332, "loss": 3.2416231632232666, "step": 3142, "token_acc": 0.27347788607452 }, { "epoch": 1.8425681618293757, "grad_norm": 0.3183885213508255, "learning_rate": 0.0004975792167651238, "loss": 3.177006244659424, "step": 3143, "token_acc": 0.28109130970125074 }, { "epoch": 1.8431545001465846, "grad_norm": 0.3500263407698211, "learning_rate": 0.0004975758518321414, "loss": 3.2232465744018555, "step": 3144, "token_acc": 0.27532448967165857 }, { "epoch": 1.8437408384637937, "grad_norm": 0.33021603173329733, "learning_rate": 0.0004975724845735175, "loss": 3.2225494384765625, "step": 3145, "token_acc": 0.2751031157309819 }, { "epoch": 1.8443271767810026, "grad_norm": 0.3278126409064888, "learning_rate": 0.0004975691149892837, "loss": 3.2059528827667236, "step": 3146, "token_acc": 0.27730124548344015 }, { "epoch": 1.8449135150982117, "grad_norm": 0.28326440291802535, "learning_rate": 0.0004975657430794717, "loss": 3.2390079498291016, "step": 3147, "token_acc": 0.2727793448265328 }, { "epoch": 1.8454998534154208, "grad_norm": 0.357141450133721, "learning_rate": 0.0004975623688441131, "loss": 3.197248935699463, "step": 3148, "token_acc": 0.27968242796062653 }, { "epoch": 1.8460861917326299, "grad_norm": 0.33195813811078523, "learning_rate": 0.0004975589922832398, "loss": 3.212507963180542, "step": 3149, "token_acc": 0.27692869693173255 }, { "epoch": 1.8466725300498388, "grad_norm": 0.3166989707665105, "learning_rate": 0.0004975556133968832, "loss": 3.1847949028015137, "step": 3150, "token_acc": 0.2811813914505676 }, { "epoch": 1.8472588683670477, "grad_norm": 0.2924435637348427, "learning_rate": 0.0004975522321850752, "loss": 3.214052438735962, "step": 3151, "token_acc": 0.2773872670273416 }, { "epoch": 1.8478452066842568, "grad_norm": 0.3119761471269352, "learning_rate": 0.0004975488486478475, "loss": 3.2143592834472656, "step": 3152, "token_acc": 0.27675271861595807 }, { "epoch": 1.848431545001466, "grad_norm": 0.3519916400630215, "learning_rate": 0.0004975454627852321, "loss": 3.23665714263916, "step": 3153, "token_acc": 0.27315646328321447 }, { "epoch": 1.849017883318675, "grad_norm": 0.41923734578491395, "learning_rate": 0.0004975420745972606, "loss": 3.219377040863037, "step": 3154, "token_acc": 0.27474468570948635 }, { "epoch": 1.849604221635884, "grad_norm": 0.3769761861283405, "learning_rate": 0.0004975386840839648, "loss": 3.2280220985412598, "step": 3155, "token_acc": 0.27319603624477634 }, { "epoch": 1.8501905599530928, "grad_norm": 0.3133650837281849, "learning_rate": 0.0004975352912453766, "loss": 3.2121810913085938, "step": 3156, "token_acc": 0.2747660690521095 }, { "epoch": 1.850776898270302, "grad_norm": 0.3138653934763373, "learning_rate": 0.0004975318960815279, "loss": 3.2204790115356445, "step": 3157, "token_acc": 0.27674664134870053 }, { "epoch": 1.851363236587511, "grad_norm": 0.3314318357157845, "learning_rate": 0.0004975284985924508, "loss": 3.187467098236084, "step": 3158, "token_acc": 0.27793669414669947 }, { "epoch": 1.8519495749047201, "grad_norm": 0.3029753657116342, "learning_rate": 0.0004975250987781768, "loss": 3.230639696121216, "step": 3159, "token_acc": 0.2733948934060486 }, { "epoch": 1.852535913221929, "grad_norm": 0.3524581173414202, "learning_rate": 0.0004975216966387381, "loss": 3.194685459136963, "step": 3160, "token_acc": 0.28047108193543413 }, { "epoch": 1.8531222515391381, "grad_norm": 0.3390248238269578, "learning_rate": 0.0004975182921741667, "loss": 3.239321708679199, "step": 3161, "token_acc": 0.27347459941002317 }, { "epoch": 1.853708589856347, "grad_norm": 0.3091468364062924, "learning_rate": 0.0004975148853844944, "loss": 3.18222713470459, "step": 3162, "token_acc": 0.2815449366550463 }, { "epoch": 1.8542949281735561, "grad_norm": 0.29815243038850664, "learning_rate": 0.0004975114762697531, "loss": 3.23945689201355, "step": 3163, "token_acc": 0.27493044969260483 }, { "epoch": 1.8548812664907652, "grad_norm": 0.3215585847225152, "learning_rate": 0.0004975080648299753, "loss": 3.2652814388275146, "step": 3164, "token_acc": 0.2698445759784858 }, { "epoch": 1.8554676048079743, "grad_norm": 0.34649803599119156, "learning_rate": 0.0004975046510651926, "loss": 3.2273406982421875, "step": 3165, "token_acc": 0.2739656338930178 }, { "epoch": 1.8560539431251832, "grad_norm": 0.31690377964329614, "learning_rate": 0.0004975012349754372, "loss": 3.184079170227051, "step": 3166, "token_acc": 0.2797838662513508 }, { "epoch": 1.8566402814423921, "grad_norm": 0.3347235605681407, "learning_rate": 0.0004974978165607412, "loss": 3.1979713439941406, "step": 3167, "token_acc": 0.2777996040057344 }, { "epoch": 1.8572266197596012, "grad_norm": 0.31187408164306907, "learning_rate": 0.0004974943958211368, "loss": 3.190084457397461, "step": 3168, "token_acc": 0.2802953650082883 }, { "epoch": 1.8578129580768104, "grad_norm": 0.30808743707477093, "learning_rate": 0.000497490972756656, "loss": 3.1951777935028076, "step": 3169, "token_acc": 0.27846077751488946 }, { "epoch": 1.8583992963940195, "grad_norm": 0.28781709449685455, "learning_rate": 0.0004974875473673311, "loss": 3.2095983028411865, "step": 3170, "token_acc": 0.2769773038972405 }, { "epoch": 1.8589856347112284, "grad_norm": 0.2667868718073247, "learning_rate": 0.0004974841196531941, "loss": 3.1729869842529297, "step": 3171, "token_acc": 0.28227593877273416 }, { "epoch": 1.8595719730284375, "grad_norm": 0.36169384867439086, "learning_rate": 0.0004974806896142773, "loss": 3.21612548828125, "step": 3172, "token_acc": 0.27613407537776086 }, { "epoch": 1.8601583113456464, "grad_norm": 0.3787721236086635, "learning_rate": 0.0004974772572506129, "loss": 3.2117819786071777, "step": 3173, "token_acc": 0.2760167685996841 }, { "epoch": 1.8607446496628555, "grad_norm": 0.36750435669555614, "learning_rate": 0.0004974738225622332, "loss": 3.2138185501098633, "step": 3174, "token_acc": 0.2761070628911411 }, { "epoch": 1.8613309879800646, "grad_norm": 0.44782320750606014, "learning_rate": 0.0004974703855491704, "loss": 3.2162137031555176, "step": 3175, "token_acc": 0.27624253149886846 }, { "epoch": 1.8619173262972737, "grad_norm": 0.3611677020076803, "learning_rate": 0.0004974669462114567, "loss": 3.1626176834106445, "step": 3176, "token_acc": 0.2838756976887305 }, { "epoch": 1.8625036646144826, "grad_norm": 0.34042634394152166, "learning_rate": 0.0004974635045491246, "loss": 3.243594169616699, "step": 3177, "token_acc": 0.27258156248384263 }, { "epoch": 1.8630900029316915, "grad_norm": 0.2742665316962135, "learning_rate": 0.0004974600605622063, "loss": 3.2122983932495117, "step": 3178, "token_acc": 0.27575242160903757 }, { "epoch": 1.8636763412489006, "grad_norm": 0.3877599185233168, "learning_rate": 0.0004974566142507342, "loss": 3.249803066253662, "step": 3179, "token_acc": 0.2721244068759397 }, { "epoch": 1.8642626795661097, "grad_norm": 0.40425426730030195, "learning_rate": 0.0004974531656147406, "loss": 3.194822311401367, "step": 3180, "token_acc": 0.2794471414467894 }, { "epoch": 1.8648490178833188, "grad_norm": 0.45105800018787545, "learning_rate": 0.000497449714654258, "loss": 3.2328460216522217, "step": 3181, "token_acc": 0.2743173524600478 }, { "epoch": 1.8654353562005277, "grad_norm": 0.31503863563968004, "learning_rate": 0.0004974462613693189, "loss": 3.232696056365967, "step": 3182, "token_acc": 0.274692856059457 }, { "epoch": 1.8660216945177366, "grad_norm": 0.33764223870327553, "learning_rate": 0.0004974428057599555, "loss": 3.212827205657959, "step": 3183, "token_acc": 0.2741012803053698 }, { "epoch": 1.8666080328349457, "grad_norm": 0.3808911403127586, "learning_rate": 0.0004974393478262004, "loss": 3.1923816204071045, "step": 3184, "token_acc": 0.27918689940539987 }, { "epoch": 1.8671943711521548, "grad_norm": 0.3344430633864189, "learning_rate": 0.000497435887568086, "loss": 3.2020010948181152, "step": 3185, "token_acc": 0.27797538781707565 }, { "epoch": 1.867780709469364, "grad_norm": 0.3690211481526404, "learning_rate": 0.0004974324249856449, "loss": 3.244870901107788, "step": 3186, "token_acc": 0.27205094433717375 }, { "epoch": 1.8683670477865728, "grad_norm": 0.37740308352187113, "learning_rate": 0.0004974289600789096, "loss": 3.2352359294891357, "step": 3187, "token_acc": 0.27430062305693204 }, { "epoch": 1.868953386103782, "grad_norm": 0.3459160998398935, "learning_rate": 0.0004974254928479126, "loss": 3.1817288398742676, "step": 3188, "token_acc": 0.2797147966916836 }, { "epoch": 1.8695397244209908, "grad_norm": 0.3007210912152274, "learning_rate": 0.0004974220232926865, "loss": 3.1853928565979004, "step": 3189, "token_acc": 0.27906011749844395 }, { "epoch": 1.8701260627382, "grad_norm": 0.3257078146011132, "learning_rate": 0.0004974185514132639, "loss": 3.2011704444885254, "step": 3190, "token_acc": 0.2757239764904288 }, { "epoch": 1.870712401055409, "grad_norm": 0.33274978817006384, "learning_rate": 0.0004974150772096774, "loss": 3.2101292610168457, "step": 3191, "token_acc": 0.27879314271188055 }, { "epoch": 1.8712987393726181, "grad_norm": 0.32871039262285545, "learning_rate": 0.0004974116006819597, "loss": 3.1871213912963867, "step": 3192, "token_acc": 0.2789420494135794 }, { "epoch": 1.871885077689827, "grad_norm": 0.2959164747183507, "learning_rate": 0.0004974081218301434, "loss": 3.2192976474761963, "step": 3193, "token_acc": 0.27461728446524036 }, { "epoch": 1.872471416007036, "grad_norm": 0.2838727741450811, "learning_rate": 0.0004974046406542612, "loss": 3.1955108642578125, "step": 3194, "token_acc": 0.27964859349599985 }, { "epoch": 1.873057754324245, "grad_norm": 0.2558016685443632, "learning_rate": 0.0004974011571543456, "loss": 3.1855757236480713, "step": 3195, "token_acc": 0.2788962641944155 }, { "epoch": 1.8736440926414542, "grad_norm": 0.27230629958789326, "learning_rate": 0.0004973976713304297, "loss": 3.18350887298584, "step": 3196, "token_acc": 0.27939244068743596 }, { "epoch": 1.8742304309586633, "grad_norm": 0.3165362476257985, "learning_rate": 0.000497394183182546, "loss": 3.2158000469207764, "step": 3197, "token_acc": 0.27685085548262134 }, { "epoch": 1.8748167692758722, "grad_norm": 0.33673595995617445, "learning_rate": 0.0004973906927107273, "loss": 3.213892936706543, "step": 3198, "token_acc": 0.2749061278843845 }, { "epoch": 1.875403107593081, "grad_norm": 0.34970305432716003, "learning_rate": 0.0004973871999150065, "loss": 3.1813442707061768, "step": 3199, "token_acc": 0.2808978977790095 }, { "epoch": 1.8759894459102902, "grad_norm": 0.4032671833699118, "learning_rate": 0.0004973837047954162, "loss": 3.203171968460083, "step": 3200, "token_acc": 0.27973987536124834 }, { "epoch": 1.8765757842274993, "grad_norm": 0.3873400966019496, "learning_rate": 0.0004973802073519894, "loss": 3.1737403869628906, "step": 3201, "token_acc": 0.28056242474461723 }, { "epoch": 1.8771621225447084, "grad_norm": 0.37122743823033044, "learning_rate": 0.0004973767075847588, "loss": 3.1454849243164062, "step": 3202, "token_acc": 0.2847357167988308 }, { "epoch": 1.8777484608619175, "grad_norm": 0.3255156860130465, "learning_rate": 0.0004973732054937575, "loss": 3.223222494125366, "step": 3203, "token_acc": 0.2750273043872483 }, { "epoch": 1.8783347991791264, "grad_norm": 0.35948473187109026, "learning_rate": 0.0004973697010790182, "loss": 3.2170510292053223, "step": 3204, "token_acc": 0.277822365923446 }, { "epoch": 1.8789211374963353, "grad_norm": 0.329513443380268, "learning_rate": 0.000497366194340574, "loss": 3.220018148422241, "step": 3205, "token_acc": 0.27413369127231924 }, { "epoch": 1.8795074758135444, "grad_norm": 0.3450147472311175, "learning_rate": 0.0004973626852784577, "loss": 3.2421979904174805, "step": 3206, "token_acc": 0.2731162222133642 }, { "epoch": 1.8800938141307535, "grad_norm": 0.31337793915466255, "learning_rate": 0.0004973591738927022, "loss": 3.1819725036621094, "step": 3207, "token_acc": 0.2789770562973863 }, { "epoch": 1.8806801524479626, "grad_norm": 0.34821985399251515, "learning_rate": 0.0004973556601833406, "loss": 3.2090983390808105, "step": 3208, "token_acc": 0.2780263876633472 }, { "epoch": 1.8812664907651715, "grad_norm": 0.29332944312052356, "learning_rate": 0.000497352144150406, "loss": 3.2027201652526855, "step": 3209, "token_acc": 0.27690631062721943 }, { "epoch": 1.8818528290823804, "grad_norm": 0.30646524117442714, "learning_rate": 0.0004973486257939311, "loss": 3.225149631500244, "step": 3210, "token_acc": 0.2753865841661483 }, { "epoch": 1.8824391673995895, "grad_norm": 0.32700920072564643, "learning_rate": 0.0004973451051139494, "loss": 3.206990957260132, "step": 3211, "token_acc": 0.2763532188754615 }, { "epoch": 1.8830255057167986, "grad_norm": 0.3401542223780962, "learning_rate": 0.0004973415821104936, "loss": 3.228464126586914, "step": 3212, "token_acc": 0.2735100734284262 }, { "epoch": 1.8836118440340077, "grad_norm": 0.37492279320205024, "learning_rate": 0.000497338056783597, "loss": 3.226731300354004, "step": 3213, "token_acc": 0.2744836233716495 }, { "epoch": 1.8841981823512166, "grad_norm": 0.42391757459530943, "learning_rate": 0.0004973345291332927, "loss": 3.204246997833252, "step": 3214, "token_acc": 0.27811981942921266 }, { "epoch": 1.8847845206684257, "grad_norm": 0.36725681907257646, "learning_rate": 0.0004973309991596137, "loss": 3.256497383117676, "step": 3215, "token_acc": 0.2720441001034257 }, { "epoch": 1.8853708589856346, "grad_norm": 0.33358954202990587, "learning_rate": 0.0004973274668625932, "loss": 3.202805757522583, "step": 3216, "token_acc": 0.27606244402833185 }, { "epoch": 1.8859571973028437, "grad_norm": 0.39563119239048544, "learning_rate": 0.0004973239322422645, "loss": 3.2335045337677, "step": 3217, "token_acc": 0.27338657546312156 }, { "epoch": 1.8865435356200528, "grad_norm": 0.34174037428925336, "learning_rate": 0.0004973203952986608, "loss": 3.2048444747924805, "step": 3218, "token_acc": 0.27848251208899394 }, { "epoch": 1.887129873937262, "grad_norm": 0.3648846754232908, "learning_rate": 0.0004973168560318151, "loss": 3.245723247528076, "step": 3219, "token_acc": 0.27322401558691584 }, { "epoch": 1.8877162122544708, "grad_norm": 0.3434491445171286, "learning_rate": 0.0004973133144417609, "loss": 3.218109130859375, "step": 3220, "token_acc": 0.2754695998650757 }, { "epoch": 1.8883025505716797, "grad_norm": 0.378815037406825, "learning_rate": 0.0004973097705285313, "loss": 3.229806900024414, "step": 3221, "token_acc": 0.2746115091655921 }, { "epoch": 1.8888888888888888, "grad_norm": 0.35516890329298767, "learning_rate": 0.0004973062242921598, "loss": 3.2406787872314453, "step": 3222, "token_acc": 0.2732929266052464 }, { "epoch": 1.889475227206098, "grad_norm": 0.39570064936772026, "learning_rate": 0.0004973026757326794, "loss": 3.237842559814453, "step": 3223, "token_acc": 0.27185950679286586 }, { "epoch": 1.890061565523307, "grad_norm": 0.3752993251096688, "learning_rate": 0.0004972991248501237, "loss": 3.2135162353515625, "step": 3224, "token_acc": 0.2756776768790754 }, { "epoch": 1.890647903840516, "grad_norm": 0.3156212542143818, "learning_rate": 0.000497295571644526, "loss": 3.210658073425293, "step": 3225, "token_acc": 0.27758197703775866 }, { "epoch": 1.8912342421577248, "grad_norm": 0.3732560978121744, "learning_rate": 0.0004972920161159196, "loss": 3.2549266815185547, "step": 3226, "token_acc": 0.26956761110488053 }, { "epoch": 1.891820580474934, "grad_norm": 0.36570661407445226, "learning_rate": 0.0004972884582643379, "loss": 3.251070976257324, "step": 3227, "token_acc": 0.2708862146418071 }, { "epoch": 1.892406918792143, "grad_norm": 0.34338731470130374, "learning_rate": 0.0004972848980898144, "loss": 3.188694477081299, "step": 3228, "token_acc": 0.27937241484961445 }, { "epoch": 1.8929932571093522, "grad_norm": 0.33420559243857445, "learning_rate": 0.0004972813355923825, "loss": 3.1797680854797363, "step": 3229, "token_acc": 0.28137867357640156 }, { "epoch": 1.8935795954265613, "grad_norm": 0.2910719112880023, "learning_rate": 0.0004972777707720756, "loss": 3.139526844024658, "step": 3230, "token_acc": 0.2837067861715749 }, { "epoch": 1.8941659337437702, "grad_norm": 0.3307947480190128, "learning_rate": 0.0004972742036289273, "loss": 3.2095818519592285, "step": 3231, "token_acc": 0.27667477021534936 }, { "epoch": 1.894752272060979, "grad_norm": 0.32942657068370057, "learning_rate": 0.000497270634162971, "loss": 3.247345447540283, "step": 3232, "token_acc": 0.2726510367961248 }, { "epoch": 1.8953386103781882, "grad_norm": 0.3419887301613843, "learning_rate": 0.0004972670623742405, "loss": 3.243971347808838, "step": 3233, "token_acc": 0.27222487690357966 }, { "epoch": 1.8959249486953973, "grad_norm": 0.3217314069421129, "learning_rate": 0.0004972634882627689, "loss": 3.2153892517089844, "step": 3234, "token_acc": 0.2768847131248331 }, { "epoch": 1.8965112870126064, "grad_norm": 0.32432811028489134, "learning_rate": 0.0004972599118285902, "loss": 3.1712770462036133, "step": 3235, "token_acc": 0.28314001978895237 }, { "epoch": 1.8970976253298153, "grad_norm": 0.30291471600971015, "learning_rate": 0.0004972563330717377, "loss": 3.21274471282959, "step": 3236, "token_acc": 0.27548518686390044 }, { "epoch": 1.8976839636470242, "grad_norm": 0.2946931355686893, "learning_rate": 0.0004972527519922452, "loss": 3.2140865325927734, "step": 3237, "token_acc": 0.2753327224800286 }, { "epoch": 1.8982703019642333, "grad_norm": 0.31330103454974073, "learning_rate": 0.0004972491685901462, "loss": 3.201774835586548, "step": 3238, "token_acc": 0.2768683562792459 }, { "epoch": 1.8988566402814424, "grad_norm": 0.29552366313157014, "learning_rate": 0.0004972455828654745, "loss": 3.167361259460449, "step": 3239, "token_acc": 0.2820283043231238 }, { "epoch": 1.8994429785986515, "grad_norm": 0.33191923339303425, "learning_rate": 0.0004972419948182637, "loss": 3.2070162296295166, "step": 3240, "token_acc": 0.27776553576381957 }, { "epoch": 1.9000293169158604, "grad_norm": 0.3490752267672838, "learning_rate": 0.0004972384044485475, "loss": 3.1854982376098633, "step": 3241, "token_acc": 0.2796276728491098 }, { "epoch": 1.9006156552330695, "grad_norm": 0.3036467827456306, "learning_rate": 0.0004972348117563598, "loss": 3.1936397552490234, "step": 3242, "token_acc": 0.27794686407676666 }, { "epoch": 1.9012019935502784, "grad_norm": 0.35404544231620155, "learning_rate": 0.000497231216741734, "loss": 3.2193100452423096, "step": 3243, "token_acc": 0.27459732401568093 }, { "epoch": 1.9017883318674875, "grad_norm": 0.34335495681318157, "learning_rate": 0.0004972276194047041, "loss": 3.215498447418213, "step": 3244, "token_acc": 0.27312107941768254 }, { "epoch": 1.9023746701846966, "grad_norm": 0.29721588043420866, "learning_rate": 0.0004972240197453039, "loss": 3.2404961585998535, "step": 3245, "token_acc": 0.2731248089447533 }, { "epoch": 1.9029610085019057, "grad_norm": 0.36648993350705167, "learning_rate": 0.0004972204177635672, "loss": 3.181131362915039, "step": 3246, "token_acc": 0.2821712364766063 }, { "epoch": 1.9035473468191146, "grad_norm": 0.3764259706821276, "learning_rate": 0.0004972168134595277, "loss": 3.1723313331604004, "step": 3247, "token_acc": 0.27984802577305284 }, { "epoch": 1.9041336851363235, "grad_norm": 0.39420930892712136, "learning_rate": 0.0004972132068332194, "loss": 3.2050790786743164, "step": 3248, "token_acc": 0.2761245563603077 }, { "epoch": 1.9047200234535326, "grad_norm": 0.3217579056309557, "learning_rate": 0.0004972095978846763, "loss": 3.2331573963165283, "step": 3249, "token_acc": 0.272179769959927 }, { "epoch": 1.9053063617707418, "grad_norm": 0.31963980779116213, "learning_rate": 0.0004972059866139321, "loss": 3.2373714447021484, "step": 3250, "token_acc": 0.2723248508751326 }, { "epoch": 1.9058927000879509, "grad_norm": 0.332174970218064, "learning_rate": 0.0004972023730210206, "loss": 3.166506767272949, "step": 3251, "token_acc": 0.28296525791759347 }, { "epoch": 1.9064790384051598, "grad_norm": 0.33054715221277986, "learning_rate": 0.000497198757105976, "loss": 3.251945734024048, "step": 3252, "token_acc": 0.2723264506296041 }, { "epoch": 1.9070653767223686, "grad_norm": 0.3239524684449019, "learning_rate": 0.0004971951388688323, "loss": 3.227743625640869, "step": 3253, "token_acc": 0.27277543871380916 }, { "epoch": 1.9076517150395778, "grad_norm": 0.3758479253861497, "learning_rate": 0.0004971915183096232, "loss": 3.2090821266174316, "step": 3254, "token_acc": 0.27621573462072396 }, { "epoch": 1.9082380533567869, "grad_norm": 0.37812488498080704, "learning_rate": 0.000497187895428383, "loss": 3.1961262226104736, "step": 3255, "token_acc": 0.2774504498835966 }, { "epoch": 1.908824391673996, "grad_norm": 0.3202552281551747, "learning_rate": 0.0004971842702251456, "loss": 3.231288433074951, "step": 3256, "token_acc": 0.27512210851270774 }, { "epoch": 1.909410729991205, "grad_norm": 0.3131574933199557, "learning_rate": 0.000497180642699945, "loss": 3.2010931968688965, "step": 3257, "token_acc": 0.2771954840014046 }, { "epoch": 1.909997068308414, "grad_norm": 0.31415192537406683, "learning_rate": 0.0004971770128528154, "loss": 3.158799171447754, "step": 3258, "token_acc": 0.28352145574387433 }, { "epoch": 1.9105834066256229, "grad_norm": 0.3324151332274796, "learning_rate": 0.0004971733806837906, "loss": 3.1905250549316406, "step": 3259, "token_acc": 0.27821866867888645 }, { "epoch": 1.911169744942832, "grad_norm": 0.3606384351245888, "learning_rate": 0.0004971697461929053, "loss": 3.1511802673339844, "step": 3260, "token_acc": 0.28438557747603205 }, { "epoch": 1.911756083260041, "grad_norm": 0.40778017979110603, "learning_rate": 0.0004971661093801932, "loss": 3.2007057666778564, "step": 3261, "token_acc": 0.27763172965219335 }, { "epoch": 1.9123424215772502, "grad_norm": 0.3793518726382328, "learning_rate": 0.0004971624702456885, "loss": 3.2358381748199463, "step": 3262, "token_acc": 0.2739803409102742 }, { "epoch": 1.912928759894459, "grad_norm": 0.3489883936747871, "learning_rate": 0.0004971588287894255, "loss": 3.1804826259613037, "step": 3263, "token_acc": 0.2797250123717683 }, { "epoch": 1.913515098211668, "grad_norm": 0.3549713026391409, "learning_rate": 0.0004971551850114383, "loss": 3.1808762550354004, "step": 3264, "token_acc": 0.2816051259716073 }, { "epoch": 1.914101436528877, "grad_norm": 0.29182578457399544, "learning_rate": 0.0004971515389117613, "loss": 3.1633729934692383, "step": 3265, "token_acc": 0.2836968366179136 }, { "epoch": 1.9146877748460862, "grad_norm": 0.379642310327676, "learning_rate": 0.0004971478904904285, "loss": 3.217869520187378, "step": 3266, "token_acc": 0.274332662782335 }, { "epoch": 1.9152741131632953, "grad_norm": 0.3581704996924199, "learning_rate": 0.0004971442397474744, "loss": 3.216370105743408, "step": 3267, "token_acc": 0.2767854025498269 }, { "epoch": 1.9158604514805042, "grad_norm": 0.3671525794093656, "learning_rate": 0.0004971405866829331, "loss": 3.2096691131591797, "step": 3268, "token_acc": 0.27631683884242597 }, { "epoch": 1.9164467897977133, "grad_norm": 0.35404654971841504, "learning_rate": 0.000497136931296839, "loss": 3.2011871337890625, "step": 3269, "token_acc": 0.2787005107453266 }, { "epoch": 1.9170331281149222, "grad_norm": 0.35297867377126746, "learning_rate": 0.0004971332735892265, "loss": 3.1925864219665527, "step": 3270, "token_acc": 0.2781865039170182 }, { "epoch": 1.9176194664321313, "grad_norm": 0.3049843173725241, "learning_rate": 0.00049712961356013, "loss": 3.219371795654297, "step": 3271, "token_acc": 0.2756746343723805 }, { "epoch": 1.9182058047493404, "grad_norm": 0.3311590804879516, "learning_rate": 0.0004971259512095837, "loss": 3.1935601234436035, "step": 3272, "token_acc": 0.279451019425098 }, { "epoch": 1.9187921430665495, "grad_norm": 0.28150351331393936, "learning_rate": 0.0004971222865376221, "loss": 3.171679973602295, "step": 3273, "token_acc": 0.2822164027146326 }, { "epoch": 1.9193784813837584, "grad_norm": 0.3163953727918806, "learning_rate": 0.0004971186195442797, "loss": 3.163278102874756, "step": 3274, "token_acc": 0.28308570541157824 }, { "epoch": 1.9199648197009673, "grad_norm": 0.3270020703602421, "learning_rate": 0.0004971149502295908, "loss": 3.1697912216186523, "step": 3275, "token_acc": 0.2813894409856133 }, { "epoch": 1.9205511580181764, "grad_norm": 0.3405200705441147, "learning_rate": 0.00049711127859359, "loss": 3.2281041145324707, "step": 3276, "token_acc": 0.2749717873218061 }, { "epoch": 1.9211374963353856, "grad_norm": 0.2852595111879431, "learning_rate": 0.0004971076046363115, "loss": 3.1619327068328857, "step": 3277, "token_acc": 0.28285992794657794 }, { "epoch": 1.9217238346525947, "grad_norm": 0.34683569218713, "learning_rate": 0.0004971039283577903, "loss": 3.205418825149536, "step": 3278, "token_acc": 0.27552238655597644 }, { "epoch": 1.9223101729698036, "grad_norm": 0.31921621619627655, "learning_rate": 0.0004971002497580606, "loss": 3.2118849754333496, "step": 3279, "token_acc": 0.2751948619452659 }, { "epoch": 1.9228965112870124, "grad_norm": 0.3367666468311743, "learning_rate": 0.0004970965688371569, "loss": 3.211894989013672, "step": 3280, "token_acc": 0.2783318812867019 }, { "epoch": 1.9234828496042216, "grad_norm": 0.3469526790234574, "learning_rate": 0.000497092885595114, "loss": 3.200273036956787, "step": 3281, "token_acc": 0.2763291883782334 }, { "epoch": 1.9240691879214307, "grad_norm": 0.4603988924848009, "learning_rate": 0.0004970892000319664, "loss": 3.2314515113830566, "step": 3282, "token_acc": 0.27475861327021867 }, { "epoch": 1.9246555262386398, "grad_norm": 0.41801360494864964, "learning_rate": 0.0004970855121477488, "loss": 3.1878371238708496, "step": 3283, "token_acc": 0.2778261631519413 }, { "epoch": 1.9252418645558487, "grad_norm": 0.4123696258893851, "learning_rate": 0.0004970818219424956, "loss": 3.2267203330993652, "step": 3284, "token_acc": 0.27354621105887245 }, { "epoch": 1.9258282028730578, "grad_norm": 0.36929878589833315, "learning_rate": 0.0004970781294162418, "loss": 3.223634719848633, "step": 3285, "token_acc": 0.27553608362532367 }, { "epoch": 1.9264145411902667, "grad_norm": 0.3276860672651484, "learning_rate": 0.0004970744345690218, "loss": 3.204263925552368, "step": 3286, "token_acc": 0.2779547949146943 }, { "epoch": 1.9270008795074758, "grad_norm": 0.33064543396397356, "learning_rate": 0.0004970707374008704, "loss": 3.1739344596862793, "step": 3287, "token_acc": 0.2817995437613699 }, { "epoch": 1.927587217824685, "grad_norm": 0.3144165707376213, "learning_rate": 0.0004970670379118224, "loss": 3.17598295211792, "step": 3288, "token_acc": 0.28031299113517066 }, { "epoch": 1.928173556141894, "grad_norm": 0.36270198804222953, "learning_rate": 0.0004970633361019125, "loss": 3.147336959838867, "step": 3289, "token_acc": 0.28549360131388524 }, { "epoch": 1.928759894459103, "grad_norm": 0.3173174332649723, "learning_rate": 0.0004970596319711756, "loss": 3.2429709434509277, "step": 3290, "token_acc": 0.26938059397884456 }, { "epoch": 1.9293462327763118, "grad_norm": 0.33399134891494087, "learning_rate": 0.0004970559255196462, "loss": 3.226123809814453, "step": 3291, "token_acc": 0.27464143085792425 }, { "epoch": 1.929932571093521, "grad_norm": 0.38467950970576453, "learning_rate": 0.0004970522167473593, "loss": 3.222080707550049, "step": 3292, "token_acc": 0.27545704595548154 }, { "epoch": 1.93051890941073, "grad_norm": 0.41005760985873096, "learning_rate": 0.0004970485056543498, "loss": 3.2169852256774902, "step": 3293, "token_acc": 0.27510152060206133 }, { "epoch": 1.9311052477279391, "grad_norm": 0.35266939899059435, "learning_rate": 0.0004970447922406525, "loss": 3.2392590045928955, "step": 3294, "token_acc": 0.2719471094313618 }, { "epoch": 1.931691586045148, "grad_norm": 0.28299581643499594, "learning_rate": 0.0004970410765063023, "loss": 3.214961528778076, "step": 3295, "token_acc": 0.27615661402778224 }, { "epoch": 1.9322779243623571, "grad_norm": 0.29812639815144615, "learning_rate": 0.000497037358451334, "loss": 3.214001178741455, "step": 3296, "token_acc": 0.27571924847141904 }, { "epoch": 1.932864262679566, "grad_norm": 0.3382327657027396, "learning_rate": 0.0004970336380757827, "loss": 3.2022149562835693, "step": 3297, "token_acc": 0.27966875430475246 }, { "epoch": 1.9334506009967751, "grad_norm": 0.3066979313106334, "learning_rate": 0.0004970299153796831, "loss": 3.202234983444214, "step": 3298, "token_acc": 0.27683720487080615 }, { "epoch": 1.9340369393139842, "grad_norm": 0.3166979973689334, "learning_rate": 0.0004970261903630705, "loss": 3.183774948120117, "step": 3299, "token_acc": 0.2801360276429165 }, { "epoch": 1.9346232776311933, "grad_norm": 0.3361129313449817, "learning_rate": 0.0004970224630259796, "loss": 3.192099094390869, "step": 3300, "token_acc": 0.2789783229709293 }, { "epoch": 1.9352096159484022, "grad_norm": 0.30155742096847576, "learning_rate": 0.0004970187333684455, "loss": 3.2279253005981445, "step": 3301, "token_acc": 0.27584519160424115 }, { "epoch": 1.9357959542656111, "grad_norm": 0.28380554924379486, "learning_rate": 0.0004970150013905033, "loss": 3.1881918907165527, "step": 3302, "token_acc": 0.2798804727728716 }, { "epoch": 1.9363822925828202, "grad_norm": 0.29065498707486437, "learning_rate": 0.0004970112670921881, "loss": 3.2109642028808594, "step": 3303, "token_acc": 0.2763192083954205 }, { "epoch": 1.9369686309000294, "grad_norm": 0.29055430772029495, "learning_rate": 0.0004970075304735348, "loss": 3.197960615158081, "step": 3304, "token_acc": 0.2802554975915564 }, { "epoch": 1.9375549692172385, "grad_norm": 0.2897630141462624, "learning_rate": 0.0004970037915345786, "loss": 3.2007431983947754, "step": 3305, "token_acc": 0.2771131116549829 }, { "epoch": 1.9381413075344474, "grad_norm": 0.32785179104180306, "learning_rate": 0.0004970000502753547, "loss": 3.197744846343994, "step": 3306, "token_acc": 0.2782602174340302 }, { "epoch": 1.9387276458516562, "grad_norm": 0.31800592376082026, "learning_rate": 0.0004969963066958982, "loss": 3.198416233062744, "step": 3307, "token_acc": 0.27837627386155384 }, { "epoch": 1.9393139841688654, "grad_norm": 0.28903748322924483, "learning_rate": 0.0004969925607962441, "loss": 3.223043918609619, "step": 3308, "token_acc": 0.273139188638252 }, { "epoch": 1.9399003224860745, "grad_norm": 0.3052801998129629, "learning_rate": 0.0004969888125764277, "loss": 3.238306999206543, "step": 3309, "token_acc": 0.27303367188459016 }, { "epoch": 1.9404866608032836, "grad_norm": 0.33452790440258146, "learning_rate": 0.0004969850620364843, "loss": 3.208237409591675, "step": 3310, "token_acc": 0.2768305229020988 }, { "epoch": 1.9410729991204925, "grad_norm": 0.36812283946652274, "learning_rate": 0.0004969813091764491, "loss": 3.1692399978637695, "step": 3311, "token_acc": 0.2813889330998992 }, { "epoch": 1.9416593374377016, "grad_norm": 0.33720104522161176, "learning_rate": 0.0004969775539963572, "loss": 3.2040629386901855, "step": 3312, "token_acc": 0.2767496828470321 }, { "epoch": 1.9422456757549105, "grad_norm": 0.31947603938939084, "learning_rate": 0.0004969737964962441, "loss": 3.2441258430480957, "step": 3313, "token_acc": 0.2727668801923972 }, { "epoch": 1.9428320140721196, "grad_norm": 0.3509928067453811, "learning_rate": 0.0004969700366761449, "loss": 3.2401251792907715, "step": 3314, "token_acc": 0.272113694827191 }, { "epoch": 1.9434183523893287, "grad_norm": 0.34754609259944846, "learning_rate": 0.000496966274536095, "loss": 3.2266173362731934, "step": 3315, "token_acc": 0.2746948820722852 }, { "epoch": 1.9440046907065378, "grad_norm": 0.3024852444176208, "learning_rate": 0.0004969625100761298, "loss": 3.191174030303955, "step": 3316, "token_acc": 0.2775932322374211 }, { "epoch": 1.9445910290237467, "grad_norm": 0.3244930690870069, "learning_rate": 0.0004969587432962846, "loss": 3.216667652130127, "step": 3317, "token_acc": 0.2730635711031872 }, { "epoch": 1.9451773673409556, "grad_norm": 0.30955125621802587, "learning_rate": 0.0004969549741965948, "loss": 3.219914197921753, "step": 3318, "token_acc": 0.27517256853247457 }, { "epoch": 1.9457637056581647, "grad_norm": 0.2979258843778903, "learning_rate": 0.0004969512027770957, "loss": 3.1957848072052, "step": 3319, "token_acc": 0.27913488014859883 }, { "epoch": 1.9463500439753738, "grad_norm": 0.32516634886121065, "learning_rate": 0.0004969474290378228, "loss": 3.207803726196289, "step": 3320, "token_acc": 0.27683386875324917 }, { "epoch": 1.946936382292583, "grad_norm": 0.36500612874311833, "learning_rate": 0.0004969436529788118, "loss": 3.145071029663086, "step": 3321, "token_acc": 0.28426602376777715 }, { "epoch": 1.9475227206097918, "grad_norm": 0.32889123234054396, "learning_rate": 0.0004969398746000977, "loss": 3.2044808864593506, "step": 3322, "token_acc": 0.2760641620620226 }, { "epoch": 1.948109058927001, "grad_norm": 0.2833699533231078, "learning_rate": 0.0004969360939017164, "loss": 3.1625518798828125, "step": 3323, "token_acc": 0.2824332458218608 }, { "epoch": 1.9486953972442098, "grad_norm": 0.34591906653089916, "learning_rate": 0.0004969323108837031, "loss": 3.2540786266326904, "step": 3324, "token_acc": 0.26991830951791557 }, { "epoch": 1.949281735561419, "grad_norm": 0.36505975183001654, "learning_rate": 0.0004969285255460936, "loss": 3.154824733734131, "step": 3325, "token_acc": 0.2851618913228015 }, { "epoch": 1.949868073878628, "grad_norm": 0.39321431884086505, "learning_rate": 0.0004969247378889232, "loss": 3.224813938140869, "step": 3326, "token_acc": 0.2738829935206467 }, { "epoch": 1.9504544121958372, "grad_norm": 0.3846119354312488, "learning_rate": 0.0004969209479122277, "loss": 3.1737301349639893, "step": 3327, "token_acc": 0.2799449518989482 }, { "epoch": 1.951040750513046, "grad_norm": 0.3861292516327503, "learning_rate": 0.0004969171556160428, "loss": 3.2148985862731934, "step": 3328, "token_acc": 0.27699554294975687 }, { "epoch": 1.951627088830255, "grad_norm": 0.36739579869917127, "learning_rate": 0.0004969133610004037, "loss": 3.2122507095336914, "step": 3329, "token_acc": 0.27743145424320853 }, { "epoch": 1.952213427147464, "grad_norm": 0.38452749301211436, "learning_rate": 0.0004969095640653464, "loss": 3.222891330718994, "step": 3330, "token_acc": 0.27428287128793627 }, { "epoch": 1.9527997654646732, "grad_norm": 0.37707284398612584, "learning_rate": 0.0004969057648109064, "loss": 3.214402675628662, "step": 3331, "token_acc": 0.27536929744171235 }, { "epoch": 1.9533861037818823, "grad_norm": 0.3179865726349459, "learning_rate": 0.0004969019632371195, "loss": 3.1555724143981934, "step": 3332, "token_acc": 0.28410300461242555 }, { "epoch": 1.9539724420990912, "grad_norm": 0.38914224092933397, "learning_rate": 0.0004968981593440213, "loss": 3.1918838024139404, "step": 3333, "token_acc": 0.27796196951466207 }, { "epoch": 1.9545587804163, "grad_norm": 0.32347663111405445, "learning_rate": 0.0004968943531316477, "loss": 3.2015063762664795, "step": 3334, "token_acc": 0.27894223645653793 }, { "epoch": 1.9551451187335092, "grad_norm": 0.34641286638790986, "learning_rate": 0.0004968905446000344, "loss": 3.2281012535095215, "step": 3335, "token_acc": 0.27331931470612547 }, { "epoch": 1.9557314570507183, "grad_norm": 0.327647333238988, "learning_rate": 0.000496886733749217, "loss": 3.228736639022827, "step": 3336, "token_acc": 0.2746170874333022 }, { "epoch": 1.9563177953679274, "grad_norm": 0.2810965615304208, "learning_rate": 0.0004968829205792314, "loss": 3.227494716644287, "step": 3337, "token_acc": 0.2731039494735081 }, { "epoch": 1.9569041336851363, "grad_norm": 0.35703498332410666, "learning_rate": 0.0004968791050901135, "loss": 3.213259696960449, "step": 3338, "token_acc": 0.276112208738764 }, { "epoch": 1.9574904720023454, "grad_norm": 0.34176390576775517, "learning_rate": 0.000496875287281899, "loss": 3.1860976219177246, "step": 3339, "token_acc": 0.27833402976439203 }, { "epoch": 1.9580768103195543, "grad_norm": 0.29346855712087233, "learning_rate": 0.000496871467154624, "loss": 3.2253170013427734, "step": 3340, "token_acc": 0.2725922176526416 }, { "epoch": 1.9586631486367634, "grad_norm": 0.317951599455007, "learning_rate": 0.0004968676447083242, "loss": 3.226778507232666, "step": 3341, "token_acc": 0.27360041738470536 }, { "epoch": 1.9592494869539725, "grad_norm": 0.34047863011127416, "learning_rate": 0.0004968638199430354, "loss": 3.243438959121704, "step": 3342, "token_acc": 0.2726002703920685 }, { "epoch": 1.9598358252711816, "grad_norm": 0.3634988281518864, "learning_rate": 0.0004968599928587937, "loss": 3.217463731765747, "step": 3343, "token_acc": 0.2742538514354375 }, { "epoch": 1.9604221635883905, "grad_norm": 0.4147377498027789, "learning_rate": 0.000496856163455635, "loss": 3.2335925102233887, "step": 3344, "token_acc": 0.27256705460309516 }, { "epoch": 1.9610085019055994, "grad_norm": 0.355690117363527, "learning_rate": 0.0004968523317335954, "loss": 3.230142831802368, "step": 3345, "token_acc": 0.27425719500284107 }, { "epoch": 1.9615948402228085, "grad_norm": 0.31344275682623896, "learning_rate": 0.0004968484976927108, "loss": 3.2032601833343506, "step": 3346, "token_acc": 0.2756693037145701 }, { "epoch": 1.9621811785400176, "grad_norm": 0.32633238479715965, "learning_rate": 0.0004968446613330171, "loss": 3.210252285003662, "step": 3347, "token_acc": 0.2768882879891206 }, { "epoch": 1.9627675168572267, "grad_norm": 0.3030159048685932, "learning_rate": 0.0004968408226545504, "loss": 3.1647582054138184, "step": 3348, "token_acc": 0.2814034808869109 }, { "epoch": 1.9633538551744356, "grad_norm": 0.3081547936709801, "learning_rate": 0.0004968369816573468, "loss": 3.1871068477630615, "step": 3349, "token_acc": 0.280452049963417 }, { "epoch": 1.9639401934916447, "grad_norm": 0.30437978813655314, "learning_rate": 0.0004968331383414425, "loss": 3.24863338470459, "step": 3350, "token_acc": 0.27110649987256163 }, { "epoch": 1.9645265318088536, "grad_norm": 0.3169628854740949, "learning_rate": 0.0004968292927068733, "loss": 3.1832611560821533, "step": 3351, "token_acc": 0.27870598907242156 }, { "epoch": 1.9651128701260627, "grad_norm": 0.33305862106269346, "learning_rate": 0.0004968254447536756, "loss": 3.210705280303955, "step": 3352, "token_acc": 0.2771752864161473 }, { "epoch": 1.9656992084432718, "grad_norm": 0.3200427322994808, "learning_rate": 0.0004968215944818854, "loss": 3.206730842590332, "step": 3353, "token_acc": 0.277724473872886 }, { "epoch": 1.966285546760481, "grad_norm": 0.2746980214314008, "learning_rate": 0.0004968177418915391, "loss": 3.2191219329833984, "step": 3354, "token_acc": 0.27454794939834665 }, { "epoch": 1.9668718850776898, "grad_norm": 0.2760257099332996, "learning_rate": 0.0004968138869826725, "loss": 3.145629405975342, "step": 3355, "token_acc": 0.2850058730274016 }, { "epoch": 1.9674582233948987, "grad_norm": 0.31299435501510564, "learning_rate": 0.0004968100297553221, "loss": 3.1890993118286133, "step": 3356, "token_acc": 0.2806143716799025 }, { "epoch": 1.9680445617121078, "grad_norm": 0.3066981450081714, "learning_rate": 0.000496806170209524, "loss": 3.2040491104125977, "step": 3357, "token_acc": 0.27722351431550246 }, { "epoch": 1.968630900029317, "grad_norm": 0.32411535703844885, "learning_rate": 0.0004968023083453146, "loss": 3.181764602661133, "step": 3358, "token_acc": 0.28032071897440414 }, { "epoch": 1.969217238346526, "grad_norm": 0.29889532064273516, "learning_rate": 0.00049679844416273, "loss": 3.1503005027770996, "step": 3359, "token_acc": 0.28449555289353445 }, { "epoch": 1.969803576663735, "grad_norm": 0.28801914020054753, "learning_rate": 0.0004967945776618066, "loss": 3.1829357147216797, "step": 3360, "token_acc": 0.2795733275155444 }, { "epoch": 1.9703899149809438, "grad_norm": 0.3222715569418526, "learning_rate": 0.0004967907088425808, "loss": 3.2364463806152344, "step": 3361, "token_acc": 0.27116048819888705 }, { "epoch": 1.970976253298153, "grad_norm": 0.29812338114054704, "learning_rate": 0.0004967868377050887, "loss": 3.233306884765625, "step": 3362, "token_acc": 0.27390893482224854 }, { "epoch": 1.971562591615362, "grad_norm": 0.2829867325858652, "learning_rate": 0.0004967829642493669, "loss": 3.2222745418548584, "step": 3363, "token_acc": 0.2749352398182603 }, { "epoch": 1.9721489299325712, "grad_norm": 0.27640288131647706, "learning_rate": 0.0004967790884754516, "loss": 3.2057414054870605, "step": 3364, "token_acc": 0.27539596370719144 }, { "epoch": 1.97273526824978, "grad_norm": 0.28448650152955157, "learning_rate": 0.0004967752103833793, "loss": 3.167477607727051, "step": 3365, "token_acc": 0.2818621399176955 }, { "epoch": 1.9733216065669892, "grad_norm": 0.29509026980304615, "learning_rate": 0.0004967713299731866, "loss": 3.187300205230713, "step": 3366, "token_acc": 0.28179575986565913 }, { "epoch": 1.973907944884198, "grad_norm": 0.2996013777367461, "learning_rate": 0.0004967674472449097, "loss": 3.2192134857177734, "step": 3367, "token_acc": 0.27411062034131295 }, { "epoch": 1.9744942832014072, "grad_norm": 0.3050592507734187, "learning_rate": 0.0004967635621985851, "loss": 3.25030255317688, "step": 3368, "token_acc": 0.270256284339118 }, { "epoch": 1.9750806215186163, "grad_norm": 0.3984322109294327, "learning_rate": 0.0004967596748342493, "loss": 3.1466012001037598, "step": 3369, "token_acc": 0.2875937261713124 }, { "epoch": 1.9756669598358254, "grad_norm": 0.5117423370039131, "learning_rate": 0.000496755785151939, "loss": 3.216395854949951, "step": 3370, "token_acc": 0.27599473900971155 }, { "epoch": 1.9762532981530343, "grad_norm": 0.49348780601224923, "learning_rate": 0.0004967518931516905, "loss": 3.2253334522247314, "step": 3371, "token_acc": 0.27389060939311655 }, { "epoch": 1.9768396364702432, "grad_norm": 0.3627416596201303, "learning_rate": 0.0004967479988335406, "loss": 3.173816204071045, "step": 3372, "token_acc": 0.28169048228306687 }, { "epoch": 1.9774259747874523, "grad_norm": 0.3239377702970308, "learning_rate": 0.0004967441021975256, "loss": 3.2274770736694336, "step": 3373, "token_acc": 0.27451598695920715 }, { "epoch": 1.9780123131046614, "grad_norm": 0.39310271409069536, "learning_rate": 0.0004967402032436824, "loss": 3.224817991256714, "step": 3374, "token_acc": 0.27354017392773605 }, { "epoch": 1.9785986514218705, "grad_norm": 0.39903641201492623, "learning_rate": 0.0004967363019720474, "loss": 3.2189559936523438, "step": 3375, "token_acc": 0.27535255159693334 }, { "epoch": 1.9791849897390794, "grad_norm": 0.44438675326628685, "learning_rate": 0.0004967323983826574, "loss": 3.2311859130859375, "step": 3376, "token_acc": 0.2739985560023399 }, { "epoch": 1.9797713280562885, "grad_norm": 0.3427214435933039, "learning_rate": 0.0004967284924755488, "loss": 3.239051580429077, "step": 3377, "token_acc": 0.2720193498908619 }, { "epoch": 1.9803576663734974, "grad_norm": 0.3639660971884832, "learning_rate": 0.0004967245842507587, "loss": 3.218960762023926, "step": 3378, "token_acc": 0.2745266602602395 }, { "epoch": 1.9809440046907065, "grad_norm": 0.33176068390089725, "learning_rate": 0.0004967206737083235, "loss": 3.216127634048462, "step": 3379, "token_acc": 0.2766560103788969 }, { "epoch": 1.9815303430079156, "grad_norm": 0.2702012507392123, "learning_rate": 0.00049671676084828, "loss": 3.1952576637268066, "step": 3380, "token_acc": 0.27673677973633537 }, { "epoch": 1.9821166813251248, "grad_norm": 0.2748472340894447, "learning_rate": 0.000496712845670665, "loss": 3.182241678237915, "step": 3381, "token_acc": 0.2789931799071969 }, { "epoch": 1.9827030196423336, "grad_norm": 0.2922055211023222, "learning_rate": 0.0004967089281755153, "loss": 3.206305980682373, "step": 3382, "token_acc": 0.2754251324390275 }, { "epoch": 1.9832893579595425, "grad_norm": 0.2968008884116049, "learning_rate": 0.0004967050083628676, "loss": 3.1692495346069336, "step": 3383, "token_acc": 0.28164042285798774 }, { "epoch": 1.9838756962767516, "grad_norm": 0.26871514534168844, "learning_rate": 0.0004967010862327589, "loss": 3.2181644439697266, "step": 3384, "token_acc": 0.2765381032330724 }, { "epoch": 1.9844620345939608, "grad_norm": 0.3035574130477789, "learning_rate": 0.0004966971617852259, "loss": 3.1683802604675293, "step": 3385, "token_acc": 0.28147756095565374 }, { "epoch": 1.9850483729111699, "grad_norm": 0.32604001089114637, "learning_rate": 0.0004966932350203054, "loss": 3.2157745361328125, "step": 3386, "token_acc": 0.2750828166690298 }, { "epoch": 1.9856347112283788, "grad_norm": 0.3007621350925398, "learning_rate": 0.0004966893059380344, "loss": 3.19637393951416, "step": 3387, "token_acc": 0.27860962839474074 }, { "epoch": 1.9862210495455876, "grad_norm": 0.33411954408794264, "learning_rate": 0.0004966853745384499, "loss": 3.179593086242676, "step": 3388, "token_acc": 0.2811720551092507 }, { "epoch": 1.9868073878627968, "grad_norm": 0.3133667040489698, "learning_rate": 0.0004966814408215887, "loss": 3.204219341278076, "step": 3389, "token_acc": 0.2771576590981772 }, { "epoch": 1.9873937261800059, "grad_norm": 0.2858855748366626, "learning_rate": 0.0004966775047874876, "loss": 3.1873698234558105, "step": 3390, "token_acc": 0.2786249184605349 }, { "epoch": 1.987980064497215, "grad_norm": 0.2798331746046621, "learning_rate": 0.0004966735664361839, "loss": 3.19746470451355, "step": 3391, "token_acc": 0.27726318128197125 }, { "epoch": 1.9885664028144239, "grad_norm": 0.3135849821052749, "learning_rate": 0.0004966696257677144, "loss": 3.1850180625915527, "step": 3392, "token_acc": 0.2780155953036466 }, { "epoch": 1.989152741131633, "grad_norm": 0.31538329780212543, "learning_rate": 0.0004966656827821161, "loss": 3.2509994506835938, "step": 3393, "token_acc": 0.2710936985913069 }, { "epoch": 1.9897390794488419, "grad_norm": 0.2681813474883174, "learning_rate": 0.0004966617374794262, "loss": 3.2290990352630615, "step": 3394, "token_acc": 0.27330265117587693 }, { "epoch": 1.990325417766051, "grad_norm": 0.3434879539971122, "learning_rate": 0.0004966577898596815, "loss": 3.205097198486328, "step": 3395, "token_acc": 0.27613995271383157 }, { "epoch": 1.99091175608326, "grad_norm": 0.3232015180305107, "learning_rate": 0.0004966538399229194, "loss": 3.2253174781799316, "step": 3396, "token_acc": 0.2748125183292136 }, { "epoch": 1.9914980944004692, "grad_norm": 0.28593101575011953, "learning_rate": 0.0004966498876691768, "loss": 3.174898386001587, "step": 3397, "token_acc": 0.28204193444228026 }, { "epoch": 1.992084432717678, "grad_norm": 0.3010890692431842, "learning_rate": 0.0004966459330984909, "loss": 3.205470561981201, "step": 3398, "token_acc": 0.27923359134990294 }, { "epoch": 1.992670771034887, "grad_norm": 0.3071122834129306, "learning_rate": 0.0004966419762108988, "loss": 3.243103504180908, "step": 3399, "token_acc": 0.27229421130921205 }, { "epoch": 1.993257109352096, "grad_norm": 0.3840391763616149, "learning_rate": 0.0004966380170064376, "loss": 3.2434725761413574, "step": 3400, "token_acc": 0.2718017321730735 }, { "epoch": 1.9938434476693052, "grad_norm": 0.3445531193342586, "learning_rate": 0.0004966340554851447, "loss": 3.22159481048584, "step": 3401, "token_acc": 0.27383123122668707 }, { "epoch": 1.9944297859865143, "grad_norm": 0.38552247999754213, "learning_rate": 0.0004966300916470572, "loss": 3.241112232208252, "step": 3402, "token_acc": 0.272339960956894 }, { "epoch": 1.9950161243037232, "grad_norm": 0.40415804477175776, "learning_rate": 0.0004966261254922122, "loss": 3.226780652999878, "step": 3403, "token_acc": 0.27282641592135337 }, { "epoch": 1.9956024626209323, "grad_norm": 0.34206865696249206, "learning_rate": 0.0004966221570206472, "loss": 3.2067711353302, "step": 3404, "token_acc": 0.27706097328474877 }, { "epoch": 1.9961888009381412, "grad_norm": 0.32924262041078284, "learning_rate": 0.0004966181862323993, "loss": 3.2241430282592773, "step": 3405, "token_acc": 0.27307748772517443 }, { "epoch": 1.9967751392553503, "grad_norm": 0.40896761534670845, "learning_rate": 0.0004966142131275059, "loss": 3.2088308334350586, "step": 3406, "token_acc": 0.2755536346314505 }, { "epoch": 1.9973614775725594, "grad_norm": 0.36975675384336365, "learning_rate": 0.0004966102377060043, "loss": 3.2081775665283203, "step": 3407, "token_acc": 0.2762099053317595 }, { "epoch": 1.9979478158897686, "grad_norm": 0.30249492311660875, "learning_rate": 0.0004966062599679318, "loss": 3.2071213722229004, "step": 3408, "token_acc": 0.2771613230798523 }, { "epoch": 1.9985341542069774, "grad_norm": 0.3196843746497856, "learning_rate": 0.0004966022799133258, "loss": 3.1754231452941895, "step": 3409, "token_acc": 0.2816014527494837 }, { "epoch": 1.9991204925241863, "grad_norm": 0.32143911555271226, "learning_rate": 0.0004965982975422236, "loss": 3.1902358531951904, "step": 3410, "token_acc": 0.27771570086328695 }, { "epoch": 1.9997068308413954, "grad_norm": 0.3384666116808621, "learning_rate": 0.0004965943128546627, "loss": 3.1928365230560303, "step": 3411, "token_acc": 0.27727413783455174 }, { "epoch": 2.0, "grad_norm": 0.3350372638226426, "learning_rate": 0.0004965903258506806, "loss": 3.185788154602051, "step": 3412, "token_acc": 0.28105435754745106 }, { "epoch": 2.0, "eval_loss": 3.179800033569336, "eval_runtime": 6.4459, "eval_samples_per_second": 39.715, "eval_steps_per_second": 4.964, "eval_token_acc": 0.2796646736115392, "step": 3412 }, { "epoch": 2.000586338317209, "grad_norm": 0.35875580883392394, "learning_rate": 0.0004965863365303146, "loss": 3.1508355140686035, "step": 3413, "token_acc": 0.2829138676960799 }, { "epoch": 2.0011726766344182, "grad_norm": 0.33384211734752284, "learning_rate": 0.0004965823448936024, "loss": 3.1868162155151367, "step": 3414, "token_acc": 0.2782218622800467 }, { "epoch": 2.001759014951627, "grad_norm": 0.3597370072349644, "learning_rate": 0.0004965783509405812, "loss": 3.2320971488952637, "step": 3415, "token_acc": 0.27390920718388684 }, { "epoch": 2.002345353268836, "grad_norm": 0.40828956745933, "learning_rate": 0.0004965743546712887, "loss": 3.1918787956237793, "step": 3416, "token_acc": 0.27680324185397465 }, { "epoch": 2.002931691586045, "grad_norm": 0.3311832707862652, "learning_rate": 0.0004965703560857624, "loss": 3.1691036224365234, "step": 3417, "token_acc": 0.2807174138858005 }, { "epoch": 2.0035180299032542, "grad_norm": 0.33338729419960617, "learning_rate": 0.0004965663551840399, "loss": 3.133450508117676, "step": 3418, "token_acc": 0.28587711708537766 }, { "epoch": 2.0041043682204633, "grad_norm": 0.30465703089497426, "learning_rate": 0.0004965623519661587, "loss": 3.2127926349639893, "step": 3419, "token_acc": 0.2745030691448808 }, { "epoch": 2.0046907065376725, "grad_norm": 0.33295322789137144, "learning_rate": 0.0004965583464321564, "loss": 3.1746695041656494, "step": 3420, "token_acc": 0.2792896236602932 }, { "epoch": 2.005277044854881, "grad_norm": 0.29376194440567804, "learning_rate": 0.0004965543385820708, "loss": 3.181506633758545, "step": 3421, "token_acc": 0.27898057684743205 }, { "epoch": 2.0058633831720902, "grad_norm": 0.3072354569450418, "learning_rate": 0.0004965503284159393, "loss": 3.1919167041778564, "step": 3422, "token_acc": 0.27944150621849007 }, { "epoch": 2.0064497214892993, "grad_norm": 0.2861043073621896, "learning_rate": 0.0004965463159337998, "loss": 3.1077888011932373, "step": 3423, "token_acc": 0.28789548329325215 }, { "epoch": 2.0070360598065085, "grad_norm": 0.31513841047034075, "learning_rate": 0.0004965423011356898, "loss": 3.1577324867248535, "step": 3424, "token_acc": 0.28225988397077784 }, { "epoch": 2.0076223981237176, "grad_norm": 0.3103290881226257, "learning_rate": 0.0004965382840216472, "loss": 3.149661064147949, "step": 3425, "token_acc": 0.28335570009728883 }, { "epoch": 2.0082087364409262, "grad_norm": 0.3402066694432324, "learning_rate": 0.0004965342645917096, "loss": 3.1585540771484375, "step": 3426, "token_acc": 0.28156966559777713 }, { "epoch": 2.0087950747581353, "grad_norm": 0.39288743381694846, "learning_rate": 0.0004965302428459147, "loss": 3.1738882064819336, "step": 3427, "token_acc": 0.2801654300953563 }, { "epoch": 2.0093814130753445, "grad_norm": 0.387838202403588, "learning_rate": 0.0004965262187843005, "loss": 3.1637301445007324, "step": 3428, "token_acc": 0.2810338912178752 }, { "epoch": 2.0099677513925536, "grad_norm": 0.3240338473238948, "learning_rate": 0.0004965221924069046, "loss": 3.1559393405914307, "step": 3429, "token_acc": 0.28152189788636883 }, { "epoch": 2.0105540897097627, "grad_norm": 0.3421680580279285, "learning_rate": 0.0004965181637137649, "loss": 3.1266002655029297, "step": 3430, "token_acc": 0.2865291897509631 }, { "epoch": 2.0111404280269713, "grad_norm": 0.3030109667188654, "learning_rate": 0.0004965141327049193, "loss": 3.1224703788757324, "step": 3431, "token_acc": 0.28618385097047205 }, { "epoch": 2.0117267663441805, "grad_norm": 0.32424619233641283, "learning_rate": 0.0004965100993804055, "loss": 3.1342368125915527, "step": 3432, "token_acc": 0.28405660976074126 }, { "epoch": 2.0123131046613896, "grad_norm": 0.33764924128893886, "learning_rate": 0.0004965060637402616, "loss": 3.1585075855255127, "step": 3433, "token_acc": 0.2827173788153686 }, { "epoch": 2.0128994429785987, "grad_norm": 0.274633759943592, "learning_rate": 0.0004965020257845254, "loss": 3.138598918914795, "step": 3434, "token_acc": 0.2844765804254468 }, { "epoch": 2.013485781295808, "grad_norm": 0.3110274420573675, "learning_rate": 0.0004964979855132348, "loss": 3.1533985137939453, "step": 3435, "token_acc": 0.2837424542081928 }, { "epoch": 2.014072119613017, "grad_norm": 0.3091539342473717, "learning_rate": 0.0004964939429264277, "loss": 3.154952049255371, "step": 3436, "token_acc": 0.2829913978379935 }, { "epoch": 2.0146584579302256, "grad_norm": 0.3295448010968533, "learning_rate": 0.0004964898980241423, "loss": 3.134345293045044, "step": 3437, "token_acc": 0.2838138236916939 }, { "epoch": 2.0152447962474347, "grad_norm": 0.3442431660662871, "learning_rate": 0.0004964858508064164, "loss": 3.143533945083618, "step": 3438, "token_acc": 0.28328351404033186 }, { "epoch": 2.015831134564644, "grad_norm": 0.36977907461327286, "learning_rate": 0.000496481801273288, "loss": 3.1626033782958984, "step": 3439, "token_acc": 0.2804114847842604 }, { "epoch": 2.016417472881853, "grad_norm": 0.4070247361963261, "learning_rate": 0.0004964777494247953, "loss": 3.172485828399658, "step": 3440, "token_acc": 0.28201909553660404 }, { "epoch": 2.017003811199062, "grad_norm": 0.3562799194813373, "learning_rate": 0.0004964736952609763, "loss": 3.16035795211792, "step": 3441, "token_acc": 0.28164421269950507 }, { "epoch": 2.0175901495162707, "grad_norm": 0.3025547194537972, "learning_rate": 0.000496469638781869, "loss": 3.180424213409424, "step": 3442, "token_acc": 0.28117562716372485 }, { "epoch": 2.01817648783348, "grad_norm": 0.340688486249103, "learning_rate": 0.0004964655799875115, "loss": 3.2261693477630615, "step": 3443, "token_acc": 0.271766645765113 }, { "epoch": 2.018762826150689, "grad_norm": 0.29516423177617485, "learning_rate": 0.0004964615188779421, "loss": 3.1849887371063232, "step": 3444, "token_acc": 0.2779431541727738 }, { "epoch": 2.019349164467898, "grad_norm": 0.30390236103735796, "learning_rate": 0.0004964574554531989, "loss": 3.1781201362609863, "step": 3445, "token_acc": 0.277971381737439 }, { "epoch": 2.019935502785107, "grad_norm": 0.3420737946630496, "learning_rate": 0.0004964533897133199, "loss": 3.1747143268585205, "step": 3446, "token_acc": 0.2798935749736723 }, { "epoch": 2.0205218411023163, "grad_norm": 0.2993293046245871, "learning_rate": 0.0004964493216583435, "loss": 3.1464414596557617, "step": 3447, "token_acc": 0.2839758334429292 }, { "epoch": 2.021108179419525, "grad_norm": 0.31849663159498026, "learning_rate": 0.0004964452512883076, "loss": 3.1190826892852783, "step": 3448, "token_acc": 0.2883136415197747 }, { "epoch": 2.021694517736734, "grad_norm": 0.28680905052226563, "learning_rate": 0.0004964411786032509, "loss": 3.121431827545166, "step": 3449, "token_acc": 0.2888548245452585 }, { "epoch": 2.022280856053943, "grad_norm": 0.27241065565757416, "learning_rate": 0.0004964371036032113, "loss": 3.1582865715026855, "step": 3450, "token_acc": 0.2805362387205746 }, { "epoch": 2.0228671943711523, "grad_norm": 0.29258297918491294, "learning_rate": 0.0004964330262882271, "loss": 3.1589155197143555, "step": 3451, "token_acc": 0.28101455777471807 }, { "epoch": 2.0234535326883614, "grad_norm": 0.27361369502145405, "learning_rate": 0.0004964289466583369, "loss": 3.21328067779541, "step": 3452, "token_acc": 0.2759543284246309 }, { "epoch": 2.02403987100557, "grad_norm": 0.28047057240366896, "learning_rate": 0.0004964248647135787, "loss": 3.0913705825805664, "step": 3453, "token_acc": 0.29065486800556434 }, { "epoch": 2.024626209322779, "grad_norm": 0.29635496724541266, "learning_rate": 0.000496420780453991, "loss": 3.221021890640259, "step": 3454, "token_acc": 0.273634291429907 }, { "epoch": 2.0252125476399883, "grad_norm": 0.29690948410474016, "learning_rate": 0.000496416693879612, "loss": 3.1651062965393066, "step": 3455, "token_acc": 0.2801676267580051 }, { "epoch": 2.0257988859571974, "grad_norm": 0.2679877195880277, "learning_rate": 0.0004964126049904804, "loss": 3.1235530376434326, "step": 3456, "token_acc": 0.28820668563904345 }, { "epoch": 2.0263852242744065, "grad_norm": 0.31758540462184087, "learning_rate": 0.0004964085137866343, "loss": 3.13019061088562, "step": 3457, "token_acc": 0.28610102344007926 }, { "epoch": 2.026971562591615, "grad_norm": 0.3522954776138269, "learning_rate": 0.0004964044202681123, "loss": 3.17744779586792, "step": 3458, "token_acc": 0.27839479176759885 }, { "epoch": 2.0275579009088243, "grad_norm": 0.31616605168380135, "learning_rate": 0.0004964003244349528, "loss": 3.0942463874816895, "step": 3459, "token_acc": 0.29109055899017733 }, { "epoch": 2.0281442392260334, "grad_norm": 0.2933340531072689, "learning_rate": 0.0004963962262871942, "loss": 3.1837494373321533, "step": 3460, "token_acc": 0.27938652828443716 }, { "epoch": 2.0287305775432425, "grad_norm": 0.3661998329185042, "learning_rate": 0.0004963921258248752, "loss": 3.1639838218688965, "step": 3461, "token_acc": 0.27997121697890953 }, { "epoch": 2.0293169158604516, "grad_norm": 0.39801333982721926, "learning_rate": 0.0004963880230480341, "loss": 3.151569366455078, "step": 3462, "token_acc": 0.28339760417927096 }, { "epoch": 2.0299032541776607, "grad_norm": 0.2918972213657598, "learning_rate": 0.0004963839179567095, "loss": 3.1583714485168457, "step": 3463, "token_acc": 0.2836825000264654 }, { "epoch": 2.0304895924948694, "grad_norm": 0.31358455919066347, "learning_rate": 0.0004963798105509402, "loss": 3.1492512226104736, "step": 3464, "token_acc": 0.2833523272010275 }, { "epoch": 2.0310759308120785, "grad_norm": 0.37175887310118133, "learning_rate": 0.0004963757008307644, "loss": 3.1634984016418457, "step": 3465, "token_acc": 0.28234213781249834 }, { "epoch": 2.0316622691292876, "grad_norm": 0.2888300833681739, "learning_rate": 0.0004963715887962209, "loss": 3.1662116050720215, "step": 3466, "token_acc": 0.2824034289370771 }, { "epoch": 2.0322486074464967, "grad_norm": 0.30137999250448483, "learning_rate": 0.0004963674744473484, "loss": 3.1572279930114746, "step": 3467, "token_acc": 0.2819186571351892 }, { "epoch": 2.032834945763706, "grad_norm": 0.3308303974368115, "learning_rate": 0.0004963633577841854, "loss": 3.188631296157837, "step": 3468, "token_acc": 0.27630911856304774 }, { "epoch": 2.0334212840809145, "grad_norm": 0.3199950863406161, "learning_rate": 0.0004963592388067706, "loss": 3.1774768829345703, "step": 3469, "token_acc": 0.2794430258074568 }, { "epoch": 2.0340076223981236, "grad_norm": 0.2996700842341045, "learning_rate": 0.0004963551175151429, "loss": 3.1685638427734375, "step": 3470, "token_acc": 0.2798951888028434 }, { "epoch": 2.0345939607153327, "grad_norm": 0.28280763402728765, "learning_rate": 0.0004963509939093406, "loss": 3.1463968753814697, "step": 3471, "token_acc": 0.28357994952970866 }, { "epoch": 2.035180299032542, "grad_norm": 0.29933857702188066, "learning_rate": 0.0004963468679894027, "loss": 3.167685031890869, "step": 3472, "token_acc": 0.28046617817443326 }, { "epoch": 2.035766637349751, "grad_norm": 0.3709134440599054, "learning_rate": 0.0004963427397553682, "loss": 3.1859445571899414, "step": 3473, "token_acc": 0.27930743418190856 }, { "epoch": 2.03635297566696, "grad_norm": 0.37947904954862194, "learning_rate": 0.0004963386092072754, "loss": 3.1485390663146973, "step": 3474, "token_acc": 0.28306456408565295 }, { "epoch": 2.0369393139841687, "grad_norm": 0.32484343676740385, "learning_rate": 0.0004963344763451633, "loss": 3.1283059120178223, "step": 3475, "token_acc": 0.2866285309465025 }, { "epoch": 2.037525652301378, "grad_norm": 0.281079589854818, "learning_rate": 0.0004963303411690708, "loss": 3.159367084503174, "step": 3476, "token_acc": 0.2807503791231257 }, { "epoch": 2.038111990618587, "grad_norm": 0.3083643422651259, "learning_rate": 0.0004963262036790366, "loss": 3.123465061187744, "step": 3477, "token_acc": 0.2842507818870573 }, { "epoch": 2.038698328935796, "grad_norm": 0.3577096881189804, "learning_rate": 0.0004963220638750998, "loss": 3.1622657775878906, "step": 3478, "token_acc": 0.28208336330658007 }, { "epoch": 2.039284667253005, "grad_norm": 0.34129021682301103, "learning_rate": 0.000496317921757299, "loss": 3.171210289001465, "step": 3479, "token_acc": 0.2820871357079776 }, { "epoch": 2.039871005570214, "grad_norm": 0.30148110609353196, "learning_rate": 0.0004963137773256732, "loss": 3.1927154064178467, "step": 3480, "token_acc": 0.2766802441334645 }, { "epoch": 2.040457343887423, "grad_norm": 0.3439958800312842, "learning_rate": 0.0004963096305802614, "loss": 3.156517505645752, "step": 3481, "token_acc": 0.2843224671124022 }, { "epoch": 2.041043682204632, "grad_norm": 0.30938180630834594, "learning_rate": 0.0004963054815211026, "loss": 3.1485118865966797, "step": 3482, "token_acc": 0.28331045500109764 }, { "epoch": 2.041630020521841, "grad_norm": 0.3275798730834719, "learning_rate": 0.0004963013301482357, "loss": 3.1244630813598633, "step": 3483, "token_acc": 0.28709181621307733 }, { "epoch": 2.0422163588390503, "grad_norm": 0.3523921767713871, "learning_rate": 0.0004962971764616997, "loss": 3.193903923034668, "step": 3484, "token_acc": 0.2772761708416647 }, { "epoch": 2.042802697156259, "grad_norm": 0.2770416130014744, "learning_rate": 0.0004962930204615336, "loss": 3.181400775909424, "step": 3485, "token_acc": 0.2797170617877511 }, { "epoch": 2.043389035473468, "grad_norm": 0.3256875788238273, "learning_rate": 0.0004962888621477764, "loss": 3.154294490814209, "step": 3486, "token_acc": 0.2802842850562893 }, { "epoch": 2.043975373790677, "grad_norm": 0.28410716712179396, "learning_rate": 0.0004962847015204672, "loss": 3.1605420112609863, "step": 3487, "token_acc": 0.28230657215722743 }, { "epoch": 2.0445617121078863, "grad_norm": 0.31885282666731374, "learning_rate": 0.0004962805385796453, "loss": 3.162942409515381, "step": 3488, "token_acc": 0.2812634084547331 }, { "epoch": 2.0451480504250954, "grad_norm": 0.3517513111549293, "learning_rate": 0.0004962763733253494, "loss": 3.1280012130737305, "step": 3489, "token_acc": 0.2866785631158352 }, { "epoch": 2.0457343887423045, "grad_norm": 0.3013108459745572, "learning_rate": 0.0004962722057576189, "loss": 3.1190743446350098, "step": 3490, "token_acc": 0.288037540023171 }, { "epoch": 2.046320727059513, "grad_norm": 0.34340153046298844, "learning_rate": 0.0004962680358764929, "loss": 3.158818006515503, "step": 3491, "token_acc": 0.28323040885860307 }, { "epoch": 2.0469070653767223, "grad_norm": 0.3178031395919165, "learning_rate": 0.0004962638636820105, "loss": 3.175481081008911, "step": 3492, "token_acc": 0.2801295535569811 }, { "epoch": 2.0474934036939314, "grad_norm": 0.3494292597401866, "learning_rate": 0.0004962596891742111, "loss": 3.1425704956054688, "step": 3493, "token_acc": 0.28476312353501554 }, { "epoch": 2.0480797420111405, "grad_norm": 0.38791443768303097, "learning_rate": 0.0004962555123531336, "loss": 3.190159559249878, "step": 3494, "token_acc": 0.27901312513794996 }, { "epoch": 2.0486660803283496, "grad_norm": 0.35494919740835, "learning_rate": 0.0004962513332188174, "loss": 3.1541690826416016, "step": 3495, "token_acc": 0.2825747021863361 }, { "epoch": 2.0492524186455583, "grad_norm": 0.3114678285031747, "learning_rate": 0.0004962471517713018, "loss": 3.1423988342285156, "step": 3496, "token_acc": 0.28280823018807266 }, { "epoch": 2.0498387569627674, "grad_norm": 0.28853143094853734, "learning_rate": 0.0004962429680106261, "loss": 3.181385040283203, "step": 3497, "token_acc": 0.2780049947518911 }, { "epoch": 2.0504250952799765, "grad_norm": 0.3069139379075612, "learning_rate": 0.0004962387819368294, "loss": 3.1778111457824707, "step": 3498, "token_acc": 0.2801535567416222 }, { "epoch": 2.0510114335971856, "grad_norm": 0.24579750477275658, "learning_rate": 0.0004962345935499512, "loss": 3.1404266357421875, "step": 3499, "token_acc": 0.285482450160974 }, { "epoch": 2.0515977719143947, "grad_norm": 0.29266988651919235, "learning_rate": 0.0004962304028500309, "loss": 3.1500110626220703, "step": 3500, "token_acc": 0.2822400437965042 }, { "epoch": 2.052184110231604, "grad_norm": 0.31269373322578076, "learning_rate": 0.0004962262098371075, "loss": 3.150394916534424, "step": 3501, "token_acc": 0.28254694566209804 }, { "epoch": 2.0527704485488125, "grad_norm": 0.3038688679674328, "learning_rate": 0.0004962220145112209, "loss": 3.1210784912109375, "step": 3502, "token_acc": 0.2876300899262403 }, { "epoch": 2.0533567868660216, "grad_norm": 0.2624599682869427, "learning_rate": 0.0004962178168724102, "loss": 3.1782212257385254, "step": 3503, "token_acc": 0.27897679958679866 }, { "epoch": 2.0539431251832307, "grad_norm": 0.3358499889499518, "learning_rate": 0.0004962136169207148, "loss": 3.1585726737976074, "step": 3504, "token_acc": 0.28222849497541785 }, { "epoch": 2.05452946350044, "grad_norm": 0.31958405395188955, "learning_rate": 0.0004962094146561744, "loss": 3.185286521911621, "step": 3505, "token_acc": 0.27590871064338823 }, { "epoch": 2.055115801817649, "grad_norm": 0.2786987652964806, "learning_rate": 0.0004962052100788282, "loss": 3.138186454772949, "step": 3506, "token_acc": 0.2839263448034476 }, { "epoch": 2.0557021401348576, "grad_norm": 0.3549713155772115, "learning_rate": 0.0004962010031887159, "loss": 3.1547603607177734, "step": 3507, "token_acc": 0.28420984062187865 }, { "epoch": 2.0562884784520667, "grad_norm": 0.3284620560610857, "learning_rate": 0.000496196793985877, "loss": 3.178173542022705, "step": 3508, "token_acc": 0.2805207348863294 }, { "epoch": 2.056874816769276, "grad_norm": 0.3049871511294685, "learning_rate": 0.0004961925824703508, "loss": 3.174515724182129, "step": 3509, "token_acc": 0.27711550688078096 }, { "epoch": 2.057461155086485, "grad_norm": 0.3469896875869796, "learning_rate": 0.0004961883686421772, "loss": 3.1745896339416504, "step": 3510, "token_acc": 0.27968247277122754 }, { "epoch": 2.058047493403694, "grad_norm": 0.3471731869961092, "learning_rate": 0.0004961841525013955, "loss": 3.133080244064331, "step": 3511, "token_acc": 0.28482637167410346 }, { "epoch": 2.0586338317209028, "grad_norm": 0.37933898487936735, "learning_rate": 0.0004961799340480454, "loss": 3.1512584686279297, "step": 3512, "token_acc": 0.283558724377107 }, { "epoch": 2.059220170038112, "grad_norm": 0.3543709774917021, "learning_rate": 0.0004961757132821667, "loss": 3.1309523582458496, "step": 3513, "token_acc": 0.2842828967691345 }, { "epoch": 2.059806508355321, "grad_norm": 0.3068409480636232, "learning_rate": 0.0004961714902037988, "loss": 3.1454062461853027, "step": 3514, "token_acc": 0.2830581239999789 }, { "epoch": 2.06039284667253, "grad_norm": 0.3046654902798207, "learning_rate": 0.0004961672648129815, "loss": 3.1560494899749756, "step": 3515, "token_acc": 0.28205461087236444 }, { "epoch": 2.060979184989739, "grad_norm": 0.3368244147021095, "learning_rate": 0.0004961630371097544, "loss": 3.1810343265533447, "step": 3516, "token_acc": 0.2774701115973284 }, { "epoch": 2.0615655233069483, "grad_norm": 0.3701660986100902, "learning_rate": 0.0004961588070941573, "loss": 3.149977922439575, "step": 3517, "token_acc": 0.28391301432395766 }, { "epoch": 2.062151861624157, "grad_norm": 0.33764286712926145, "learning_rate": 0.0004961545747662299, "loss": 3.1634271144866943, "step": 3518, "token_acc": 0.2803246937616614 }, { "epoch": 2.062738199941366, "grad_norm": 0.29929017768404687, "learning_rate": 0.000496150340126012, "loss": 3.1488876342773438, "step": 3519, "token_acc": 0.283398571952586 }, { "epoch": 2.063324538258575, "grad_norm": 0.3860074943447778, "learning_rate": 0.0004961461031735433, "loss": 3.162026882171631, "step": 3520, "token_acc": 0.2818660044054325 }, { "epoch": 2.0639108765757843, "grad_norm": 0.40322662307535806, "learning_rate": 0.0004961418639088637, "loss": 3.1602020263671875, "step": 3521, "token_acc": 0.28260213416670005 }, { "epoch": 2.0644972148929934, "grad_norm": 0.3063372995425531, "learning_rate": 0.000496137622332013, "loss": 3.168941020965576, "step": 3522, "token_acc": 0.2815684937792519 }, { "epoch": 2.065083553210202, "grad_norm": 0.2740548937315513, "learning_rate": 0.000496133378443031, "loss": 3.1793575286865234, "step": 3523, "token_acc": 0.27917914748038974 }, { "epoch": 2.065669891527411, "grad_norm": 0.3498037039136486, "learning_rate": 0.0004961291322419575, "loss": 3.126283884048462, "step": 3524, "token_acc": 0.2856138235885071 }, { "epoch": 2.0662562298446203, "grad_norm": 0.30430644091772163, "learning_rate": 0.0004961248837288325, "loss": 3.2212233543395996, "step": 3525, "token_acc": 0.27320839130316177 }, { "epoch": 2.0668425681618294, "grad_norm": 0.3080976696911605, "learning_rate": 0.0004961206329036959, "loss": 3.1706457138061523, "step": 3526, "token_acc": 0.2791342081873791 }, { "epoch": 2.0674289064790385, "grad_norm": 0.31202474498579247, "learning_rate": 0.0004961163797665876, "loss": 3.138956069946289, "step": 3527, "token_acc": 0.28454223579068366 }, { "epoch": 2.068015244796247, "grad_norm": 0.33213744345953095, "learning_rate": 0.0004961121243175476, "loss": 3.1391143798828125, "step": 3528, "token_acc": 0.2841513135470845 }, { "epoch": 2.0686015831134563, "grad_norm": 0.2985902388928904, "learning_rate": 0.0004961078665566158, "loss": 3.17056941986084, "step": 3529, "token_acc": 0.28063272948397777 }, { "epoch": 2.0691879214306654, "grad_norm": 0.2684457845970054, "learning_rate": 0.0004961036064838321, "loss": 3.145906448364258, "step": 3530, "token_acc": 0.2838580320643757 }, { "epoch": 2.0697742597478745, "grad_norm": 0.2915488975426558, "learning_rate": 0.0004960993440992368, "loss": 3.174548625946045, "step": 3531, "token_acc": 0.28032516531357876 }, { "epoch": 2.0703605980650837, "grad_norm": 0.306815196158011, "learning_rate": 0.0004960950794028698, "loss": 3.2025833129882812, "step": 3532, "token_acc": 0.27622380369897925 }, { "epoch": 2.0709469363822928, "grad_norm": 0.3544692014168742, "learning_rate": 0.0004960908123947711, "loss": 3.1268396377563477, "step": 3533, "token_acc": 0.28515680866278476 }, { "epoch": 2.0715332746995014, "grad_norm": 0.40542279443083007, "learning_rate": 0.0004960865430749808, "loss": 3.176687717437744, "step": 3534, "token_acc": 0.28045908739691033 }, { "epoch": 2.0721196130167105, "grad_norm": 0.3339832266301935, "learning_rate": 0.000496082271443539, "loss": 3.155947208404541, "step": 3535, "token_acc": 0.2816314567335996 }, { "epoch": 2.0727059513339197, "grad_norm": 0.2993645985591695, "learning_rate": 0.000496077997500486, "loss": 3.1546711921691895, "step": 3536, "token_acc": 0.283005465047547 }, { "epoch": 2.0732922896511288, "grad_norm": 0.3072473793823034, "learning_rate": 0.0004960737212458617, "loss": 3.1387758255004883, "step": 3537, "token_acc": 0.285692081575668 }, { "epoch": 2.073878627968338, "grad_norm": 0.2660573505225701, "learning_rate": 0.0004960694426797064, "loss": 3.1558258533477783, "step": 3538, "token_acc": 0.28303490010776755 }, { "epoch": 2.0744649662855466, "grad_norm": 0.2920108384327116, "learning_rate": 0.0004960651618020602, "loss": 3.1421775817871094, "step": 3539, "token_acc": 0.2820939552776611 }, { "epoch": 2.0750513046027557, "grad_norm": 0.3179261860005554, "learning_rate": 0.0004960608786129634, "loss": 3.165583610534668, "step": 3540, "token_acc": 0.28070678003619837 }, { "epoch": 2.0756376429199648, "grad_norm": 0.27350676725042145, "learning_rate": 0.0004960565931124563, "loss": 3.153470516204834, "step": 3541, "token_acc": 0.2822707543082151 }, { "epoch": 2.076223981237174, "grad_norm": 0.29532435586417777, "learning_rate": 0.0004960523053005791, "loss": 3.154959201812744, "step": 3542, "token_acc": 0.2834544182265351 }, { "epoch": 2.076810319554383, "grad_norm": 0.3031581951789253, "learning_rate": 0.000496048015177372, "loss": 3.1637754440307617, "step": 3543, "token_acc": 0.2828321482762035 }, { "epoch": 2.077396657871592, "grad_norm": 0.3064542326024228, "learning_rate": 0.0004960437227428754, "loss": 3.1934001445770264, "step": 3544, "token_acc": 0.2777947574461752 }, { "epoch": 2.077982996188801, "grad_norm": 0.35160188302267953, "learning_rate": 0.0004960394279971295, "loss": 3.1339306831359863, "step": 3545, "token_acc": 0.2846268569497623 }, { "epoch": 2.07856933450601, "grad_norm": 0.2764336814962135, "learning_rate": 0.0004960351309401746, "loss": 3.1865100860595703, "step": 3546, "token_acc": 0.2771878366745192 }, { "epoch": 2.079155672823219, "grad_norm": 0.3510090452699469, "learning_rate": 0.0004960308315720514, "loss": 3.1798365116119385, "step": 3547, "token_acc": 0.27968411953947736 }, { "epoch": 2.079742011140428, "grad_norm": 0.3799781455739456, "learning_rate": 0.0004960265298928, "loss": 3.182807683944702, "step": 3548, "token_acc": 0.27887597351873333 }, { "epoch": 2.0803283494576372, "grad_norm": 0.36406411439377667, "learning_rate": 0.0004960222259024608, "loss": 3.1975419521331787, "step": 3549, "token_acc": 0.275740077326023 }, { "epoch": 2.080914687774846, "grad_norm": 0.3474623785513643, "learning_rate": 0.0004960179196010743, "loss": 3.159410238265991, "step": 3550, "token_acc": 0.28215285385521244 }, { "epoch": 2.081501026092055, "grad_norm": 0.32115604879709164, "learning_rate": 0.0004960136109886811, "loss": 3.1398887634277344, "step": 3551, "token_acc": 0.28241987058372836 }, { "epoch": 2.082087364409264, "grad_norm": 0.29639531791187373, "learning_rate": 0.0004960093000653214, "loss": 3.167410373687744, "step": 3552, "token_acc": 0.2817633710178929 }, { "epoch": 2.0826737027264732, "grad_norm": 0.3083883557202871, "learning_rate": 0.0004960049868310359, "loss": 3.1498327255249023, "step": 3553, "token_acc": 0.2834377138045366 }, { "epoch": 2.0832600410436823, "grad_norm": 0.3155841884946807, "learning_rate": 0.000496000671285865, "loss": 3.1379475593566895, "step": 3554, "token_acc": 0.2839919152669842 }, { "epoch": 2.0838463793608915, "grad_norm": 0.24933017086674397, "learning_rate": 0.0004959963534298494, "loss": 3.1477770805358887, "step": 3555, "token_acc": 0.28201478237065425 }, { "epoch": 2.0844327176781, "grad_norm": 0.2934278805987186, "learning_rate": 0.0004959920332630295, "loss": 3.1574227809906006, "step": 3556, "token_acc": 0.28191513598559065 }, { "epoch": 2.0850190559953092, "grad_norm": 0.29783189289246326, "learning_rate": 0.0004959877107854458, "loss": 3.1660821437835693, "step": 3557, "token_acc": 0.2796514232842247 }, { "epoch": 2.0856053943125183, "grad_norm": 0.30523193007804816, "learning_rate": 0.0004959833859971391, "loss": 3.1543831825256348, "step": 3558, "token_acc": 0.28249522393547716 }, { "epoch": 2.0861917326297275, "grad_norm": 0.26259297331144965, "learning_rate": 0.0004959790588981499, "loss": 3.149477481842041, "step": 3559, "token_acc": 0.2828397608475228 }, { "epoch": 2.0867780709469366, "grad_norm": 0.28941539483323775, "learning_rate": 0.000495974729488519, "loss": 3.1689987182617188, "step": 3560, "token_acc": 0.2808373798946172 }, { "epoch": 2.0873644092641452, "grad_norm": 0.2795003281244116, "learning_rate": 0.000495970397768287, "loss": 3.1483707427978516, "step": 3561, "token_acc": 0.28325412386232596 }, { "epoch": 2.0879507475813543, "grad_norm": 0.2705318757566965, "learning_rate": 0.0004959660637374945, "loss": 3.1755082607269287, "step": 3562, "token_acc": 0.27968984141700537 }, { "epoch": 2.0885370858985635, "grad_norm": 0.25696692362425827, "learning_rate": 0.0004959617273961822, "loss": 3.164635181427002, "step": 3563, "token_acc": 0.2793328491468137 }, { "epoch": 2.0891234242157726, "grad_norm": 0.339522279747748, "learning_rate": 0.0004959573887443911, "loss": 3.1535964012145996, "step": 3564, "token_acc": 0.28317065407670683 }, { "epoch": 2.0897097625329817, "grad_norm": 0.46136784136540854, "learning_rate": 0.0004959530477821615, "loss": 3.2367866039276123, "step": 3565, "token_acc": 0.2708910190019234 }, { "epoch": 2.0902961008501904, "grad_norm": 0.28940370952792466, "learning_rate": 0.0004959487045095347, "loss": 3.06662917137146, "step": 3566, "token_acc": 0.29549919431384236 }, { "epoch": 2.0908824391673995, "grad_norm": 0.3596063211600425, "learning_rate": 0.0004959443589265511, "loss": 3.178691864013672, "step": 3567, "token_acc": 0.28048094429556286 }, { "epoch": 2.0914687774846086, "grad_norm": 0.3529460929843656, "learning_rate": 0.0004959400110332517, "loss": 3.179659128189087, "step": 3568, "token_acc": 0.2797587925685803 }, { "epoch": 2.0920551158018177, "grad_norm": 0.38039943810288573, "learning_rate": 0.0004959356608296773, "loss": 3.133793354034424, "step": 3569, "token_acc": 0.28552357778366516 }, { "epoch": 2.092641454119027, "grad_norm": 0.2592183517906237, "learning_rate": 0.0004959313083158687, "loss": 3.142136335372925, "step": 3570, "token_acc": 0.2843974802087154 }, { "epoch": 2.093227792436236, "grad_norm": 0.40800673727403725, "learning_rate": 0.000495926953491867, "loss": 3.1452178955078125, "step": 3571, "token_acc": 0.28454228266677845 }, { "epoch": 2.0938141307534446, "grad_norm": 0.5210142107830779, "learning_rate": 0.0004959225963577129, "loss": 3.1473183631896973, "step": 3572, "token_acc": 0.2847555819434699 }, { "epoch": 2.0944004690706537, "grad_norm": 0.4145348629738687, "learning_rate": 0.0004959182369134473, "loss": 3.1342897415161133, "step": 3573, "token_acc": 0.2858269808395051 }, { "epoch": 2.094986807387863, "grad_norm": 0.3583098050781187, "learning_rate": 0.0004959138751591114, "loss": 3.1593070030212402, "step": 3574, "token_acc": 0.2806715419886219 }, { "epoch": 2.095573145705072, "grad_norm": 0.4167269287556698, "learning_rate": 0.000495909511094746, "loss": 3.132384777069092, "step": 3575, "token_acc": 0.2853847525268456 }, { "epoch": 2.096159484022281, "grad_norm": 0.38581607980149635, "learning_rate": 0.000495905144720392, "loss": 3.1706948280334473, "step": 3576, "token_acc": 0.2801888092964837 }, { "epoch": 2.0967458223394897, "grad_norm": 0.3531191538732225, "learning_rate": 0.0004959007760360905, "loss": 3.1476900577545166, "step": 3577, "token_acc": 0.28522986372554215 }, { "epoch": 2.097332160656699, "grad_norm": 0.3232527685131376, "learning_rate": 0.0004958964050418826, "loss": 3.1262009143829346, "step": 3578, "token_acc": 0.2871987782578938 }, { "epoch": 2.097918498973908, "grad_norm": 0.3258137224259238, "learning_rate": 0.0004958920317378094, "loss": 3.2171719074249268, "step": 3579, "token_acc": 0.2717514244295644 }, { "epoch": 2.098504837291117, "grad_norm": 0.3594676970055056, "learning_rate": 0.0004958876561239118, "loss": 3.102546453475952, "step": 3580, "token_acc": 0.28985525833168746 }, { "epoch": 2.099091175608326, "grad_norm": 0.3204960573877294, "learning_rate": 0.0004958832782002312, "loss": 3.133686065673828, "step": 3581, "token_acc": 0.28564496743996237 }, { "epoch": 2.099677513925535, "grad_norm": 0.34211683648549973, "learning_rate": 0.0004958788979668084, "loss": 3.171351909637451, "step": 3582, "token_acc": 0.2789041022047329 }, { "epoch": 2.100263852242744, "grad_norm": 0.3110129179509117, "learning_rate": 0.0004958745154236846, "loss": 3.1835148334503174, "step": 3583, "token_acc": 0.278656575312802 }, { "epoch": 2.100850190559953, "grad_norm": 0.31979236952360013, "learning_rate": 0.0004958701305709011, "loss": 3.1338422298431396, "step": 3584, "token_acc": 0.28550406452699856 }, { "epoch": 2.101436528877162, "grad_norm": 0.2713446825020389, "learning_rate": 0.0004958657434084992, "loss": 3.0960206985473633, "step": 3585, "token_acc": 0.289884119710653 }, { "epoch": 2.1020228671943713, "grad_norm": 0.31386981074298553, "learning_rate": 0.0004958613539365197, "loss": 3.1683120727539062, "step": 3586, "token_acc": 0.2804749553168407 }, { "epoch": 2.1026092055115804, "grad_norm": 0.3113756281513712, "learning_rate": 0.0004958569621550044, "loss": 3.1868560314178467, "step": 3587, "token_acc": 0.27897110503192457 }, { "epoch": 2.103195543828789, "grad_norm": 0.28771240902428363, "learning_rate": 0.0004958525680639939, "loss": 3.1838254928588867, "step": 3588, "token_acc": 0.2782339283792303 }, { "epoch": 2.103781882145998, "grad_norm": 0.33582880463286785, "learning_rate": 0.00049584817166353, "loss": 3.1608667373657227, "step": 3589, "token_acc": 0.280810848609478 }, { "epoch": 2.1043682204632073, "grad_norm": 0.3113992649087185, "learning_rate": 0.0004958437729536537, "loss": 3.1745858192443848, "step": 3590, "token_acc": 0.2797252675357463 }, { "epoch": 2.1049545587804164, "grad_norm": 0.3189441803697222, "learning_rate": 0.0004958393719344065, "loss": 3.194584846496582, "step": 3591, "token_acc": 0.2774452516319225 }, { "epoch": 2.1055408970976255, "grad_norm": 0.3035321778415073, "learning_rate": 0.0004958349686058297, "loss": 3.209015130996704, "step": 3592, "token_acc": 0.2754370337671483 }, { "epoch": 2.106127235414834, "grad_norm": 0.31401803937374556, "learning_rate": 0.0004958305629679646, "loss": 3.13073992729187, "step": 3593, "token_acc": 0.28734924438138326 }, { "epoch": 2.1067135737320433, "grad_norm": 0.30112332551751264, "learning_rate": 0.0004958261550208527, "loss": 3.1646361351013184, "step": 3594, "token_acc": 0.2802009533053609 }, { "epoch": 2.1072999120492524, "grad_norm": 0.30834455995259646, "learning_rate": 0.0004958217447645352, "loss": 3.201669216156006, "step": 3595, "token_acc": 0.2763970073421518 }, { "epoch": 2.1078862503664615, "grad_norm": 0.27898882328719726, "learning_rate": 0.0004958173321990537, "loss": 3.1747212409973145, "step": 3596, "token_acc": 0.2801926070380276 }, { "epoch": 2.1084725886836706, "grad_norm": 0.2703488550011949, "learning_rate": 0.0004958129173244497, "loss": 3.1584572792053223, "step": 3597, "token_acc": 0.28158012828973417 }, { "epoch": 2.1090589270008797, "grad_norm": 0.3623812975691925, "learning_rate": 0.0004958085001407644, "loss": 3.1583163738250732, "step": 3598, "token_acc": 0.28201280723629985 }, { "epoch": 2.1096452653180884, "grad_norm": 0.3360966740027526, "learning_rate": 0.0004958040806480397, "loss": 3.2056188583374023, "step": 3599, "token_acc": 0.27424639920362515 }, { "epoch": 2.1102316036352975, "grad_norm": 0.30144084590852394, "learning_rate": 0.0004957996588463167, "loss": 3.173847198486328, "step": 3600, "token_acc": 0.27989988316185643 }, { "epoch": 2.1108179419525066, "grad_norm": 0.30611084384579584, "learning_rate": 0.0004957952347356371, "loss": 3.1444382667541504, "step": 3601, "token_acc": 0.28286195722618274 }, { "epoch": 2.1114042802697157, "grad_norm": 0.31972592019080787, "learning_rate": 0.0004957908083160426, "loss": 3.113243818283081, "step": 3602, "token_acc": 0.2865525672371638 }, { "epoch": 2.111990618586925, "grad_norm": 0.3157037691374511, "learning_rate": 0.0004957863795875747, "loss": 3.1432249546051025, "step": 3603, "token_acc": 0.28469477923094266 }, { "epoch": 2.1125769569041335, "grad_norm": 0.3171980833511124, "learning_rate": 0.0004957819485502748, "loss": 3.095344066619873, "step": 3604, "token_acc": 0.291866167322451 }, { "epoch": 2.1131632952213426, "grad_norm": 0.3839340606490749, "learning_rate": 0.0004957775152041848, "loss": 3.1960833072662354, "step": 3605, "token_acc": 0.2798559856952234 }, { "epoch": 2.1137496335385517, "grad_norm": 0.27954622521272043, "learning_rate": 0.0004957730795493463, "loss": 3.1282520294189453, "step": 3606, "token_acc": 0.2867455249322326 }, { "epoch": 2.114335971855761, "grad_norm": 0.3793961575328709, "learning_rate": 0.0004957686415858008, "loss": 3.144864082336426, "step": 3607, "token_acc": 0.2828337634151787 }, { "epoch": 2.11492231017297, "grad_norm": 0.3101020668560908, "learning_rate": 0.0004957642013135901, "loss": 3.1440272331237793, "step": 3608, "token_acc": 0.28453413267672983 }, { "epoch": 2.115508648490179, "grad_norm": 0.27413482513032816, "learning_rate": 0.0004957597587327559, "loss": 3.145477771759033, "step": 3609, "token_acc": 0.2833999958227161 }, { "epoch": 2.1160949868073877, "grad_norm": 0.3080450288299567, "learning_rate": 0.00049575531384334, "loss": 3.1852211952209473, "step": 3610, "token_acc": 0.2797388689876629 }, { "epoch": 2.116681325124597, "grad_norm": 0.3006427922976153, "learning_rate": 0.0004957508666453839, "loss": 3.1854848861694336, "step": 3611, "token_acc": 0.27715569236996346 }, { "epoch": 2.117267663441806, "grad_norm": 0.2826035035745967, "learning_rate": 0.0004957464171389298, "loss": 3.1420111656188965, "step": 3612, "token_acc": 0.28245700399290496 }, { "epoch": 2.117854001759015, "grad_norm": 0.2986819921812465, "learning_rate": 0.0004957419653240191, "loss": 3.1110424995422363, "step": 3613, "token_acc": 0.2881288391005592 }, { "epoch": 2.118440340076224, "grad_norm": 0.33872078718671045, "learning_rate": 0.0004957375112006939, "loss": 3.1691362857818604, "step": 3614, "token_acc": 0.2817671102385683 }, { "epoch": 2.119026678393433, "grad_norm": 0.3128798013303021, "learning_rate": 0.0004957330547689958, "loss": 3.1248011589050293, "step": 3615, "token_acc": 0.2872706167171687 }, { "epoch": 2.119613016710642, "grad_norm": 0.2683150275348664, "learning_rate": 0.0004957285960289668, "loss": 3.1205973625183105, "step": 3616, "token_acc": 0.28713031255301447 }, { "epoch": 2.120199355027851, "grad_norm": 0.37653159990061064, "learning_rate": 0.0004957241349806487, "loss": 3.190948247909546, "step": 3617, "token_acc": 0.27640197118083715 }, { "epoch": 2.12078569334506, "grad_norm": 0.37129927110371946, "learning_rate": 0.0004957196716240836, "loss": 3.159011125564575, "step": 3618, "token_acc": 0.284125786163522 }, { "epoch": 2.1213720316622693, "grad_norm": 0.28785420549461566, "learning_rate": 0.0004957152059593133, "loss": 3.1618053913116455, "step": 3619, "token_acc": 0.280343362025396 }, { "epoch": 2.121958369979478, "grad_norm": 0.3273046544280014, "learning_rate": 0.0004957107379863797, "loss": 3.1540026664733887, "step": 3620, "token_acc": 0.28354251697083765 }, { "epoch": 2.122544708296687, "grad_norm": 0.3766075653031307, "learning_rate": 0.0004957062677053248, "loss": 3.1884827613830566, "step": 3621, "token_acc": 0.2775796556921183 }, { "epoch": 2.123131046613896, "grad_norm": 0.3484366317028475, "learning_rate": 0.0004957017951161906, "loss": 3.1318821907043457, "step": 3622, "token_acc": 0.28481793923603815 }, { "epoch": 2.1237173849311053, "grad_norm": 0.31774014289134156, "learning_rate": 0.0004956973202190192, "loss": 3.158658742904663, "step": 3623, "token_acc": 0.28263547583279985 }, { "epoch": 2.1243037232483144, "grad_norm": 0.3191124932942135, "learning_rate": 0.0004956928430138525, "loss": 3.150526523590088, "step": 3624, "token_acc": 0.2831809827093194 }, { "epoch": 2.1248900615655235, "grad_norm": 0.29894313388811217, "learning_rate": 0.0004956883635007325, "loss": 3.1347312927246094, "step": 3625, "token_acc": 0.2857051150026347 }, { "epoch": 2.125476399882732, "grad_norm": 0.31653078698668086, "learning_rate": 0.0004956838816797016, "loss": 3.1349759101867676, "step": 3626, "token_acc": 0.28595034970409655 }, { "epoch": 2.1260627381999413, "grad_norm": 0.34848202015218516, "learning_rate": 0.0004956793975508016, "loss": 3.0926027297973633, "step": 3627, "token_acc": 0.2918586707674923 }, { "epoch": 2.1266490765171504, "grad_norm": 0.31897901469524426, "learning_rate": 0.0004956749111140747, "loss": 3.144193649291992, "step": 3628, "token_acc": 0.2845022118656995 }, { "epoch": 2.1272354148343595, "grad_norm": 0.29412849638164157, "learning_rate": 0.0004956704223695631, "loss": 3.140152931213379, "step": 3629, "token_acc": 0.2841451329932569 }, { "epoch": 2.1278217531515686, "grad_norm": 0.3090549375247492, "learning_rate": 0.0004956659313173089, "loss": 3.1677536964416504, "step": 3630, "token_acc": 0.2803928138238052 }, { "epoch": 2.1284080914687773, "grad_norm": 0.32390791911403166, "learning_rate": 0.0004956614379573543, "loss": 3.1762261390686035, "step": 3631, "token_acc": 0.27862625313283207 }, { "epoch": 2.1289944297859864, "grad_norm": 0.29128490360247594, "learning_rate": 0.0004956569422897416, "loss": 3.148764133453369, "step": 3632, "token_acc": 0.28232832844152705 }, { "epoch": 2.1295807681031955, "grad_norm": 0.2579340724253643, "learning_rate": 0.0004956524443145129, "loss": 3.136120557785034, "step": 3633, "token_acc": 0.28262366462457594 }, { "epoch": 2.1301671064204046, "grad_norm": 0.2541766674452288, "learning_rate": 0.0004956479440317104, "loss": 3.13716459274292, "step": 3634, "token_acc": 0.285727084914411 }, { "epoch": 2.1307534447376137, "grad_norm": 0.29750740544822263, "learning_rate": 0.0004956434414413767, "loss": 3.155475378036499, "step": 3635, "token_acc": 0.28257027226481146 }, { "epoch": 2.1313397830548224, "grad_norm": 0.406233511747262, "learning_rate": 0.0004956389365435537, "loss": 3.163100004196167, "step": 3636, "token_acc": 0.2826364904797526 }, { "epoch": 2.1319261213720315, "grad_norm": 0.35022637861431155, "learning_rate": 0.000495634429338284, "loss": 3.1594338417053223, "step": 3637, "token_acc": 0.28154511651490505 }, { "epoch": 2.1325124596892406, "grad_norm": 0.28957473393967376, "learning_rate": 0.0004956299198256098, "loss": 3.17781925201416, "step": 3638, "token_acc": 0.27970177906930876 }, { "epoch": 2.1330987980064497, "grad_norm": 0.3284659685381087, "learning_rate": 0.0004956254080055735, "loss": 3.1698901653289795, "step": 3639, "token_acc": 0.2795845601145296 }, { "epoch": 2.133685136323659, "grad_norm": 0.3265616404479127, "learning_rate": 0.0004956208938782174, "loss": 3.2044849395751953, "step": 3640, "token_acc": 0.2752738268444388 }, { "epoch": 2.134271474640868, "grad_norm": 0.33145488900557457, "learning_rate": 0.0004956163774435841, "loss": 3.166576862335205, "step": 3641, "token_acc": 0.28192547915008265 }, { "epoch": 2.1348578129580766, "grad_norm": 0.2888325118426882, "learning_rate": 0.0004956118587017159, "loss": 3.167180299758911, "step": 3642, "token_acc": 0.28059656610472616 }, { "epoch": 2.1354441512752858, "grad_norm": 0.3969248172650549, "learning_rate": 0.0004956073376526551, "loss": 3.18566632270813, "step": 3643, "token_acc": 0.2798255897352189 }, { "epoch": 2.136030489592495, "grad_norm": 0.3873923896612051, "learning_rate": 0.0004956028142964444, "loss": 3.1877474784851074, "step": 3644, "token_acc": 0.2789083533482386 }, { "epoch": 2.136616827909704, "grad_norm": 0.35174316657122257, "learning_rate": 0.0004955982886331263, "loss": 3.1825504302978516, "step": 3645, "token_acc": 0.27903904027174514 }, { "epoch": 2.137203166226913, "grad_norm": 0.30333174330031526, "learning_rate": 0.0004955937606627432, "loss": 3.098595142364502, "step": 3646, "token_acc": 0.2880032909181763 }, { "epoch": 2.1377895045441218, "grad_norm": 0.3190452105466988, "learning_rate": 0.0004955892303853376, "loss": 3.191960573196411, "step": 3647, "token_acc": 0.27879788037741715 }, { "epoch": 2.138375842861331, "grad_norm": 0.3139275879740007, "learning_rate": 0.0004955846978009522, "loss": 3.1175947189331055, "step": 3648, "token_acc": 0.2869186872544598 }, { "epoch": 2.13896218117854, "grad_norm": 0.310624996255693, "learning_rate": 0.0004955801629096294, "loss": 3.195763111114502, "step": 3649, "token_acc": 0.2779277853298821 }, { "epoch": 2.139548519495749, "grad_norm": 0.33129846586246103, "learning_rate": 0.0004955756257114119, "loss": 3.132993698120117, "step": 3650, "token_acc": 0.2857432741357964 }, { "epoch": 2.140134857812958, "grad_norm": 0.3052242331281519, "learning_rate": 0.0004955710862063423, "loss": 3.153242349624634, "step": 3651, "token_acc": 0.2842799604593446 }, { "epoch": 2.1407211961301673, "grad_norm": 0.28661084091543776, "learning_rate": 0.0004955665443944633, "loss": 3.159463405609131, "step": 3652, "token_acc": 0.28187495725101674 }, { "epoch": 2.141307534447376, "grad_norm": 0.3010641563739908, "learning_rate": 0.0004955620002758175, "loss": 3.2164103984832764, "step": 3653, "token_acc": 0.27332953355513434 }, { "epoch": 2.141893872764585, "grad_norm": 0.30302134501089634, "learning_rate": 0.0004955574538504477, "loss": 3.1384472846984863, "step": 3654, "token_acc": 0.28503380145668183 }, { "epoch": 2.142480211081794, "grad_norm": 0.33286901032015953, "learning_rate": 0.0004955529051183965, "loss": 3.1714746952056885, "step": 3655, "token_acc": 0.2805121617078079 }, { "epoch": 2.1430665493990033, "grad_norm": 0.32146206065890054, "learning_rate": 0.0004955483540797065, "loss": 3.108811378479004, "step": 3656, "token_acc": 0.2895592898943996 }, { "epoch": 2.1436528877162124, "grad_norm": 0.2963711262028485, "learning_rate": 0.0004955438007344207, "loss": 3.1212687492370605, "step": 3657, "token_acc": 0.28726837675744255 }, { "epoch": 2.144239226033421, "grad_norm": 0.2882092903446034, "learning_rate": 0.0004955392450825818, "loss": 3.17252254486084, "step": 3658, "token_acc": 0.2790808404927026 }, { "epoch": 2.14482556435063, "grad_norm": 0.2964099986458252, "learning_rate": 0.0004955346871242325, "loss": 3.1505658626556396, "step": 3659, "token_acc": 0.28257402079198274 }, { "epoch": 2.1454119026678393, "grad_norm": 0.36338370262844644, "learning_rate": 0.0004955301268594157, "loss": 3.1737778186798096, "step": 3660, "token_acc": 0.2797064514842004 }, { "epoch": 2.1459982409850484, "grad_norm": 0.38216449775785266, "learning_rate": 0.000495525564288174, "loss": 3.1325647830963135, "step": 3661, "token_acc": 0.2842759416738011 }, { "epoch": 2.1465845793022575, "grad_norm": 0.3561807879860291, "learning_rate": 0.0004955209994105507, "loss": 3.1667537689208984, "step": 3662, "token_acc": 0.2819394716311032 }, { "epoch": 2.1471709176194667, "grad_norm": 0.2892664423731742, "learning_rate": 0.0004955164322265885, "loss": 3.1846001148223877, "step": 3663, "token_acc": 0.27947591359941065 }, { "epoch": 2.1477572559366753, "grad_norm": 0.3561720125443651, "learning_rate": 0.0004955118627363302, "loss": 3.1264169216156006, "step": 3664, "token_acc": 0.2866172572183701 }, { "epoch": 2.1483435942538844, "grad_norm": 0.29798665874086766, "learning_rate": 0.0004955072909398187, "loss": 3.1765551567077637, "step": 3665, "token_acc": 0.28098311347485944 }, { "epoch": 2.1489299325710935, "grad_norm": 0.2903833020835021, "learning_rate": 0.0004955027168370972, "loss": 3.115173101425171, "step": 3666, "token_acc": 0.2882780306584336 }, { "epoch": 2.1495162708883027, "grad_norm": 0.33147753924870615, "learning_rate": 0.0004954981404282083, "loss": 3.1446080207824707, "step": 3667, "token_acc": 0.2827679966311149 }, { "epoch": 2.1501026092055118, "grad_norm": 0.2694529552442061, "learning_rate": 0.0004954935617131952, "loss": 3.1314547061920166, "step": 3668, "token_acc": 0.2857307487182786 }, { "epoch": 2.1506889475227204, "grad_norm": 0.29964610844254486, "learning_rate": 0.000495488980692101, "loss": 3.129884719848633, "step": 3669, "token_acc": 0.28650484552706157 }, { "epoch": 2.1512752858399296, "grad_norm": 0.29408832695989184, "learning_rate": 0.0004954843973649686, "loss": 3.211188793182373, "step": 3670, "token_acc": 0.27340598610644834 }, { "epoch": 2.1518616241571387, "grad_norm": 0.31215013273479175, "learning_rate": 0.0004954798117318411, "loss": 3.1252026557922363, "step": 3671, "token_acc": 0.28516013734245477 }, { "epoch": 2.1524479624743478, "grad_norm": 0.26842706701612173, "learning_rate": 0.0004954752237927614, "loss": 3.158066511154175, "step": 3672, "token_acc": 0.2811730129230237 }, { "epoch": 2.153034300791557, "grad_norm": 0.26417922558178397, "learning_rate": 0.000495470633547773, "loss": 3.1407082080841064, "step": 3673, "token_acc": 0.2846216008935603 }, { "epoch": 2.1536206391087656, "grad_norm": 0.3318790351535135, "learning_rate": 0.0004954660409969186, "loss": 3.209529399871826, "step": 3674, "token_acc": 0.275680908348375 }, { "epoch": 2.1542069774259747, "grad_norm": 0.29411145006909367, "learning_rate": 0.0004954614461402416, "loss": 3.1204981803894043, "step": 3675, "token_acc": 0.2886117310614607 }, { "epoch": 2.154793315743184, "grad_norm": 0.24748749091675695, "learning_rate": 0.000495456848977785, "loss": 3.143026351928711, "step": 3676, "token_acc": 0.28393547427891763 }, { "epoch": 2.155379654060393, "grad_norm": 0.33180542942558133, "learning_rate": 0.0004954522495095921, "loss": 3.1502833366394043, "step": 3677, "token_acc": 0.2821055520661244 }, { "epoch": 2.155965992377602, "grad_norm": 0.34582967522351293, "learning_rate": 0.0004954476477357061, "loss": 3.104684829711914, "step": 3678, "token_acc": 0.288200235874456 }, { "epoch": 2.1565523306948107, "grad_norm": 0.27681622091184327, "learning_rate": 0.0004954430436561702, "loss": 3.1860389709472656, "step": 3679, "token_acc": 0.2790292928704258 }, { "epoch": 2.15713866901202, "grad_norm": 0.31460931294414396, "learning_rate": 0.0004954384372710275, "loss": 3.165003776550293, "step": 3680, "token_acc": 0.2825240522162637 }, { "epoch": 2.157725007329229, "grad_norm": 0.3229117914365331, "learning_rate": 0.0004954338285803216, "loss": 3.139177083969116, "step": 3681, "token_acc": 0.2853806433401064 }, { "epoch": 2.158311345646438, "grad_norm": 0.30192005178031267, "learning_rate": 0.0004954292175840955, "loss": 3.162809371948242, "step": 3682, "token_acc": 0.28244804723621514 }, { "epoch": 2.158897683963647, "grad_norm": 0.33006931262480044, "learning_rate": 0.0004954246042823926, "loss": 3.2366414070129395, "step": 3683, "token_acc": 0.27187196118334656 }, { "epoch": 2.1594840222808562, "grad_norm": 0.2538175543380687, "learning_rate": 0.0004954199886752564, "loss": 3.160055637359619, "step": 3684, "token_acc": 0.283541296555204 }, { "epoch": 2.160070360598065, "grad_norm": 0.3458549398184862, "learning_rate": 0.00049541537076273, "loss": 3.1605958938598633, "step": 3685, "token_acc": 0.28016138909903093 }, { "epoch": 2.160656698915274, "grad_norm": 0.2440113929363253, "learning_rate": 0.000495410750544857, "loss": 3.1378602981567383, "step": 3686, "token_acc": 0.2850267901146717 }, { "epoch": 2.161243037232483, "grad_norm": 0.2855754573489357, "learning_rate": 0.0004954061280216805, "loss": 3.1416077613830566, "step": 3687, "token_acc": 0.28433821948104704 }, { "epoch": 2.1618293755496922, "grad_norm": 0.2979997155246482, "learning_rate": 0.0004954015031932443, "loss": 3.1882548332214355, "step": 3688, "token_acc": 0.27819883630109626 }, { "epoch": 2.1624157138669013, "grad_norm": 0.3266638132185164, "learning_rate": 0.0004953968760595916, "loss": 3.1561222076416016, "step": 3689, "token_acc": 0.2811039802561945 }, { "epoch": 2.16300205218411, "grad_norm": 0.3545013440893388, "learning_rate": 0.0004953922466207659, "loss": 3.111231803894043, "step": 3690, "token_acc": 0.2881792887347191 }, { "epoch": 2.163588390501319, "grad_norm": 0.3475740902123315, "learning_rate": 0.0004953876148768106, "loss": 3.153034210205078, "step": 3691, "token_acc": 0.2848911299924376 }, { "epoch": 2.1641747288185282, "grad_norm": 0.31686603016173087, "learning_rate": 0.0004953829808277695, "loss": 3.1761345863342285, "step": 3692, "token_acc": 0.27841218343454177 }, { "epoch": 2.1647610671357373, "grad_norm": 0.2851270577375636, "learning_rate": 0.0004953783444736859, "loss": 3.155456304550171, "step": 3693, "token_acc": 0.282326879972736 }, { "epoch": 2.1653474054529465, "grad_norm": 0.2974155595548568, "learning_rate": 0.0004953737058146034, "loss": 3.1702611446380615, "step": 3694, "token_acc": 0.27938745683355537 }, { "epoch": 2.1659337437701556, "grad_norm": 0.2904824480784424, "learning_rate": 0.0004953690648505656, "loss": 3.174433708190918, "step": 3695, "token_acc": 0.2808895453128434 }, { "epoch": 2.1665200820873642, "grad_norm": 0.313715527111614, "learning_rate": 0.0004953644215816161, "loss": 3.150599479675293, "step": 3696, "token_acc": 0.28154788191512164 }, { "epoch": 2.1671064204045734, "grad_norm": 0.2592681018806169, "learning_rate": 0.0004953597760077984, "loss": 3.153822422027588, "step": 3697, "token_acc": 0.28162820899634006 }, { "epoch": 2.1676927587217825, "grad_norm": 0.26690029731819465, "learning_rate": 0.0004953551281291563, "loss": 3.121373176574707, "step": 3698, "token_acc": 0.286613313932339 }, { "epoch": 2.1682790970389916, "grad_norm": 0.30545037568889716, "learning_rate": 0.0004953504779457334, "loss": 3.119475841522217, "step": 3699, "token_acc": 0.28839900026732207 }, { "epoch": 2.1688654353562007, "grad_norm": 0.33656475838299044, "learning_rate": 0.0004953458254575733, "loss": 3.186060667037964, "step": 3700, "token_acc": 0.27927676229886267 }, { "epoch": 2.1694517736734094, "grad_norm": 0.2808184222321881, "learning_rate": 0.0004953411706647198, "loss": 3.1159610748291016, "step": 3701, "token_acc": 0.28839881277006335 }, { "epoch": 2.1700381119906185, "grad_norm": 0.3174823490833614, "learning_rate": 0.0004953365135672166, "loss": 3.106145143508911, "step": 3702, "token_acc": 0.2878886060325542 }, { "epoch": 2.1706244503078276, "grad_norm": 0.3148665330157047, "learning_rate": 0.0004953318541651075, "loss": 3.1284549236297607, "step": 3703, "token_acc": 0.2859016616618713 }, { "epoch": 2.1712107886250367, "grad_norm": 0.29801451371004684, "learning_rate": 0.0004953271924584363, "loss": 3.1831448078155518, "step": 3704, "token_acc": 0.2776782035985811 }, { "epoch": 2.171797126942246, "grad_norm": 0.3153530175579118, "learning_rate": 0.0004953225284472465, "loss": 3.1711905002593994, "step": 3705, "token_acc": 0.2803914894066175 }, { "epoch": 2.172383465259455, "grad_norm": 0.312192547400296, "learning_rate": 0.0004953178621315823, "loss": 3.186277151107788, "step": 3706, "token_acc": 0.2789642636530263 }, { "epoch": 2.1729698035766636, "grad_norm": 0.3501295826059464, "learning_rate": 0.0004953131935114873, "loss": 3.1341817378997803, "step": 3707, "token_acc": 0.28559234684017265 }, { "epoch": 2.1735561418938727, "grad_norm": 0.3506519200532044, "learning_rate": 0.0004953085225870053, "loss": 3.159444808959961, "step": 3708, "token_acc": 0.281260631719922 }, { "epoch": 2.174142480211082, "grad_norm": 0.29817563054121676, "learning_rate": 0.0004953038493581804, "loss": 3.1596174240112305, "step": 3709, "token_acc": 0.28222528650150547 }, { "epoch": 2.174728818528291, "grad_norm": 0.3066593369214165, "learning_rate": 0.0004952991738250564, "loss": 3.136115789413452, "step": 3710, "token_acc": 0.28355320085619296 }, { "epoch": 2.1753151568455, "grad_norm": 0.3370444403059494, "learning_rate": 0.0004952944959876772, "loss": 3.1707992553710938, "step": 3711, "token_acc": 0.2805408395175644 }, { "epoch": 2.1759014951627087, "grad_norm": 0.37391010393470003, "learning_rate": 0.0004952898158460866, "loss": 3.204577922821045, "step": 3712, "token_acc": 0.2759864613249153 }, { "epoch": 2.176487833479918, "grad_norm": 0.2657830348902625, "learning_rate": 0.0004952851334003289, "loss": 3.1445658206939697, "step": 3713, "token_acc": 0.2854460935599124 }, { "epoch": 2.177074171797127, "grad_norm": 0.25971227931948165, "learning_rate": 0.0004952804486504478, "loss": 3.146808624267578, "step": 3714, "token_acc": 0.28507834025578815 }, { "epoch": 2.177660510114336, "grad_norm": 0.2786292892731512, "learning_rate": 0.0004952757615964875, "loss": 3.1489758491516113, "step": 3715, "token_acc": 0.281100466316506 }, { "epoch": 2.178246848431545, "grad_norm": 0.2362994299496863, "learning_rate": 0.0004952710722384918, "loss": 3.1710357666015625, "step": 3716, "token_acc": 0.281224426067106 }, { "epoch": 2.1788331867487543, "grad_norm": 0.2532047515639838, "learning_rate": 0.000495266380576505, "loss": 3.1554884910583496, "step": 3717, "token_acc": 0.28152197425099007 }, { "epoch": 2.179419525065963, "grad_norm": 0.22986122855703853, "learning_rate": 0.000495261686610571, "loss": 3.1227917671203613, "step": 3718, "token_acc": 0.28741014559915506 }, { "epoch": 2.180005863383172, "grad_norm": 0.2634098691600834, "learning_rate": 0.0004952569903407339, "loss": 3.139397382736206, "step": 3719, "token_acc": 0.28651888962409805 }, { "epoch": 2.180592201700381, "grad_norm": 0.2837643155890122, "learning_rate": 0.000495252291767038, "loss": 3.1543872356414795, "step": 3720, "token_acc": 0.2821493210897039 }, { "epoch": 2.1811785400175903, "grad_norm": 0.2546826819111413, "learning_rate": 0.0004952475908895272, "loss": 3.147578239440918, "step": 3721, "token_acc": 0.2822189312258654 }, { "epoch": 2.1817648783347994, "grad_norm": 0.2810274487636963, "learning_rate": 0.0004952428877082458, "loss": 3.1817545890808105, "step": 3722, "token_acc": 0.27998364720156144 }, { "epoch": 2.182351216652008, "grad_norm": 0.31562468947121036, "learning_rate": 0.0004952381822232379, "loss": 3.2187581062316895, "step": 3723, "token_acc": 0.2741215223948102 }, { "epoch": 2.182937554969217, "grad_norm": 0.3426951638733073, "learning_rate": 0.0004952334744345477, "loss": 3.170959234237671, "step": 3724, "token_acc": 0.28076423425669556 }, { "epoch": 2.1835238932864263, "grad_norm": 0.3015856908724199, "learning_rate": 0.0004952287643422195, "loss": 3.151789426803589, "step": 3725, "token_acc": 0.2846478270290839 }, { "epoch": 2.1841102316036354, "grad_norm": 0.3328134386967711, "learning_rate": 0.0004952240519462976, "loss": 3.1855974197387695, "step": 3726, "token_acc": 0.27759027219897964 }, { "epoch": 2.1846965699208445, "grad_norm": 0.33849486866404144, "learning_rate": 0.0004952193372468261, "loss": 3.182040214538574, "step": 3727, "token_acc": 0.2775441923005208 }, { "epoch": 2.185282908238053, "grad_norm": 0.326255786965002, "learning_rate": 0.0004952146202438493, "loss": 3.1655569076538086, "step": 3728, "token_acc": 0.2818119105391145 }, { "epoch": 2.1858692465552623, "grad_norm": 0.3073557443291841, "learning_rate": 0.0004952099009374117, "loss": 3.19307804107666, "step": 3729, "token_acc": 0.27523539643349954 }, { "epoch": 2.1864555848724714, "grad_norm": 0.2845277701116671, "learning_rate": 0.0004952051793275574, "loss": 3.1658334732055664, "step": 3730, "token_acc": 0.2810134601776468 }, { "epoch": 2.1870419231896805, "grad_norm": 0.33074208237224484, "learning_rate": 0.0004952004554143309, "loss": 3.1735787391662598, "step": 3731, "token_acc": 0.28233142719688603 }, { "epoch": 2.1876282615068896, "grad_norm": 0.3300127189580756, "learning_rate": 0.0004951957291977766, "loss": 3.169307231903076, "step": 3732, "token_acc": 0.2797914604166187 }, { "epoch": 2.1882145998240983, "grad_norm": 0.3160419873532794, "learning_rate": 0.0004951910006779388, "loss": 3.1577887535095215, "step": 3733, "token_acc": 0.2821172662362498 }, { "epoch": 2.1888009381413074, "grad_norm": 0.3296306369355759, "learning_rate": 0.000495186269854862, "loss": 3.1503286361694336, "step": 3734, "token_acc": 0.2838889644500445 }, { "epoch": 2.1893872764585165, "grad_norm": 0.38021542726359653, "learning_rate": 0.0004951815367285904, "loss": 3.2131733894348145, "step": 3735, "token_acc": 0.2740344610375639 }, { "epoch": 2.1899736147757256, "grad_norm": 0.3160132516976753, "learning_rate": 0.0004951768012991688, "loss": 3.1848201751708984, "step": 3736, "token_acc": 0.2801549263486749 }, { "epoch": 2.1905599530929347, "grad_norm": 0.2912601084496391, "learning_rate": 0.0004951720635666414, "loss": 3.167044162750244, "step": 3737, "token_acc": 0.27999295882205233 }, { "epoch": 2.191146291410144, "grad_norm": 0.34282978128086616, "learning_rate": 0.000495167323531053, "loss": 3.144331216812134, "step": 3738, "token_acc": 0.2844745248278131 }, { "epoch": 2.1917326297273525, "grad_norm": 0.2827534041211752, "learning_rate": 0.0004951625811924478, "loss": 3.2110142707824707, "step": 3739, "token_acc": 0.2739920698971894 }, { "epoch": 2.1923189680445616, "grad_norm": 0.2704330668198203, "learning_rate": 0.0004951578365508707, "loss": 3.185276508331299, "step": 3740, "token_acc": 0.27909842149537334 }, { "epoch": 2.1929053063617707, "grad_norm": 0.2670792704998593, "learning_rate": 0.0004951530896063659, "loss": 3.176053524017334, "step": 3741, "token_acc": 0.28112527781640895 }, { "epoch": 2.19349164467898, "grad_norm": 0.2547878074988725, "learning_rate": 0.0004951483403589782, "loss": 3.1665143966674805, "step": 3742, "token_acc": 0.2799359641651683 }, { "epoch": 2.194077982996189, "grad_norm": 0.3274318425481537, "learning_rate": 0.0004951435888087523, "loss": 3.1828558444976807, "step": 3743, "token_acc": 0.279443785150405 }, { "epoch": 2.1946643213133976, "grad_norm": 0.32608385307928234, "learning_rate": 0.0004951388349557326, "loss": 3.162421226501465, "step": 3744, "token_acc": 0.28068263418161815 }, { "epoch": 2.1952506596306067, "grad_norm": 0.3512511174436906, "learning_rate": 0.000495134078799964, "loss": 3.1768875122070312, "step": 3745, "token_acc": 0.2787925761447903 }, { "epoch": 2.195836997947816, "grad_norm": 0.34682021229657245, "learning_rate": 0.0004951293203414909, "loss": 3.2027292251586914, "step": 3746, "token_acc": 0.2761715952112964 }, { "epoch": 2.196423336265025, "grad_norm": 0.3078646210905105, "learning_rate": 0.0004951245595803583, "loss": 3.190337657928467, "step": 3747, "token_acc": 0.2784136327301795 }, { "epoch": 2.197009674582234, "grad_norm": 0.3004387191477506, "learning_rate": 0.0004951197965166106, "loss": 3.1988492012023926, "step": 3748, "token_acc": 0.2755731482599497 }, { "epoch": 2.197596012899443, "grad_norm": 0.31047297484930964, "learning_rate": 0.0004951150311502929, "loss": 3.1783127784729004, "step": 3749, "token_acc": 0.2801631234122209 }, { "epoch": 2.198182351216652, "grad_norm": 0.3562918497362142, "learning_rate": 0.0004951102634814497, "loss": 3.1921141147613525, "step": 3750, "token_acc": 0.27772101306001346 }, { "epoch": 2.198768689533861, "grad_norm": 0.30863450537647635, "learning_rate": 0.0004951054935101258, "loss": 3.127758502960205, "step": 3751, "token_acc": 0.2870434960201367 }, { "epoch": 2.19935502785107, "grad_norm": 0.29814326466489566, "learning_rate": 0.0004951007212363661, "loss": 3.0999693870544434, "step": 3752, "token_acc": 0.28932952007669 }, { "epoch": 2.199941366168279, "grad_norm": 0.3106553178204928, "learning_rate": 0.0004950959466602155, "loss": 3.191160202026367, "step": 3753, "token_acc": 0.2771539044536077 }, { "epoch": 2.2005277044854883, "grad_norm": 0.2886387501880631, "learning_rate": 0.0004950911697817187, "loss": 3.187046527862549, "step": 3754, "token_acc": 0.2784452819411792 }, { "epoch": 2.201114042802697, "grad_norm": 0.3161131109884263, "learning_rate": 0.0004950863906009206, "loss": 3.113492488861084, "step": 3755, "token_acc": 0.28867689190514056 }, { "epoch": 2.201700381119906, "grad_norm": 0.22579771789283223, "learning_rate": 0.000495081609117866, "loss": 3.1345467567443848, "step": 3756, "token_acc": 0.28268931562906285 }, { "epoch": 2.202286719437115, "grad_norm": 0.2987120592109529, "learning_rate": 0.0004950768253326001, "loss": 3.1591715812683105, "step": 3757, "token_acc": 0.2819137971839158 }, { "epoch": 2.2028730577543243, "grad_norm": 0.2931091589073615, "learning_rate": 0.0004950720392451676, "loss": 3.1425862312316895, "step": 3758, "token_acc": 0.28395236386970296 }, { "epoch": 2.2034593960715334, "grad_norm": 0.2873903446101065, "learning_rate": 0.0004950672508556136, "loss": 3.1266496181488037, "step": 3759, "token_acc": 0.2871510073289814 }, { "epoch": 2.2040457343887425, "grad_norm": 0.2556194886323477, "learning_rate": 0.000495062460163983, "loss": 3.150421619415283, "step": 3760, "token_acc": 0.2848755601144523 }, { "epoch": 2.204632072705951, "grad_norm": 0.2573105231027643, "learning_rate": 0.0004950576671703208, "loss": 3.1235156059265137, "step": 3761, "token_acc": 0.28695186306287 }, { "epoch": 2.2052184110231603, "grad_norm": 0.3325154613377441, "learning_rate": 0.0004950528718746719, "loss": 3.1386680603027344, "step": 3762, "token_acc": 0.28505533811140993 }, { "epoch": 2.2058047493403694, "grad_norm": 0.35777715201107346, "learning_rate": 0.0004950480742770817, "loss": 3.1578664779663086, "step": 3763, "token_acc": 0.28491880923691637 }, { "epoch": 2.2063910876575785, "grad_norm": 0.2747822349005734, "learning_rate": 0.000495043274377595, "loss": 3.1813435554504395, "step": 3764, "token_acc": 0.2799403441643737 }, { "epoch": 2.2069774259747876, "grad_norm": 0.3127730937258205, "learning_rate": 0.0004950384721762568, "loss": 3.154592990875244, "step": 3765, "token_acc": 0.2825758537678767 }, { "epoch": 2.2075637642919963, "grad_norm": 0.3319689130252632, "learning_rate": 0.0004950336676731124, "loss": 3.1862101554870605, "step": 3766, "token_acc": 0.27914731515246904 }, { "epoch": 2.2081501026092054, "grad_norm": 0.29719171155977253, "learning_rate": 0.000495028860868207, "loss": 3.1682379245758057, "step": 3767, "token_acc": 0.2820816101017578 }, { "epoch": 2.2087364409264145, "grad_norm": 0.2965916989692894, "learning_rate": 0.0004950240517615855, "loss": 3.130030870437622, "step": 3768, "token_acc": 0.28576881885756095 }, { "epoch": 2.2093227792436236, "grad_norm": 0.32011024686620565, "learning_rate": 0.0004950192403532932, "loss": 3.1410634517669678, "step": 3769, "token_acc": 0.2871836168846393 }, { "epoch": 2.2099091175608327, "grad_norm": 0.238341190935579, "learning_rate": 0.0004950144266433755, "loss": 3.1870779991149902, "step": 3770, "token_acc": 0.2777431731458751 }, { "epoch": 2.210495455878042, "grad_norm": 0.30514750750831315, "learning_rate": 0.0004950096106318772, "loss": 3.135143280029297, "step": 3771, "token_acc": 0.2846700965743775 }, { "epoch": 2.2110817941952505, "grad_norm": 0.29170896907474464, "learning_rate": 0.0004950047923188438, "loss": 3.1798901557922363, "step": 3772, "token_acc": 0.2794307856689527 }, { "epoch": 2.2116681325124596, "grad_norm": 0.3894135930131722, "learning_rate": 0.0004949999717043206, "loss": 3.166572093963623, "step": 3773, "token_acc": 0.28148247559951894 }, { "epoch": 2.2122544708296688, "grad_norm": 0.2508310465549407, "learning_rate": 0.0004949951487883528, "loss": 3.1638011932373047, "step": 3774, "token_acc": 0.28385914108473587 }, { "epoch": 2.212840809146878, "grad_norm": 0.32891670263257017, "learning_rate": 0.0004949903235709857, "loss": 3.165375232696533, "step": 3775, "token_acc": 0.2803039543653214 }, { "epoch": 2.213427147464087, "grad_norm": 0.3196513226655616, "learning_rate": 0.0004949854960522646, "loss": 3.1705684661865234, "step": 3776, "token_acc": 0.28022365231667556 }, { "epoch": 2.2140134857812956, "grad_norm": 0.3013933225382682, "learning_rate": 0.000494980666232235, "loss": 3.181222438812256, "step": 3777, "token_acc": 0.2772660406845861 }, { "epoch": 2.2145998240985048, "grad_norm": 0.2786811800951262, "learning_rate": 0.0004949758341109419, "loss": 3.144491195678711, "step": 3778, "token_acc": 0.28403495924040434 }, { "epoch": 2.215186162415714, "grad_norm": 0.3080345412474166, "learning_rate": 0.0004949709996884312, "loss": 3.1757869720458984, "step": 3779, "token_acc": 0.2803350834257927 }, { "epoch": 2.215772500732923, "grad_norm": 0.3219952986332383, "learning_rate": 0.0004949661629647479, "loss": 3.1939444541931152, "step": 3780, "token_acc": 0.27802640067998646 }, { "epoch": 2.216358839050132, "grad_norm": 0.2575174889105713, "learning_rate": 0.0004949613239399377, "loss": 3.1882576942443848, "step": 3781, "token_acc": 0.2771468319210826 }, { "epoch": 2.2169451773673408, "grad_norm": 0.24402519247689416, "learning_rate": 0.000494956482614046, "loss": 3.1231582164764404, "step": 3782, "token_acc": 0.2846397557753236 }, { "epoch": 2.21753151568455, "grad_norm": 0.2711246530587684, "learning_rate": 0.0004949516389871182, "loss": 3.1705546379089355, "step": 3783, "token_acc": 0.2810583030764805 }, { "epoch": 2.218117854001759, "grad_norm": 0.30673550535045896, "learning_rate": 0.0004949467930591997, "loss": 3.1144230365753174, "step": 3784, "token_acc": 0.2877619636141344 }, { "epoch": 2.218704192318968, "grad_norm": 0.3297915595325778, "learning_rate": 0.0004949419448303362, "loss": 3.1422719955444336, "step": 3785, "token_acc": 0.284815349437298 }, { "epoch": 2.219290530636177, "grad_norm": 0.28322083433960177, "learning_rate": 0.0004949370943005732, "loss": 3.190800666809082, "step": 3786, "token_acc": 0.2772277763644335 }, { "epoch": 2.219876868953386, "grad_norm": 0.2957989959323077, "learning_rate": 0.0004949322414699563, "loss": 3.190504550933838, "step": 3787, "token_acc": 0.27512100025146274 }, { "epoch": 2.220463207270595, "grad_norm": 0.3307470194735579, "learning_rate": 0.0004949273863385311, "loss": 3.148383140563965, "step": 3788, "token_acc": 0.2822902657721752 }, { "epoch": 2.221049545587804, "grad_norm": 0.3294250518813636, "learning_rate": 0.0004949225289063431, "loss": 3.104329824447632, "step": 3789, "token_acc": 0.2886879604307604 }, { "epoch": 2.221635883905013, "grad_norm": 0.32018364012831674, "learning_rate": 0.0004949176691734378, "loss": 3.158704996109009, "step": 3790, "token_acc": 0.2821065649128231 }, { "epoch": 2.2222222222222223, "grad_norm": 0.30308458534249355, "learning_rate": 0.0004949128071398612, "loss": 3.1528539657592773, "step": 3791, "token_acc": 0.2832621111066736 }, { "epoch": 2.2228085605394314, "grad_norm": 0.30787586574331965, "learning_rate": 0.0004949079428056588, "loss": 3.15920090675354, "step": 3792, "token_acc": 0.28139433598055713 }, { "epoch": 2.22339489885664, "grad_norm": 0.31897223916472, "learning_rate": 0.0004949030761708762, "loss": 3.1382431983947754, "step": 3793, "token_acc": 0.28397484638622306 }, { "epoch": 2.223981237173849, "grad_norm": 0.33273101685313655, "learning_rate": 0.0004948982072355594, "loss": 3.130331039428711, "step": 3794, "token_acc": 0.2857558801264168 }, { "epoch": 2.2245675754910583, "grad_norm": 0.295520714532924, "learning_rate": 0.0004948933359997538, "loss": 3.170107126235962, "step": 3795, "token_acc": 0.28111115423928373 }, { "epoch": 2.2251539138082674, "grad_norm": 0.2651887698003276, "learning_rate": 0.0004948884624635053, "loss": 3.1857874393463135, "step": 3796, "token_acc": 0.27829263553338945 }, { "epoch": 2.2257402521254765, "grad_norm": 0.30419949767929005, "learning_rate": 0.0004948835866268597, "loss": 3.1911942958831787, "step": 3797, "token_acc": 0.2767834910358566 }, { "epoch": 2.226326590442685, "grad_norm": 0.31511868217198036, "learning_rate": 0.0004948787084898628, "loss": 3.2010602951049805, "step": 3798, "token_acc": 0.27473809215241835 }, { "epoch": 2.2269129287598943, "grad_norm": 0.3516995385246629, "learning_rate": 0.0004948738280525604, "loss": 3.1852400302886963, "step": 3799, "token_acc": 0.27958682662467 }, { "epoch": 2.2274992670771034, "grad_norm": 0.3029946543777094, "learning_rate": 0.0004948689453149985, "loss": 3.150453567504883, "step": 3800, "token_acc": 0.2820547065622191 }, { "epoch": 2.2280856053943126, "grad_norm": 0.2661075877364138, "learning_rate": 0.0004948640602772227, "loss": 3.1197733879089355, "step": 3801, "token_acc": 0.2877233869167804 }, { "epoch": 2.2286719437115217, "grad_norm": 0.2912803128699928, "learning_rate": 0.0004948591729392789, "loss": 3.180408239364624, "step": 3802, "token_acc": 0.2805510714276519 }, { "epoch": 2.2292582820287308, "grad_norm": 0.2763089953111624, "learning_rate": 0.0004948542833012133, "loss": 3.1445374488830566, "step": 3803, "token_acc": 0.28318792121623737 }, { "epoch": 2.2298446203459394, "grad_norm": 0.2769176304409203, "learning_rate": 0.0004948493913630716, "loss": 3.1564347743988037, "step": 3804, "token_acc": 0.2825479696685881 }, { "epoch": 2.2304309586631486, "grad_norm": 0.29077217594520294, "learning_rate": 0.0004948444971248998, "loss": 3.1660101413726807, "step": 3805, "token_acc": 0.2820621479427341 }, { "epoch": 2.2310172969803577, "grad_norm": 0.28650401284829974, "learning_rate": 0.0004948396005867438, "loss": 3.139133930206299, "step": 3806, "token_acc": 0.2851104624465277 }, { "epoch": 2.231603635297567, "grad_norm": 0.287620510266524, "learning_rate": 0.0004948347017486498, "loss": 3.2195794582366943, "step": 3807, "token_acc": 0.27286939207727834 }, { "epoch": 2.232189973614776, "grad_norm": 0.3120364238806162, "learning_rate": 0.0004948298006106636, "loss": 3.152463436126709, "step": 3808, "token_acc": 0.2814797593792683 }, { "epoch": 2.2327763119319846, "grad_norm": 0.2869231333652891, "learning_rate": 0.0004948248971728314, "loss": 3.156865119934082, "step": 3809, "token_acc": 0.2811637444835455 }, { "epoch": 2.2333626502491937, "grad_norm": 0.32741760050133945, "learning_rate": 0.0004948199914351992, "loss": 3.1683101654052734, "step": 3810, "token_acc": 0.2795143543562488 }, { "epoch": 2.233948988566403, "grad_norm": 0.30557749016069713, "learning_rate": 0.0004948150833978131, "loss": 3.139017105102539, "step": 3811, "token_acc": 0.28411266508745175 }, { "epoch": 2.234535326883612, "grad_norm": 0.3635901217009716, "learning_rate": 0.0004948101730607192, "loss": 3.132762908935547, "step": 3812, "token_acc": 0.28495207236398 }, { "epoch": 2.235121665200821, "grad_norm": 0.32801564599133337, "learning_rate": 0.0004948052604239635, "loss": 3.200976848602295, "step": 3813, "token_acc": 0.2769495767163072 }, { "epoch": 2.23570800351803, "grad_norm": 0.27834001136129105, "learning_rate": 0.0004948003454875923, "loss": 3.146116256713867, "step": 3814, "token_acc": 0.28222398484731925 }, { "epoch": 2.236294341835239, "grad_norm": 0.27723891837271386, "learning_rate": 0.0004947954282516518, "loss": 3.1998884677886963, "step": 3815, "token_acc": 0.2753354153846946 }, { "epoch": 2.236880680152448, "grad_norm": 0.27049286040890863, "learning_rate": 0.000494790508716188, "loss": 3.1218719482421875, "step": 3816, "token_acc": 0.2863773455879726 }, { "epoch": 2.237467018469657, "grad_norm": 0.32656509507300274, "learning_rate": 0.0004947855868812473, "loss": 3.1241960525512695, "step": 3817, "token_acc": 0.2882601668067499 }, { "epoch": 2.238053356786866, "grad_norm": 0.2577958142509023, "learning_rate": 0.0004947806627468758, "loss": 3.1178030967712402, "step": 3818, "token_acc": 0.2889268548727396 }, { "epoch": 2.2386396951040752, "grad_norm": 0.2609082362846041, "learning_rate": 0.00049477573631312, "loss": 3.134734869003296, "step": 3819, "token_acc": 0.28481944117107916 }, { "epoch": 2.239226033421284, "grad_norm": 0.26476610154970054, "learning_rate": 0.0004947708075800258, "loss": 3.130898952484131, "step": 3820, "token_acc": 0.2836286983606026 }, { "epoch": 2.239812371738493, "grad_norm": 0.2662261603829151, "learning_rate": 0.0004947658765476398, "loss": 3.1527795791625977, "step": 3821, "token_acc": 0.28350422784107837 }, { "epoch": 2.240398710055702, "grad_norm": 0.32881885122498833, "learning_rate": 0.0004947609432160081, "loss": 3.168898105621338, "step": 3822, "token_acc": 0.2811476494087145 }, { "epoch": 2.2409850483729112, "grad_norm": 0.30984157312918303, "learning_rate": 0.0004947560075851773, "loss": 3.17402720451355, "step": 3823, "token_acc": 0.279500130510714 }, { "epoch": 2.2415713866901203, "grad_norm": 0.3271752951652869, "learning_rate": 0.0004947510696551936, "loss": 3.1329240798950195, "step": 3824, "token_acc": 0.2869338824514151 }, { "epoch": 2.2421577250073295, "grad_norm": 0.301190421867787, "learning_rate": 0.0004947461294261033, "loss": 3.1421704292297363, "step": 3825, "token_acc": 0.2827037430246484 }, { "epoch": 2.242744063324538, "grad_norm": 0.30324831224476184, "learning_rate": 0.000494741186897953, "loss": 3.184340000152588, "step": 3826, "token_acc": 0.2779550392982059 }, { "epoch": 2.2433304016417472, "grad_norm": 0.3393659459661467, "learning_rate": 0.000494736242070789, "loss": 3.1682004928588867, "step": 3827, "token_acc": 0.27882895386133727 }, { "epoch": 2.2439167399589564, "grad_norm": 0.2959570734691476, "learning_rate": 0.0004947312949446579, "loss": 3.180204153060913, "step": 3828, "token_acc": 0.2791224933826688 }, { "epoch": 2.2445030782761655, "grad_norm": 0.25907055474663665, "learning_rate": 0.0004947263455196059, "loss": 3.1089446544647217, "step": 3829, "token_acc": 0.28720609282355347 }, { "epoch": 2.2450894165933746, "grad_norm": 0.3035475988054403, "learning_rate": 0.0004947213937956798, "loss": 3.1567494869232178, "step": 3830, "token_acc": 0.2818376734139914 }, { "epoch": 2.2456757549105832, "grad_norm": 0.32620891546640984, "learning_rate": 0.0004947164397729259, "loss": 3.1643857955932617, "step": 3831, "token_acc": 0.2801917537693327 }, { "epoch": 2.2462620932277924, "grad_norm": 0.27437277648272124, "learning_rate": 0.0004947114834513908, "loss": 3.1790771484375, "step": 3832, "token_acc": 0.2784870937813759 }, { "epoch": 2.2468484315450015, "grad_norm": 0.26321972510939834, "learning_rate": 0.0004947065248311211, "loss": 3.1478734016418457, "step": 3833, "token_acc": 0.2808062311270709 }, { "epoch": 2.2474347698622106, "grad_norm": 0.3321874451269814, "learning_rate": 0.0004947015639121632, "loss": 3.148843765258789, "step": 3834, "token_acc": 0.28269119657097186 }, { "epoch": 2.2480211081794197, "grad_norm": 0.3700346831447425, "learning_rate": 0.0004946966006945641, "loss": 3.1746716499328613, "step": 3835, "token_acc": 0.2799283009747845 }, { "epoch": 2.2486074464966284, "grad_norm": 0.30603689291046354, "learning_rate": 0.00049469163517837, "loss": 3.1255345344543457, "step": 3836, "token_acc": 0.2858172304944818 }, { "epoch": 2.2491937848138375, "grad_norm": 0.2685102091455314, "learning_rate": 0.0004946866673636277, "loss": 3.1777853965759277, "step": 3837, "token_acc": 0.2796378571059565 }, { "epoch": 2.2497801231310466, "grad_norm": 0.2869237613209994, "learning_rate": 0.000494681697250384, "loss": 3.1614084243774414, "step": 3838, "token_acc": 0.28344745452096826 }, { "epoch": 2.2503664614482557, "grad_norm": 0.32234525350389726, "learning_rate": 0.0004946767248386854, "loss": 3.138523817062378, "step": 3839, "token_acc": 0.2848541857695429 }, { "epoch": 2.250952799765465, "grad_norm": 0.32817198927358937, "learning_rate": 0.0004946717501285786, "loss": 3.152355432510376, "step": 3840, "token_acc": 0.2811997897553041 }, { "epoch": 2.2515391380826735, "grad_norm": 0.3259947974176206, "learning_rate": 0.0004946667731201105, "loss": 3.1380293369293213, "step": 3841, "token_acc": 0.2835224906021321 }, { "epoch": 2.2521254763998826, "grad_norm": 0.3135241874373357, "learning_rate": 0.0004946617938133278, "loss": 3.1739583015441895, "step": 3842, "token_acc": 0.28144295639137257 }, { "epoch": 2.2527118147170917, "grad_norm": 0.24817831952163097, "learning_rate": 0.0004946568122082772, "loss": 3.167630434036255, "step": 3843, "token_acc": 0.2812488916137597 }, { "epoch": 2.253298153034301, "grad_norm": 0.2731649608743652, "learning_rate": 0.0004946518283050055, "loss": 3.135435104370117, "step": 3844, "token_acc": 0.2855770869154391 }, { "epoch": 2.25388449135151, "grad_norm": 0.27231846322880665, "learning_rate": 0.0004946468421035596, "loss": 3.149603843688965, "step": 3845, "token_acc": 0.2828323462390931 }, { "epoch": 2.254470829668719, "grad_norm": 0.24587328343055956, "learning_rate": 0.0004946418536039862, "loss": 3.1687400341033936, "step": 3846, "token_acc": 0.2800664128357154 }, { "epoch": 2.2550571679859277, "grad_norm": 0.31737504074933487, "learning_rate": 0.0004946368628063323, "loss": 3.164569139480591, "step": 3847, "token_acc": 0.282083941642086 }, { "epoch": 2.255643506303137, "grad_norm": 0.3214138382296742, "learning_rate": 0.0004946318697106447, "loss": 3.145965576171875, "step": 3848, "token_acc": 0.2832863696200631 }, { "epoch": 2.256229844620346, "grad_norm": 0.2359192855100126, "learning_rate": 0.0004946268743169702, "loss": 3.1189632415771484, "step": 3849, "token_acc": 0.287117309876641 }, { "epoch": 2.256816182937555, "grad_norm": 0.2720548980320122, "learning_rate": 0.0004946218766253561, "loss": 3.1839418411254883, "step": 3850, "token_acc": 0.27657987172446663 }, { "epoch": 2.257402521254764, "grad_norm": 0.30545373074796633, "learning_rate": 0.000494616876635849, "loss": 3.1674296855926514, "step": 3851, "token_acc": 0.28053701199932396 }, { "epoch": 2.257988859571973, "grad_norm": 0.24998909046002024, "learning_rate": 0.000494611874348496, "loss": 3.124718189239502, "step": 3852, "token_acc": 0.28661570616194193 }, { "epoch": 2.258575197889182, "grad_norm": 0.26905560843251397, "learning_rate": 0.000494606869763344, "loss": 3.172092914581299, "step": 3853, "token_acc": 0.2808318259400904 }, { "epoch": 2.259161536206391, "grad_norm": 0.2560863257823114, "learning_rate": 0.0004946018628804401, "loss": 3.1784820556640625, "step": 3854, "token_acc": 0.28063623170465113 }, { "epoch": 2.2597478745236, "grad_norm": 0.35376361229022935, "learning_rate": 0.0004945968536998312, "loss": 3.154937267303467, "step": 3855, "token_acc": 0.2819474617227426 }, { "epoch": 2.2603342128408093, "grad_norm": 0.3224194780636899, "learning_rate": 0.0004945918422215646, "loss": 3.131925582885742, "step": 3856, "token_acc": 0.28493756685426175 }, { "epoch": 2.2609205511580184, "grad_norm": 0.23735956048188633, "learning_rate": 0.0004945868284456873, "loss": 3.1655850410461426, "step": 3857, "token_acc": 0.2809684801292273 }, { "epoch": 2.261506889475227, "grad_norm": 0.2724830779948872, "learning_rate": 0.0004945818123722461, "loss": 3.173205852508545, "step": 3858, "token_acc": 0.2785379039998791 }, { "epoch": 2.262093227792436, "grad_norm": 0.2893665611426745, "learning_rate": 0.0004945767940012885, "loss": 3.159245491027832, "step": 3859, "token_acc": 0.28136490136133696 }, { "epoch": 2.2626795661096453, "grad_norm": 0.3294490501456755, "learning_rate": 0.0004945717733328614, "loss": 3.147303581237793, "step": 3860, "token_acc": 0.2836458483646393 }, { "epoch": 2.2632659044268544, "grad_norm": 0.39727270008228005, "learning_rate": 0.0004945667503670121, "loss": 3.1702921390533447, "step": 3861, "token_acc": 0.2812443292287751 }, { "epoch": 2.2638522427440635, "grad_norm": 0.38444282689039877, "learning_rate": 0.0004945617251037878, "loss": 3.135335922241211, "step": 3862, "token_acc": 0.2856051619631102 }, { "epoch": 2.264438581061272, "grad_norm": 0.34753773880833516, "learning_rate": 0.0004945566975432356, "loss": 3.0984320640563965, "step": 3863, "token_acc": 0.2902048117001032 }, { "epoch": 2.2650249193784813, "grad_norm": 0.37794340169581914, "learning_rate": 0.0004945516676854028, "loss": 3.177751064300537, "step": 3864, "token_acc": 0.2815546906203256 }, { "epoch": 2.2656112576956904, "grad_norm": 0.28168794103502537, "learning_rate": 0.0004945466355303365, "loss": 3.200343132019043, "step": 3865, "token_acc": 0.2759573944753981 }, { "epoch": 2.2661975960128995, "grad_norm": 0.3234174212103772, "learning_rate": 0.0004945416010780843, "loss": 3.1428143978118896, "step": 3866, "token_acc": 0.28260335362532524 }, { "epoch": 2.2667839343301086, "grad_norm": 0.3220769757146058, "learning_rate": 0.0004945365643286931, "loss": 3.149648666381836, "step": 3867, "token_acc": 0.2816632178437802 }, { "epoch": 2.2673702726473177, "grad_norm": 0.3302134003482676, "learning_rate": 0.0004945315252822105, "loss": 3.1192336082458496, "step": 3868, "token_acc": 0.2872133706252684 }, { "epoch": 2.2679566109645264, "grad_norm": 0.2943345383392368, "learning_rate": 0.0004945264839386836, "loss": 3.1715235710144043, "step": 3869, "token_acc": 0.2803244281783676 }, { "epoch": 2.2685429492817355, "grad_norm": 0.2508000130858046, "learning_rate": 0.00049452144029816, "loss": 3.143495559692383, "step": 3870, "token_acc": 0.2849174902779344 }, { "epoch": 2.2691292875989446, "grad_norm": 0.26109236874790925, "learning_rate": 0.000494516394360687, "loss": 3.190122127532959, "step": 3871, "token_acc": 0.27819389596464156 }, { "epoch": 2.2697156259161537, "grad_norm": 0.33903049639162236, "learning_rate": 0.0004945113461263119, "loss": 3.11405086517334, "step": 3872, "token_acc": 0.28989891438427484 }, { "epoch": 2.270301964233363, "grad_norm": 0.3211160059253596, "learning_rate": 0.0004945062955950821, "loss": 3.1626482009887695, "step": 3873, "token_acc": 0.282796800652315 }, { "epoch": 2.2708883025505715, "grad_norm": 0.25234664979847354, "learning_rate": 0.0004945012427670452, "loss": 3.194135904312134, "step": 3874, "token_acc": 0.27816759517645046 }, { "epoch": 2.2714746408677806, "grad_norm": 0.287554603096279, "learning_rate": 0.0004944961876422487, "loss": 3.1574296951293945, "step": 3875, "token_acc": 0.2823687780015619 }, { "epoch": 2.2720609791849897, "grad_norm": 0.29182285292723903, "learning_rate": 0.0004944911302207398, "loss": 3.1222705841064453, "step": 3876, "token_acc": 0.2878861022958396 }, { "epoch": 2.272647317502199, "grad_norm": 0.2527807847386134, "learning_rate": 0.0004944860705025664, "loss": 3.172006607055664, "step": 3877, "token_acc": 0.2799828502384077 }, { "epoch": 2.273233655819408, "grad_norm": 0.31413541585309224, "learning_rate": 0.0004944810084877757, "loss": 3.193477153778076, "step": 3878, "token_acc": 0.2772202146510566 }, { "epoch": 2.273819994136617, "grad_norm": 0.2704083612356819, "learning_rate": 0.0004944759441764153, "loss": 3.1231212615966797, "step": 3879, "token_acc": 0.2874028166923348 }, { "epoch": 2.2744063324538257, "grad_norm": 0.31178899912445196, "learning_rate": 0.0004944708775685329, "loss": 3.1877129077911377, "step": 3880, "token_acc": 0.27901131884424835 }, { "epoch": 2.274992670771035, "grad_norm": 0.3596351741078318, "learning_rate": 0.000494465808664176, "loss": 3.1519899368286133, "step": 3881, "token_acc": 0.28307210828965446 }, { "epoch": 2.275579009088244, "grad_norm": 0.3020333907146802, "learning_rate": 0.0004944607374633923, "loss": 3.1304898262023926, "step": 3882, "token_acc": 0.284429292808041 }, { "epoch": 2.276165347405453, "grad_norm": 0.2726401919605312, "learning_rate": 0.0004944556639662294, "loss": 3.165764808654785, "step": 3883, "token_acc": 0.2802883975409057 }, { "epoch": 2.2767516857226617, "grad_norm": 0.32285574943583706, "learning_rate": 0.0004944505881727349, "loss": 3.1476478576660156, "step": 3884, "token_acc": 0.2816021967212252 }, { "epoch": 2.277338024039871, "grad_norm": 0.2823843503222295, "learning_rate": 0.0004944455100829565, "loss": 3.1601884365081787, "step": 3885, "token_acc": 0.28280859065821695 }, { "epoch": 2.27792436235708, "grad_norm": 0.27956131460385075, "learning_rate": 0.000494440429696942, "loss": 3.1895575523376465, "step": 3886, "token_acc": 0.27752539634186724 }, { "epoch": 2.278510700674289, "grad_norm": 0.26286987520841, "learning_rate": 0.000494435347014739, "loss": 3.1537184715270996, "step": 3887, "token_acc": 0.2815025933343604 }, { "epoch": 2.279097038991498, "grad_norm": 0.29758746109419504, "learning_rate": 0.0004944302620363953, "loss": 3.195587158203125, "step": 3888, "token_acc": 0.27922072657599106 }, { "epoch": 2.2796833773087073, "grad_norm": 0.30684174486894483, "learning_rate": 0.0004944251747619586, "loss": 3.1939287185668945, "step": 3889, "token_acc": 0.27768207532745526 }, { "epoch": 2.280269715625916, "grad_norm": 0.27791092948529533, "learning_rate": 0.000494420085191477, "loss": 3.153632164001465, "step": 3890, "token_acc": 0.28336240556026593 }, { "epoch": 2.280856053943125, "grad_norm": 0.2649049416220319, "learning_rate": 0.0004944149933249979, "loss": 3.0951740741729736, "step": 3891, "token_acc": 0.2902582883967504 }, { "epoch": 2.281442392260334, "grad_norm": 0.30366376849256915, "learning_rate": 0.0004944098991625692, "loss": 3.2140722274780273, "step": 3892, "token_acc": 0.2759592201140853 }, { "epoch": 2.2820287305775433, "grad_norm": 0.3373327820170129, "learning_rate": 0.000494404802704239, "loss": 3.178417682647705, "step": 3893, "token_acc": 0.278223548179036 }, { "epoch": 2.2826150688947524, "grad_norm": 0.3202448688341465, "learning_rate": 0.0004943997039500549, "loss": 3.155106782913208, "step": 3894, "token_acc": 0.2827261964451188 }, { "epoch": 2.283201407211961, "grad_norm": 0.2980731572464724, "learning_rate": 0.0004943946029000648, "loss": 3.144254446029663, "step": 3895, "token_acc": 0.2838209328226442 }, { "epoch": 2.28378774552917, "grad_norm": 0.2841218228206637, "learning_rate": 0.000494389499554317, "loss": 3.1533799171447754, "step": 3896, "token_acc": 0.28254233148811464 }, { "epoch": 2.2843740838463793, "grad_norm": 0.2808923935230558, "learning_rate": 0.0004943843939128591, "loss": 3.175994634628296, "step": 3897, "token_acc": 0.2805128261394308 }, { "epoch": 2.2849604221635884, "grad_norm": 0.30017912466260666, "learning_rate": 0.000494379285975739, "loss": 3.1532750129699707, "step": 3898, "token_acc": 0.28463766165536125 }, { "epoch": 2.2855467604807975, "grad_norm": 0.3369751939969032, "learning_rate": 0.0004943741757430049, "loss": 3.189305305480957, "step": 3899, "token_acc": 0.27964515473691337 }, { "epoch": 2.2861330987980066, "grad_norm": 0.2993892380303333, "learning_rate": 0.0004943690632147048, "loss": 3.165214776992798, "step": 3900, "token_acc": 0.2805131286300056 }, { "epoch": 2.2867194371152153, "grad_norm": 0.29396733129894437, "learning_rate": 0.0004943639483908865, "loss": 3.17478609085083, "step": 3901, "token_acc": 0.2781765473567104 }, { "epoch": 2.2873057754324244, "grad_norm": 0.2785000078066771, "learning_rate": 0.0004943588312715983, "loss": 3.166280746459961, "step": 3902, "token_acc": 0.28039920444023164 }, { "epoch": 2.2878921137496335, "grad_norm": 0.31709409298963454, "learning_rate": 0.0004943537118568881, "loss": 3.1413936614990234, "step": 3903, "token_acc": 0.2841890440386681 }, { "epoch": 2.2884784520668426, "grad_norm": 0.27014675030248797, "learning_rate": 0.0004943485901468041, "loss": 3.114736557006836, "step": 3904, "token_acc": 0.28979685707341424 }, { "epoch": 2.2890647903840518, "grad_norm": 0.28364520835474033, "learning_rate": 0.0004943434661413942, "loss": 3.1973624229431152, "step": 3905, "token_acc": 0.2774534822181816 }, { "epoch": 2.2896511287012604, "grad_norm": 0.2808540960665262, "learning_rate": 0.0004943383398407069, "loss": 3.1188249588012695, "step": 3906, "token_acc": 0.28633595768237174 }, { "epoch": 2.2902374670184695, "grad_norm": 0.26979949470625997, "learning_rate": 0.00049433321124479, "loss": 3.144449472427368, "step": 3907, "token_acc": 0.2825334465642254 }, { "epoch": 2.2908238053356786, "grad_norm": 0.3317182504829572, "learning_rate": 0.000494328080353692, "loss": 3.1412973403930664, "step": 3908, "token_acc": 0.2829566111809845 }, { "epoch": 2.2914101436528878, "grad_norm": 0.3462209536885495, "learning_rate": 0.0004943229471674607, "loss": 3.1163129806518555, "step": 3909, "token_acc": 0.2878311784654515 }, { "epoch": 2.291996481970097, "grad_norm": 0.25013331055383425, "learning_rate": 0.0004943178116861446, "loss": 3.1724843978881836, "step": 3910, "token_acc": 0.27883928084070003 }, { "epoch": 2.292582820287306, "grad_norm": 0.29640242868774136, "learning_rate": 0.0004943126739097919, "loss": 3.156826972961426, "step": 3911, "token_acc": 0.2807963805492542 }, { "epoch": 2.2931691586045146, "grad_norm": 0.32890770083561716, "learning_rate": 0.0004943075338384509, "loss": 3.1356372833251953, "step": 3912, "token_acc": 0.2840835215965049 }, { "epoch": 2.2937554969217238, "grad_norm": 0.2751362602346232, "learning_rate": 0.0004943023914721699, "loss": 3.143984317779541, "step": 3913, "token_acc": 0.2828619868208649 }, { "epoch": 2.294341835238933, "grad_norm": 0.266215350163477, "learning_rate": 0.000494297246810997, "loss": 3.1524040699005127, "step": 3914, "token_acc": 0.28338190755240433 }, { "epoch": 2.294928173556142, "grad_norm": 0.32392491080374525, "learning_rate": 0.0004942920998549807, "loss": 3.1548280715942383, "step": 3915, "token_acc": 0.28271467636630077 }, { "epoch": 2.295514511873351, "grad_norm": 0.3186682447488782, "learning_rate": 0.0004942869506041693, "loss": 3.1278862953186035, "step": 3916, "token_acc": 0.2848551425288171 }, { "epoch": 2.2961008501905598, "grad_norm": 0.31465866970918616, "learning_rate": 0.0004942817990586111, "loss": 3.176079750061035, "step": 3917, "token_acc": 0.2792354460726708 }, { "epoch": 2.296687188507769, "grad_norm": 0.32176942882249593, "learning_rate": 0.0004942766452183547, "loss": 3.1912670135498047, "step": 3918, "token_acc": 0.2777089680498699 }, { "epoch": 2.297273526824978, "grad_norm": 0.3656040823068835, "learning_rate": 0.0004942714890834483, "loss": 3.1739344596862793, "step": 3919, "token_acc": 0.2780263498361221 }, { "epoch": 2.297859865142187, "grad_norm": 0.32566068012364663, "learning_rate": 0.0004942663306539405, "loss": 3.1790390014648438, "step": 3920, "token_acc": 0.279256636987316 }, { "epoch": 2.298446203459396, "grad_norm": 0.2769985156626455, "learning_rate": 0.0004942611699298796, "loss": 3.1757187843322754, "step": 3921, "token_acc": 0.2820224541596015 }, { "epoch": 2.2990325417766053, "grad_norm": 0.3052832209277952, "learning_rate": 0.0004942560069113142, "loss": 3.1661975383758545, "step": 3922, "token_acc": 0.28091679869824004 }, { "epoch": 2.299618880093814, "grad_norm": 0.3112915127147975, "learning_rate": 0.0004942508415982928, "loss": 3.156914710998535, "step": 3923, "token_acc": 0.2826732231805037 }, { "epoch": 2.300205218411023, "grad_norm": 0.37493133747537133, "learning_rate": 0.0004942456739908637, "loss": 3.17193603515625, "step": 3924, "token_acc": 0.27917988430677365 }, { "epoch": 2.300791556728232, "grad_norm": 0.30009062242596574, "learning_rate": 0.0004942405040890758, "loss": 3.1218883991241455, "step": 3925, "token_acc": 0.2854247557841292 }, { "epoch": 2.3013778950454413, "grad_norm": 0.2692483083230852, "learning_rate": 0.0004942353318929774, "loss": 3.162898540496826, "step": 3926, "token_acc": 0.2808329299278385 }, { "epoch": 2.3019642333626504, "grad_norm": 0.3802494874312786, "learning_rate": 0.0004942301574026172, "loss": 3.1569154262542725, "step": 3927, "token_acc": 0.27951131054017925 }, { "epoch": 2.302550571679859, "grad_norm": 0.34674268222322163, "learning_rate": 0.0004942249806180437, "loss": 3.115814447402954, "step": 3928, "token_acc": 0.28652745881509606 }, { "epoch": 2.303136909997068, "grad_norm": 0.31747691770564734, "learning_rate": 0.0004942198015393057, "loss": 3.1416079998016357, "step": 3929, "token_acc": 0.2838086905199505 }, { "epoch": 2.3037232483142773, "grad_norm": 0.36753539195994017, "learning_rate": 0.0004942146201664517, "loss": 3.1530566215515137, "step": 3930, "token_acc": 0.2834558539038923 }, { "epoch": 2.3043095866314864, "grad_norm": 0.26942361972965423, "learning_rate": 0.0004942094364995304, "loss": 3.1065523624420166, "step": 3931, "token_acc": 0.2883228325539199 }, { "epoch": 2.3048959249486956, "grad_norm": 0.30884105939737105, "learning_rate": 0.0004942042505385907, "loss": 3.1122541427612305, "step": 3932, "token_acc": 0.2872049766944912 }, { "epoch": 2.3054822632659047, "grad_norm": 0.403694243383865, "learning_rate": 0.000494199062283681, "loss": 3.159322500228882, "step": 3933, "token_acc": 0.2818386229711568 }, { "epoch": 2.3060686015831133, "grad_norm": 0.3661677765343658, "learning_rate": 0.0004941938717348502, "loss": 3.125225782394409, "step": 3934, "token_acc": 0.28405573369213244 }, { "epoch": 2.3066549399003224, "grad_norm": 0.30627501637965, "learning_rate": 0.0004941886788921469, "loss": 3.168328046798706, "step": 3935, "token_acc": 0.2801591748640684 }, { "epoch": 2.3072412782175316, "grad_norm": 0.31617263767736264, "learning_rate": 0.0004941834837556201, "loss": 3.125821113586426, "step": 3936, "token_acc": 0.2859601204590249 }, { "epoch": 2.3078276165347407, "grad_norm": 0.3706222755913848, "learning_rate": 0.0004941782863253186, "loss": 3.193310260772705, "step": 3937, "token_acc": 0.27671050192573066 }, { "epoch": 2.3084139548519493, "grad_norm": 0.38931355205361134, "learning_rate": 0.000494173086601291, "loss": 3.165721893310547, "step": 3938, "token_acc": 0.27979172780309985 }, { "epoch": 2.3090002931691584, "grad_norm": 0.2907466859373121, "learning_rate": 0.0004941678845835864, "loss": 3.153498649597168, "step": 3939, "token_acc": 0.2827911884975597 }, { "epoch": 2.3095866314863676, "grad_norm": 0.30848660609153267, "learning_rate": 0.0004941626802722535, "loss": 3.177926540374756, "step": 3940, "token_acc": 0.2789154851344691 }, { "epoch": 2.3101729698035767, "grad_norm": 0.36349449846897497, "learning_rate": 0.0004941574736673412, "loss": 3.1330854892730713, "step": 3941, "token_acc": 0.2844436847123806 }, { "epoch": 2.310759308120786, "grad_norm": 0.26739818078873123, "learning_rate": 0.0004941522647688984, "loss": 3.116626739501953, "step": 3942, "token_acc": 0.28746450388732864 }, { "epoch": 2.311345646437995, "grad_norm": 0.2890124532439609, "learning_rate": 0.0004941470535769742, "loss": 3.156836986541748, "step": 3943, "token_acc": 0.28451289855033635 }, { "epoch": 2.3119319847552036, "grad_norm": 0.25041120977772835, "learning_rate": 0.0004941418400916173, "loss": 3.117131233215332, "step": 3944, "token_acc": 0.2859157132059736 }, { "epoch": 2.3125183230724127, "grad_norm": 0.2965372704381502, "learning_rate": 0.0004941366243128768, "loss": 3.1520376205444336, "step": 3945, "token_acc": 0.2818441440373137 }, { "epoch": 2.313104661389622, "grad_norm": 0.24446183801353258, "learning_rate": 0.0004941314062408018, "loss": 3.140288829803467, "step": 3946, "token_acc": 0.2838446551500195 }, { "epoch": 2.313690999706831, "grad_norm": 0.2858152316901607, "learning_rate": 0.0004941261858754411, "loss": 3.2104921340942383, "step": 3947, "token_acc": 0.27508570369684054 }, { "epoch": 2.31427733802404, "grad_norm": 0.2807372806965721, "learning_rate": 0.000494120963216844, "loss": 3.167325973510742, "step": 3948, "token_acc": 0.2820974097783593 }, { "epoch": 2.3148636763412487, "grad_norm": 0.298617957931145, "learning_rate": 0.0004941157382650593, "loss": 3.1299381256103516, "step": 3949, "token_acc": 0.28588010342879944 }, { "epoch": 2.315450014658458, "grad_norm": 0.3328875925979855, "learning_rate": 0.0004941105110201361, "loss": 3.1952295303344727, "step": 3950, "token_acc": 0.27685804860866503 }, { "epoch": 2.316036352975667, "grad_norm": 0.27993508311367893, "learning_rate": 0.0004941052814821237, "loss": 3.179368019104004, "step": 3951, "token_acc": 0.27853155271993946 }, { "epoch": 2.316622691292876, "grad_norm": 0.27208189822011764, "learning_rate": 0.0004941000496510712, "loss": 3.1553988456726074, "step": 3952, "token_acc": 0.28255558270217446 }, { "epoch": 2.317209029610085, "grad_norm": 0.2906117613745728, "learning_rate": 0.0004940948155270276, "loss": 3.13283109664917, "step": 3953, "token_acc": 0.2849087229647118 }, { "epoch": 2.3177953679272942, "grad_norm": 0.23200456280574802, "learning_rate": 0.000494089579110042, "loss": 3.1319456100463867, "step": 3954, "token_acc": 0.2853875079407365 }, { "epoch": 2.318381706244503, "grad_norm": 0.27156625862554395, "learning_rate": 0.0004940843404001639, "loss": 3.1421096324920654, "step": 3955, "token_acc": 0.2836336567585605 }, { "epoch": 2.318968044561712, "grad_norm": 0.25999007877423913, "learning_rate": 0.0004940790993974422, "loss": 3.1816375255584717, "step": 3956, "token_acc": 0.2798555396659961 }, { "epoch": 2.319554382878921, "grad_norm": 0.2535092620806209, "learning_rate": 0.0004940738561019264, "loss": 3.1191036701202393, "step": 3957, "token_acc": 0.28780223462517146 }, { "epoch": 2.3201407211961302, "grad_norm": 0.27356935213757333, "learning_rate": 0.0004940686105136655, "loss": 3.1959195137023926, "step": 3958, "token_acc": 0.27543486453877924 }, { "epoch": 2.3207270595133394, "grad_norm": 0.2532781014714082, "learning_rate": 0.0004940633626327089, "loss": 3.170225143432617, "step": 3959, "token_acc": 0.27967962519776635 }, { "epoch": 2.321313397830548, "grad_norm": 0.31218292184798657, "learning_rate": 0.0004940581124591058, "loss": 3.165313720703125, "step": 3960, "token_acc": 0.2818811536889771 }, { "epoch": 2.321899736147757, "grad_norm": 0.36911755450408884, "learning_rate": 0.0004940528599929058, "loss": 3.171064853668213, "step": 3961, "token_acc": 0.2791504559906292 }, { "epoch": 2.3224860744649662, "grad_norm": 0.29033998962323826, "learning_rate": 0.0004940476052341579, "loss": 3.1544559001922607, "step": 3962, "token_acc": 0.28208769373426945 }, { "epoch": 2.3230724127821754, "grad_norm": 0.25904069964386445, "learning_rate": 0.0004940423481829117, "loss": 3.1153059005737305, "step": 3963, "token_acc": 0.28822642818081173 }, { "epoch": 2.3236587510993845, "grad_norm": 0.2837859334986874, "learning_rate": 0.0004940370888392165, "loss": 3.173543691635132, "step": 3964, "token_acc": 0.2793589706769293 }, { "epoch": 2.3242450894165936, "grad_norm": 0.2984049085507968, "learning_rate": 0.0004940318272031216, "loss": 3.1182613372802734, "step": 3965, "token_acc": 0.2868576841108217 }, { "epoch": 2.3248314277338022, "grad_norm": 0.3004806646151251, "learning_rate": 0.0004940265632746765, "loss": 3.157388687133789, "step": 3966, "token_acc": 0.2816664987191794 }, { "epoch": 2.3254177660510114, "grad_norm": 0.25186433358815535, "learning_rate": 0.0004940212970539308, "loss": 3.1801986694335938, "step": 3967, "token_acc": 0.2793455176484238 }, { "epoch": 2.3260041043682205, "grad_norm": 0.27804333835954365, "learning_rate": 0.0004940160285409337, "loss": 3.1617660522460938, "step": 3968, "token_acc": 0.28207074173051616 }, { "epoch": 2.3265904426854296, "grad_norm": 0.27507179174472846, "learning_rate": 0.000494010757735735, "loss": 3.1777541637420654, "step": 3969, "token_acc": 0.2794753195753165 }, { "epoch": 2.3271767810026387, "grad_norm": 0.2728657950931429, "learning_rate": 0.000494005484638384, "loss": 3.169367790222168, "step": 3970, "token_acc": 0.2797783361265116 }, { "epoch": 2.3277631193198474, "grad_norm": 0.2699673390677939, "learning_rate": 0.0004940002092489301, "loss": 3.169825315475464, "step": 3971, "token_acc": 0.2795208638904832 }, { "epoch": 2.3283494576370565, "grad_norm": 0.2562718595891836, "learning_rate": 0.0004939949315674231, "loss": 3.1752443313598633, "step": 3972, "token_acc": 0.2784165856818918 }, { "epoch": 2.3289357959542656, "grad_norm": 0.27027362421607853, "learning_rate": 0.0004939896515939126, "loss": 3.187270164489746, "step": 3973, "token_acc": 0.2773837418771187 }, { "epoch": 2.3295221342714747, "grad_norm": 0.322310044671525, "learning_rate": 0.0004939843693284481, "loss": 3.1919784545898438, "step": 3974, "token_acc": 0.2752439112899045 }, { "epoch": 2.330108472588684, "grad_norm": 0.25917071776406353, "learning_rate": 0.0004939790847710791, "loss": 3.1480650901794434, "step": 3975, "token_acc": 0.283121095307723 }, { "epoch": 2.330694810905893, "grad_norm": 0.29256948812926303, "learning_rate": 0.0004939737979218555, "loss": 3.13071870803833, "step": 3976, "token_acc": 0.2850702219369269 }, { "epoch": 2.3312811492231016, "grad_norm": 0.355983327424739, "learning_rate": 0.0004939685087808267, "loss": 3.1213719844818115, "step": 3977, "token_acc": 0.28660329886845715 }, { "epoch": 2.3318674875403107, "grad_norm": 0.4462612165228996, "learning_rate": 0.0004939632173480426, "loss": 3.137389659881592, "step": 3978, "token_acc": 0.28391803827962464 }, { "epoch": 2.33245382585752, "grad_norm": 0.42123050673687495, "learning_rate": 0.0004939579236235528, "loss": 3.1404547691345215, "step": 3979, "token_acc": 0.2839996874149486 }, { "epoch": 2.333040164174729, "grad_norm": 0.3020603665689284, "learning_rate": 0.0004939526276074071, "loss": 3.1084227561950684, "step": 3980, "token_acc": 0.2901078772776163 }, { "epoch": 2.333626502491938, "grad_norm": 0.36511023866417164, "learning_rate": 0.000493947329299655, "loss": 3.192930221557617, "step": 3981, "token_acc": 0.276633631688403 }, { "epoch": 2.3342128408091467, "grad_norm": 0.3110911895364269, "learning_rate": 0.0004939420287003466, "loss": 3.169325828552246, "step": 3982, "token_acc": 0.2808059304314769 }, { "epoch": 2.334799179126356, "grad_norm": 0.33003785448185197, "learning_rate": 0.0004939367258095317, "loss": 3.116131067276001, "step": 3983, "token_acc": 0.28655666558342857 }, { "epoch": 2.335385517443565, "grad_norm": 0.35248184867215654, "learning_rate": 0.0004939314206272598, "loss": 3.1521859169006348, "step": 3984, "token_acc": 0.2816936570652686 }, { "epoch": 2.335971855760774, "grad_norm": 0.30803708914492633, "learning_rate": 0.0004939261131535809, "loss": 3.128833770751953, "step": 3985, "token_acc": 0.2867014266286512 }, { "epoch": 2.336558194077983, "grad_norm": 0.3361488742673685, "learning_rate": 0.0004939208033885449, "loss": 3.1462182998657227, "step": 3986, "token_acc": 0.2848019960640245 }, { "epoch": 2.3371445323951923, "grad_norm": 0.34731938369147075, "learning_rate": 0.0004939154913322016, "loss": 3.1032023429870605, "step": 3987, "token_acc": 0.2891486684839589 }, { "epoch": 2.337730870712401, "grad_norm": 0.3334990850308182, "learning_rate": 0.000493910176984601, "loss": 3.1642775535583496, "step": 3988, "token_acc": 0.27947172882779986 }, { "epoch": 2.33831720902961, "grad_norm": 0.27416137275532815, "learning_rate": 0.000493904860345793, "loss": 3.148524284362793, "step": 3989, "token_acc": 0.2839143250147377 }, { "epoch": 2.338903547346819, "grad_norm": 0.320259817653581, "learning_rate": 0.0004938995414158273, "loss": 3.1541898250579834, "step": 3990, "token_acc": 0.28154540126158367 }, { "epoch": 2.3394898856640283, "grad_norm": 0.2768282240883893, "learning_rate": 0.0004938942201947543, "loss": 3.131925106048584, "step": 3991, "token_acc": 0.2853591918347077 }, { "epoch": 2.340076223981237, "grad_norm": 0.273659546836402, "learning_rate": 0.0004938888966826236, "loss": 3.1534693241119385, "step": 3992, "token_acc": 0.28042292687189346 }, { "epoch": 2.340662562298446, "grad_norm": 0.3151100472043264, "learning_rate": 0.0004938835708794855, "loss": 3.123828887939453, "step": 3993, "token_acc": 0.28522968693630496 }, { "epoch": 2.341248900615655, "grad_norm": 0.33725714209704866, "learning_rate": 0.0004938782427853898, "loss": 3.1644792556762695, "step": 3994, "token_acc": 0.28067677645940103 }, { "epoch": 2.3418352389328643, "grad_norm": 0.4047552218966542, "learning_rate": 0.0004938729124003866, "loss": 3.1369543075561523, "step": 3995, "token_acc": 0.2841544067679257 }, { "epoch": 2.3424215772500734, "grad_norm": 0.348581109982346, "learning_rate": 0.000493867579724526, "loss": 3.163398265838623, "step": 3996, "token_acc": 0.2817965044955098 }, { "epoch": 2.3430079155672825, "grad_norm": 0.2667126137814938, "learning_rate": 0.0004938622447578582, "loss": 3.135937213897705, "step": 3997, "token_acc": 0.28565611647624534 }, { "epoch": 2.343594253884491, "grad_norm": 0.267303131580925, "learning_rate": 0.0004938569075004331, "loss": 3.1574630737304688, "step": 3998, "token_acc": 0.2810470581602161 }, { "epoch": 2.3441805922017003, "grad_norm": 0.26745200425516374, "learning_rate": 0.0004938515679523011, "loss": 3.1264781951904297, "step": 3999, "token_acc": 0.28595221915763946 }, { "epoch": 2.3447669305189094, "grad_norm": 0.2688300192193967, "learning_rate": 0.0004938462261135121, "loss": 3.096856117248535, "step": 4000, "token_acc": 0.2897349739830139 }, { "epoch": 2.3453532688361185, "grad_norm": 0.25405227045064427, "learning_rate": 0.0004938408819841164, "loss": 3.1228528022766113, "step": 4001, "token_acc": 0.2873666981867471 }, { "epoch": 2.3459396071533276, "grad_norm": 0.2884296113783983, "learning_rate": 0.0004938355355641643, "loss": 3.186028003692627, "step": 4002, "token_acc": 0.2769003078069607 }, { "epoch": 2.3465259454705363, "grad_norm": 0.26051591206357727, "learning_rate": 0.0004938301868537058, "loss": 3.15671706199646, "step": 4003, "token_acc": 0.28157566490966884 }, { "epoch": 2.3471122837877454, "grad_norm": 0.27665756626266563, "learning_rate": 0.0004938248358527913, "loss": 3.1955313682556152, "step": 4004, "token_acc": 0.27567125371905565 }, { "epoch": 2.3476986221049545, "grad_norm": 0.2190041817743469, "learning_rate": 0.000493819482561471, "loss": 3.172473430633545, "step": 4005, "token_acc": 0.2798969188915761 }, { "epoch": 2.3482849604221636, "grad_norm": 0.25579136033083066, "learning_rate": 0.0004938141269797954, "loss": 3.193089008331299, "step": 4006, "token_acc": 0.27685645650708024 }, { "epoch": 2.3488712987393727, "grad_norm": 0.2988296065470271, "learning_rate": 0.0004938087691078143, "loss": 3.1299846172332764, "step": 4007, "token_acc": 0.2847666385980102 }, { "epoch": 2.349457637056582, "grad_norm": 0.34174377554982466, "learning_rate": 0.0004938034089455786, "loss": 3.1282472610473633, "step": 4008, "token_acc": 0.2869298564187029 }, { "epoch": 2.3500439753737905, "grad_norm": 0.2954870089577022, "learning_rate": 0.0004937980464931383, "loss": 3.1838347911834717, "step": 4009, "token_acc": 0.2792873635881474 }, { "epoch": 2.3506303136909996, "grad_norm": 0.24362040653844436, "learning_rate": 0.0004937926817505439, "loss": 3.1690683364868164, "step": 4010, "token_acc": 0.2798725596529284 }, { "epoch": 2.3512166520082087, "grad_norm": 0.3234849344313527, "learning_rate": 0.0004937873147178457, "loss": 3.134331464767456, "step": 4011, "token_acc": 0.284568084668663 }, { "epoch": 2.351802990325418, "grad_norm": 0.2837742241760359, "learning_rate": 0.0004937819453950942, "loss": 3.1493167877197266, "step": 4012, "token_acc": 0.2833623038007524 }, { "epoch": 2.352389328642627, "grad_norm": 0.3249334552706245, "learning_rate": 0.00049377657378234, "loss": 3.1755361557006836, "step": 4013, "token_acc": 0.27741696757812 }, { "epoch": 2.3529756669598356, "grad_norm": 0.35545724682376334, "learning_rate": 0.0004937711998796332, "loss": 3.161740303039551, "step": 4014, "token_acc": 0.2807676580432353 }, { "epoch": 2.3535620052770447, "grad_norm": 0.26454969303670217, "learning_rate": 0.0004937658236870245, "loss": 3.1504836082458496, "step": 4015, "token_acc": 0.2816376466701425 }, { "epoch": 2.354148343594254, "grad_norm": 0.3928685282247113, "learning_rate": 0.0004937604452045644, "loss": 3.123591423034668, "step": 4016, "token_acc": 0.28682305957855075 }, { "epoch": 2.354734681911463, "grad_norm": 0.3254622356512941, "learning_rate": 0.0004937550644323034, "loss": 3.158384323120117, "step": 4017, "token_acc": 0.2802929075604877 }, { "epoch": 2.355321020228672, "grad_norm": 0.3174958619786969, "learning_rate": 0.0004937496813702919, "loss": 3.1805267333984375, "step": 4018, "token_acc": 0.2802285732544795 }, { "epoch": 2.355907358545881, "grad_norm": 0.2589008997969103, "learning_rate": 0.0004937442960185807, "loss": 3.1231000423431396, "step": 4019, "token_acc": 0.2867034919166649 }, { "epoch": 2.35649369686309, "grad_norm": 0.34367314426343626, "learning_rate": 0.0004937389083772203, "loss": 3.1560707092285156, "step": 4020, "token_acc": 0.2827240324925272 }, { "epoch": 2.357080035180299, "grad_norm": 0.28506128200065484, "learning_rate": 0.0004937335184462614, "loss": 3.177469253540039, "step": 4021, "token_acc": 0.27729345614030587 }, { "epoch": 2.357666373497508, "grad_norm": 0.30081359916660666, "learning_rate": 0.0004937281262257544, "loss": 3.165536880493164, "step": 4022, "token_acc": 0.27862168087444217 }, { "epoch": 2.358252711814717, "grad_norm": 0.2868817523103614, "learning_rate": 0.0004937227317157501, "loss": 3.130434036254883, "step": 4023, "token_acc": 0.2868024833735954 }, { "epoch": 2.3588390501319263, "grad_norm": 0.2658631021021933, "learning_rate": 0.0004937173349162991, "loss": 3.1382298469543457, "step": 4024, "token_acc": 0.2841774159441742 }, { "epoch": 2.359425388449135, "grad_norm": 0.27630209701266484, "learning_rate": 0.0004937119358274522, "loss": 3.1533422470092773, "step": 4025, "token_acc": 0.284287871376619 }, { "epoch": 2.360011726766344, "grad_norm": 0.26806193253509747, "learning_rate": 0.0004937065344492601, "loss": 3.1603002548217773, "step": 4026, "token_acc": 0.2827379050297182 }, { "epoch": 2.360598065083553, "grad_norm": 0.2878893156666818, "learning_rate": 0.0004937011307817735, "loss": 3.1473021507263184, "step": 4027, "token_acc": 0.283210009103429 }, { "epoch": 2.3611844034007623, "grad_norm": 0.27372966378220154, "learning_rate": 0.0004936957248250431, "loss": 3.161133289337158, "step": 4028, "token_acc": 0.28148498915782993 }, { "epoch": 2.3617707417179714, "grad_norm": 0.32987362880637805, "learning_rate": 0.0004936903165791199, "loss": 3.1719350814819336, "step": 4029, "token_acc": 0.278568392072714 }, { "epoch": 2.3623570800351805, "grad_norm": 0.2805507637830382, "learning_rate": 0.0004936849060440543, "loss": 3.1277289390563965, "step": 4030, "token_acc": 0.28452087188978 }, { "epoch": 2.362943418352389, "grad_norm": 0.33163267071368285, "learning_rate": 0.0004936794932198977, "loss": 3.1783652305603027, "step": 4031, "token_acc": 0.2802775675260815 }, { "epoch": 2.3635297566695983, "grad_norm": 0.2393012788430242, "learning_rate": 0.0004936740781067004, "loss": 3.1389975547790527, "step": 4032, "token_acc": 0.2846301481272356 }, { "epoch": 2.3641160949868074, "grad_norm": 0.2909760459874164, "learning_rate": 0.0004936686607045135, "loss": 3.0992393493652344, "step": 4033, "token_acc": 0.28946476570402624 }, { "epoch": 2.3647024333040165, "grad_norm": 0.24001186493798704, "learning_rate": 0.000493663241013388, "loss": 3.1664459705352783, "step": 4034, "token_acc": 0.2798219274654691 }, { "epoch": 2.3652887716212256, "grad_norm": 0.31053089378158255, "learning_rate": 0.0004936578190333745, "loss": 3.0978147983551025, "step": 4035, "token_acc": 0.2919864696382259 }, { "epoch": 2.3658751099384343, "grad_norm": 0.29250564650884, "learning_rate": 0.0004936523947645243, "loss": 3.170487880706787, "step": 4036, "token_acc": 0.2793936654184947 }, { "epoch": 2.3664614482556434, "grad_norm": 0.28161171625902087, "learning_rate": 0.0004936469682068882, "loss": 3.1361031532287598, "step": 4037, "token_acc": 0.2839699973211894 }, { "epoch": 2.3670477865728525, "grad_norm": 0.35072093783031416, "learning_rate": 0.000493641539360517, "loss": 3.168086051940918, "step": 4038, "token_acc": 0.2815140399861308 }, { "epoch": 2.3676341248900616, "grad_norm": 0.32737886561048474, "learning_rate": 0.0004936361082254619, "loss": 3.1163172721862793, "step": 4039, "token_acc": 0.28807761313044056 }, { "epoch": 2.3682204632072708, "grad_norm": 0.3348837610010347, "learning_rate": 0.0004936306748017739, "loss": 3.138913154602051, "step": 4040, "token_acc": 0.28368995940737324 }, { "epoch": 2.36880680152448, "grad_norm": 0.2526109604313326, "learning_rate": 0.000493625239089504, "loss": 3.139894962310791, "step": 4041, "token_acc": 0.28477981700465516 }, { "epoch": 2.3693931398416885, "grad_norm": 0.3320634406713377, "learning_rate": 0.0004936198010887032, "loss": 3.151625633239746, "step": 4042, "token_acc": 0.28076933033056356 }, { "epoch": 2.3699794781588976, "grad_norm": 0.3323202087643768, "learning_rate": 0.0004936143607994227, "loss": 3.0928902626037598, "step": 4043, "token_acc": 0.29072783656563844 }, { "epoch": 2.3705658164761068, "grad_norm": 0.26271573821993627, "learning_rate": 0.0004936089182217136, "loss": 3.0799219608306885, "step": 4044, "token_acc": 0.29257940033821195 }, { "epoch": 2.371152154793316, "grad_norm": 0.30796021540792395, "learning_rate": 0.000493603473355627, "loss": 3.1372227668762207, "step": 4045, "token_acc": 0.28536175710594314 }, { "epoch": 2.3717384931105245, "grad_norm": 0.31112680816289107, "learning_rate": 0.000493598026201214, "loss": 3.145521879196167, "step": 4046, "token_acc": 0.28273259174593873 }, { "epoch": 2.3723248314277336, "grad_norm": 0.2547932141582526, "learning_rate": 0.0004935925767585258, "loss": 3.1690471172332764, "step": 4047, "token_acc": 0.28082155132657455 }, { "epoch": 2.3729111697449428, "grad_norm": 0.3067688908458565, "learning_rate": 0.0004935871250276135, "loss": 3.1275105476379395, "step": 4048, "token_acc": 0.28545256486432957 }, { "epoch": 2.373497508062152, "grad_norm": 0.2805179341133092, "learning_rate": 0.0004935816710085285, "loss": 3.1554250717163086, "step": 4049, "token_acc": 0.28226631896760124 }, { "epoch": 2.374083846379361, "grad_norm": 0.28285718271004157, "learning_rate": 0.0004935762147013218, "loss": 3.1893157958984375, "step": 4050, "token_acc": 0.27810859911182884 }, { "epoch": 2.37467018469657, "grad_norm": 0.31795040351553366, "learning_rate": 0.0004935707561060449, "loss": 3.1490261554718018, "step": 4051, "token_acc": 0.28443693711896506 }, { "epoch": 2.3752565230137788, "grad_norm": 0.3021004988282593, "learning_rate": 0.000493565295222749, "loss": 3.1375069618225098, "step": 4052, "token_acc": 0.28617642438555524 }, { "epoch": 2.375842861330988, "grad_norm": 0.2743867322823992, "learning_rate": 0.0004935598320514853, "loss": 3.142432689666748, "step": 4053, "token_acc": 0.28379500913219274 }, { "epoch": 2.376429199648197, "grad_norm": 0.2729379695650785, "learning_rate": 0.0004935543665923051, "loss": 3.1676464080810547, "step": 4054, "token_acc": 0.28077004507319614 }, { "epoch": 2.377015537965406, "grad_norm": 0.2964195777850554, "learning_rate": 0.0004935488988452598, "loss": 3.1309988498687744, "step": 4055, "token_acc": 0.2866052434028869 }, { "epoch": 2.377601876282615, "grad_norm": 0.28457075331299414, "learning_rate": 0.000493543428810401, "loss": 3.1408252716064453, "step": 4056, "token_acc": 0.2843132597530786 }, { "epoch": 2.378188214599824, "grad_norm": 0.34623553441194527, "learning_rate": 0.0004935379564877797, "loss": 3.1029162406921387, "step": 4057, "token_acc": 0.2884390315265891 }, { "epoch": 2.378774552917033, "grad_norm": 0.3695914380533764, "learning_rate": 0.0004935324818774475, "loss": 3.162315845489502, "step": 4058, "token_acc": 0.2815020402944843 }, { "epoch": 2.379360891234242, "grad_norm": 0.30028005565483523, "learning_rate": 0.0004935270049794558, "loss": 3.164185047149658, "step": 4059, "token_acc": 0.28234970273698073 }, { "epoch": 2.379947229551451, "grad_norm": 0.26438452217338815, "learning_rate": 0.000493521525793856, "loss": 3.1331026554107666, "step": 4060, "token_acc": 0.285255655504796 }, { "epoch": 2.3805335678686603, "grad_norm": 0.24336475276660716, "learning_rate": 0.0004935160443206997, "loss": 3.168433427810669, "step": 4061, "token_acc": 0.28047342197006303 }, { "epoch": 2.3811199061858694, "grad_norm": 0.2589052213552716, "learning_rate": 0.0004935105605600383, "loss": 3.140836000442505, "step": 4062, "token_acc": 0.2842867829233226 }, { "epoch": 2.381706244503078, "grad_norm": 0.30157662895422804, "learning_rate": 0.0004935050745119233, "loss": 3.113926410675049, "step": 4063, "token_acc": 0.2870446716138725 }, { "epoch": 2.382292582820287, "grad_norm": 0.41415725712738666, "learning_rate": 0.0004934995861764062, "loss": 3.096813440322876, "step": 4064, "token_acc": 0.2890652004405109 }, { "epoch": 2.3828789211374963, "grad_norm": 0.3377295879502104, "learning_rate": 0.0004934940955535386, "loss": 3.148503065109253, "step": 4065, "token_acc": 0.28233725238215446 }, { "epoch": 2.3834652594547054, "grad_norm": 0.23028799966024388, "learning_rate": 0.0004934886026433722, "loss": 3.144071578979492, "step": 4066, "token_acc": 0.2828380423814329 }, { "epoch": 2.3840515977719146, "grad_norm": 0.30702097604114503, "learning_rate": 0.0004934831074459585, "loss": 3.1880664825439453, "step": 4067, "token_acc": 0.27869606819128034 }, { "epoch": 2.3846379360891232, "grad_norm": 0.2581701224243676, "learning_rate": 0.000493477609961349, "loss": 3.1330957412719727, "step": 4068, "token_acc": 0.28431855479866364 }, { "epoch": 2.3852242744063323, "grad_norm": 0.3121199480347057, "learning_rate": 0.0004934721101895954, "loss": 3.127673387527466, "step": 4069, "token_acc": 0.28644807537775535 }, { "epoch": 2.3858106127235414, "grad_norm": 0.27740788582797143, "learning_rate": 0.0004934666081307496, "loss": 3.110970973968506, "step": 4070, "token_acc": 0.2883976668427774 }, { "epoch": 2.3863969510407506, "grad_norm": 0.26695029307583873, "learning_rate": 0.0004934611037848629, "loss": 3.145266532897949, "step": 4071, "token_acc": 0.2827191365813495 }, { "epoch": 2.3869832893579597, "grad_norm": 0.3355222803854897, "learning_rate": 0.0004934555971519872, "loss": 3.1385531425476074, "step": 4072, "token_acc": 0.28259410667814183 }, { "epoch": 2.387569627675169, "grad_norm": 0.32344054787752197, "learning_rate": 0.0004934500882321743, "loss": 3.108651876449585, "step": 4073, "token_acc": 0.28665177504222006 }, { "epoch": 2.3881559659923774, "grad_norm": 0.26285109154248626, "learning_rate": 0.0004934445770254758, "loss": 3.1573333740234375, "step": 4074, "token_acc": 0.2833565374172521 }, { "epoch": 2.3887423043095866, "grad_norm": 0.27335983051668883, "learning_rate": 0.0004934390635319436, "loss": 3.167174816131592, "step": 4075, "token_acc": 0.2813889184302882 }, { "epoch": 2.3893286426267957, "grad_norm": 0.27002179432602424, "learning_rate": 0.0004934335477516295, "loss": 3.1499409675598145, "step": 4076, "token_acc": 0.2825924980862465 }, { "epoch": 2.389914980944005, "grad_norm": 0.27710952457774657, "learning_rate": 0.0004934280296845852, "loss": 3.16397762298584, "step": 4077, "token_acc": 0.28078295437897755 }, { "epoch": 2.390501319261214, "grad_norm": 0.27246124377460496, "learning_rate": 0.0004934225093308625, "loss": 3.145174503326416, "step": 4078, "token_acc": 0.28499365706787755 }, { "epoch": 2.3910876575784226, "grad_norm": 0.26057186861358933, "learning_rate": 0.0004934169866905135, "loss": 3.134951591491699, "step": 4079, "token_acc": 0.2852747110948988 }, { "epoch": 2.3916739958956317, "grad_norm": 0.2798257223702878, "learning_rate": 0.0004934114617635898, "loss": 3.1059961318969727, "step": 4080, "token_acc": 0.28707484761446783 }, { "epoch": 2.392260334212841, "grad_norm": 0.28362520849016537, "learning_rate": 0.0004934059345501435, "loss": 3.142159938812256, "step": 4081, "token_acc": 0.28452248550904785 }, { "epoch": 2.39284667253005, "grad_norm": 0.31383810330243866, "learning_rate": 0.0004934004050502263, "loss": 3.138308048248291, "step": 4082, "token_acc": 0.2843043759658693 }, { "epoch": 2.393433010847259, "grad_norm": 0.3155225989837768, "learning_rate": 0.0004933948732638904, "loss": 3.1475093364715576, "step": 4083, "token_acc": 0.28331787060024777 }, { "epoch": 2.394019349164468, "grad_norm": 0.26086610124537146, "learning_rate": 0.0004933893391911876, "loss": 3.1382765769958496, "step": 4084, "token_acc": 0.28297447708774087 }, { "epoch": 2.394605687481677, "grad_norm": 0.339735441378694, "learning_rate": 0.00049338380283217, "loss": 3.1605401039123535, "step": 4085, "token_acc": 0.2805394061501943 }, { "epoch": 2.395192025798886, "grad_norm": 0.37786700657508204, "learning_rate": 0.0004933782641868894, "loss": 3.1292455196380615, "step": 4086, "token_acc": 0.285063339204911 }, { "epoch": 2.395778364116095, "grad_norm": 0.28721391005681457, "learning_rate": 0.0004933727232553981, "loss": 3.1868762969970703, "step": 4087, "token_acc": 0.2777047283437648 }, { "epoch": 2.396364702433304, "grad_norm": 0.2579902605141577, "learning_rate": 0.0004933671800377479, "loss": 3.104323387145996, "step": 4088, "token_acc": 0.2893102211349132 }, { "epoch": 2.3969510407505132, "grad_norm": 0.3299529748874557, "learning_rate": 0.0004933616345339911, "loss": 3.1254196166992188, "step": 4089, "token_acc": 0.2863896377852757 }, { "epoch": 2.397537379067722, "grad_norm": 0.24838459133821034, "learning_rate": 0.0004933560867441797, "loss": 3.1728739738464355, "step": 4090, "token_acc": 0.2791913904038877 }, { "epoch": 2.398123717384931, "grad_norm": 0.33561991548192066, "learning_rate": 0.0004933505366683657, "loss": 3.1828389167785645, "step": 4091, "token_acc": 0.2781146397364652 }, { "epoch": 2.39871005570214, "grad_norm": 0.2598931955585801, "learning_rate": 0.0004933449843066013, "loss": 3.151083469390869, "step": 4092, "token_acc": 0.28187937067522834 }, { "epoch": 2.3992963940193492, "grad_norm": 0.3425687238866714, "learning_rate": 0.0004933394296589387, "loss": 3.089930772781372, "step": 4093, "token_acc": 0.2900241613112128 }, { "epoch": 2.3998827323365584, "grad_norm": 0.30447962691173036, "learning_rate": 0.0004933338727254301, "loss": 3.1673331260681152, "step": 4094, "token_acc": 0.27943464938315177 }, { "epoch": 2.4004690706537675, "grad_norm": 0.32602099609588964, "learning_rate": 0.0004933283135061277, "loss": 3.165236473083496, "step": 4095, "token_acc": 0.28015353549866967 }, { "epoch": 2.401055408970976, "grad_norm": 0.2977355361720388, "learning_rate": 0.0004933227520010836, "loss": 3.2125751972198486, "step": 4096, "token_acc": 0.274417364379618 }, { "epoch": 2.4016417472881852, "grad_norm": 0.32008640793279763, "learning_rate": 0.0004933171882103501, "loss": 3.150895833969116, "step": 4097, "token_acc": 0.2827747372254651 }, { "epoch": 2.4022280856053944, "grad_norm": 0.3122187543629649, "learning_rate": 0.0004933116221339796, "loss": 3.1602330207824707, "step": 4098, "token_acc": 0.2812706561353062 }, { "epoch": 2.4028144239226035, "grad_norm": 0.307871790964921, "learning_rate": 0.0004933060537720242, "loss": 3.1681432723999023, "step": 4099, "token_acc": 0.28057955507084964 }, { "epoch": 2.403400762239812, "grad_norm": 0.3600218656442076, "learning_rate": 0.0004933004831245364, "loss": 3.167104959487915, "step": 4100, "token_acc": 0.281351371931873 }, { "epoch": 2.4039871005570213, "grad_norm": 0.2857925533151999, "learning_rate": 0.0004932949101915683, "loss": 3.1797680854797363, "step": 4101, "token_acc": 0.2792059015416545 }, { "epoch": 2.4045734388742304, "grad_norm": 0.28310141269076017, "learning_rate": 0.0004932893349731723, "loss": 3.1275579929351807, "step": 4102, "token_acc": 0.28668382569500467 }, { "epoch": 2.4051597771914395, "grad_norm": 0.3953732165627844, "learning_rate": 0.000493283757469401, "loss": 3.146716833114624, "step": 4103, "token_acc": 0.2819510362080858 }, { "epoch": 2.4057461155086486, "grad_norm": 0.34086716300798964, "learning_rate": 0.0004932781776803065, "loss": 3.202929973602295, "step": 4104, "token_acc": 0.27649836255310195 }, { "epoch": 2.4063324538258577, "grad_norm": 0.32721596986891277, "learning_rate": 0.0004932725956059414, "loss": 3.181713342666626, "step": 4105, "token_acc": 0.2793457070439017 }, { "epoch": 2.4069187921430664, "grad_norm": 0.31054398517466103, "learning_rate": 0.000493267011246358, "loss": 3.1325745582580566, "step": 4106, "token_acc": 0.286374695863747 }, { "epoch": 2.4075051304602755, "grad_norm": 0.3606109884193681, "learning_rate": 0.000493261424601609, "loss": 3.123775005340576, "step": 4107, "token_acc": 0.28622277287498493 }, { "epoch": 2.4080914687774846, "grad_norm": 0.3179994979862523, "learning_rate": 0.0004932558356717466, "loss": 3.1152191162109375, "step": 4108, "token_acc": 0.2865733325896355 }, { "epoch": 2.4086778070946937, "grad_norm": 0.275258802316626, "learning_rate": 0.0004932502444568235, "loss": 3.155104160308838, "step": 4109, "token_acc": 0.28337545542705356 }, { "epoch": 2.409264145411903, "grad_norm": 0.3364813915158878, "learning_rate": 0.000493244650956892, "loss": 3.1361351013183594, "step": 4110, "token_acc": 0.2835962558935723 }, { "epoch": 2.4098504837291115, "grad_norm": 0.26292514259308347, "learning_rate": 0.0004932390551720048, "loss": 3.130554676055908, "step": 4111, "token_acc": 0.2851221476510067 }, { "epoch": 2.4104368220463206, "grad_norm": 0.3066565292712497, "learning_rate": 0.0004932334571022145, "loss": 3.152090549468994, "step": 4112, "token_acc": 0.2812216580752684 }, { "epoch": 2.4110231603635297, "grad_norm": 0.25256612744351087, "learning_rate": 0.0004932278567475737, "loss": 3.098417043685913, "step": 4113, "token_acc": 0.29005599791116893 }, { "epoch": 2.411609498680739, "grad_norm": 0.2721411239759966, "learning_rate": 0.0004932222541081348, "loss": 3.145481586456299, "step": 4114, "token_acc": 0.2838695292929558 }, { "epoch": 2.412195836997948, "grad_norm": 0.22599539963413645, "learning_rate": 0.0004932166491839507, "loss": 3.1612601280212402, "step": 4115, "token_acc": 0.2824076501242516 }, { "epoch": 2.412782175315157, "grad_norm": 0.3022587962613591, "learning_rate": 0.0004932110419750738, "loss": 3.177727222442627, "step": 4116, "token_acc": 0.27852007569971754 }, { "epoch": 2.4133685136323657, "grad_norm": 0.30763859944274985, "learning_rate": 0.000493205432481557, "loss": 3.138605833053589, "step": 4117, "token_acc": 0.2863578340757533 }, { "epoch": 2.413954851949575, "grad_norm": 0.2352536566945773, "learning_rate": 0.0004931998207034528, "loss": 3.1431665420532227, "step": 4118, "token_acc": 0.2826046662193807 }, { "epoch": 2.414541190266784, "grad_norm": 0.31237118490587434, "learning_rate": 0.000493194206640814, "loss": 3.173316240310669, "step": 4119, "token_acc": 0.28012032970978834 }, { "epoch": 2.415127528583993, "grad_norm": 0.3134306802686362, "learning_rate": 0.0004931885902936935, "loss": 3.1762430667877197, "step": 4120, "token_acc": 0.27753673434010745 }, { "epoch": 2.415713866901202, "grad_norm": 0.40413384841349204, "learning_rate": 0.0004931829716621438, "loss": 3.1484591960906982, "step": 4121, "token_acc": 0.28246398138665446 }, { "epoch": 2.416300205218411, "grad_norm": 0.3652326662573222, "learning_rate": 0.0004931773507462176, "loss": 3.1374175548553467, "step": 4122, "token_acc": 0.28323764849576066 }, { "epoch": 2.41688654353562, "grad_norm": 0.2787192995217331, "learning_rate": 0.0004931717275459681, "loss": 3.112290143966675, "step": 4123, "token_acc": 0.28707988407282276 }, { "epoch": 2.417472881852829, "grad_norm": 0.3096256363565667, "learning_rate": 0.0004931661020614478, "loss": 3.1806652545928955, "step": 4124, "token_acc": 0.27818994062191155 }, { "epoch": 2.418059220170038, "grad_norm": 0.3263757030294466, "learning_rate": 0.0004931604742927096, "loss": 3.175119400024414, "step": 4125, "token_acc": 0.2779952528302825 }, { "epoch": 2.4186455584872473, "grad_norm": 0.32171825048421016, "learning_rate": 0.0004931548442398065, "loss": 3.155787706375122, "step": 4126, "token_acc": 0.2828748655146307 }, { "epoch": 2.4192318968044564, "grad_norm": 0.3002516736234532, "learning_rate": 0.0004931492119027912, "loss": 3.147311210632324, "step": 4127, "token_acc": 0.281658062556939 }, { "epoch": 2.419818235121665, "grad_norm": 0.2616922706603251, "learning_rate": 0.0004931435772817168, "loss": 3.1236133575439453, "step": 4128, "token_acc": 0.2862291507347622 }, { "epoch": 2.420404573438874, "grad_norm": 0.24445385964476138, "learning_rate": 0.0004931379403766361, "loss": 3.142514228820801, "step": 4129, "token_acc": 0.2830488528844689 }, { "epoch": 2.4209909117560833, "grad_norm": 0.265518764054282, "learning_rate": 0.0004931323011876021, "loss": 3.135782480239868, "step": 4130, "token_acc": 0.28349745898105017 }, { "epoch": 2.4215772500732924, "grad_norm": 0.24828711718407814, "learning_rate": 0.0004931266597146676, "loss": 3.1276228427886963, "step": 4131, "token_acc": 0.2852538544179976 }, { "epoch": 2.4221635883905015, "grad_norm": 0.3161760645363754, "learning_rate": 0.000493121015957886, "loss": 3.1190271377563477, "step": 4132, "token_acc": 0.28724781744087824 }, { "epoch": 2.42274992670771, "grad_norm": 0.3448451434715971, "learning_rate": 0.0004931153699173099, "loss": 3.1582865715026855, "step": 4133, "token_acc": 0.2822012403224307 }, { "epoch": 2.4233362650249193, "grad_norm": 0.2655848173631843, "learning_rate": 0.0004931097215929925, "loss": 3.100529193878174, "step": 4134, "token_acc": 0.28825113553630316 }, { "epoch": 2.4239226033421284, "grad_norm": 0.28147055840140633, "learning_rate": 0.0004931040709849869, "loss": 3.1324245929718018, "step": 4135, "token_acc": 0.2849888342433748 }, { "epoch": 2.4245089416593375, "grad_norm": 0.3863513852128823, "learning_rate": 0.0004930984180933462, "loss": 3.087904453277588, "step": 4136, "token_acc": 0.29149913475572503 }, { "epoch": 2.4250952799765466, "grad_norm": 0.2630892275375379, "learning_rate": 0.0004930927629181234, "loss": 3.1913859844207764, "step": 4137, "token_acc": 0.2769523282204503 }, { "epoch": 2.4256816182937557, "grad_norm": 0.3311874495556526, "learning_rate": 0.0004930871054593716, "loss": 3.158717155456543, "step": 4138, "token_acc": 0.28216183938715783 }, { "epoch": 2.4262679566109644, "grad_norm": 0.26871327413622054, "learning_rate": 0.0004930814457171441, "loss": 3.137910842895508, "step": 4139, "token_acc": 0.285189778824897 }, { "epoch": 2.4268542949281735, "grad_norm": 0.33739768520535274, "learning_rate": 0.0004930757836914939, "loss": 3.1281514167785645, "step": 4140, "token_acc": 0.28614501940356274 }, { "epoch": 2.4274406332453826, "grad_norm": 0.26375577887458684, "learning_rate": 0.0004930701193824744, "loss": 3.1643097400665283, "step": 4141, "token_acc": 0.27949293508582995 }, { "epoch": 2.4280269715625917, "grad_norm": 0.2865114725356215, "learning_rate": 0.0004930644527901385, "loss": 3.124934196472168, "step": 4142, "token_acc": 0.2844657683056152 }, { "epoch": 2.4286133098798004, "grad_norm": 0.2787478182409089, "learning_rate": 0.0004930587839145396, "loss": 3.1285805702209473, "step": 4143, "token_acc": 0.28579032965622025 }, { "epoch": 2.4291996481970095, "grad_norm": 0.30236186931630615, "learning_rate": 0.0004930531127557311, "loss": 3.1423957347869873, "step": 4144, "token_acc": 0.28300767877913263 }, { "epoch": 2.4297859865142186, "grad_norm": 0.24351281275965278, "learning_rate": 0.000493047439313766, "loss": 3.160454034805298, "step": 4145, "token_acc": 0.2823586333037814 }, { "epoch": 2.4303723248314277, "grad_norm": 0.3137301832262234, "learning_rate": 0.0004930417635886976, "loss": 3.1892623901367188, "step": 4146, "token_acc": 0.2764546567817926 }, { "epoch": 2.430958663148637, "grad_norm": 0.33155461279884, "learning_rate": 0.0004930360855805796, "loss": 3.151230812072754, "step": 4147, "token_acc": 0.28088501902305746 }, { "epoch": 2.431545001465846, "grad_norm": 0.3460235188602479, "learning_rate": 0.000493030405289465, "loss": 3.132094383239746, "step": 4148, "token_acc": 0.28577278878724677 }, { "epoch": 2.432131339783055, "grad_norm": 0.28853688060948096, "learning_rate": 0.0004930247227154072, "loss": 3.203066349029541, "step": 4149, "token_acc": 0.27543740967374924 }, { "epoch": 2.4327176781002637, "grad_norm": 0.3128670072739596, "learning_rate": 0.0004930190378584596, "loss": 3.158848285675049, "step": 4150, "token_acc": 0.28278090583011606 }, { "epoch": 2.433304016417473, "grad_norm": 0.2659541699608326, "learning_rate": 0.0004930133507186756, "loss": 3.1860828399658203, "step": 4151, "token_acc": 0.2771219844398126 }, { "epoch": 2.433890354734682, "grad_norm": 0.3610622440174549, "learning_rate": 0.0004930076612961086, "loss": 3.1482510566711426, "step": 4152, "token_acc": 0.28393887748893004 }, { "epoch": 2.434476693051891, "grad_norm": 0.25400727208734536, "learning_rate": 0.0004930019695908121, "loss": 3.0948405265808105, "step": 4153, "token_acc": 0.290667462565655 }, { "epoch": 2.4350630313690997, "grad_norm": 0.3106252673093002, "learning_rate": 0.0004929962756028396, "loss": 3.121525287628174, "step": 4154, "token_acc": 0.2853909780095146 }, { "epoch": 2.435649369686309, "grad_norm": 0.26493612703858604, "learning_rate": 0.0004929905793322445, "loss": 3.1614787578582764, "step": 4155, "token_acc": 0.28172556775887414 }, { "epoch": 2.436235708003518, "grad_norm": 0.29910276962565174, "learning_rate": 0.0004929848807790803, "loss": 3.1297683715820312, "step": 4156, "token_acc": 0.2847041300420812 }, { "epoch": 2.436822046320727, "grad_norm": 0.2485899773683061, "learning_rate": 0.0004929791799434006, "loss": 3.1873116493225098, "step": 4157, "token_acc": 0.2757823804534222 }, { "epoch": 2.437408384637936, "grad_norm": 0.34689503204076666, "learning_rate": 0.0004929734768252589, "loss": 3.1448440551757812, "step": 4158, "token_acc": 0.284070946303469 }, { "epoch": 2.4379947229551453, "grad_norm": 0.2647971573709019, "learning_rate": 0.0004929677714247089, "loss": 3.1847267150878906, "step": 4159, "token_acc": 0.2783791875453027 }, { "epoch": 2.438581061272354, "grad_norm": 0.275487446603698, "learning_rate": 0.000492962063741804, "loss": 3.149015426635742, "step": 4160, "token_acc": 0.2815154490511722 }, { "epoch": 2.439167399589563, "grad_norm": 0.25270398957826845, "learning_rate": 0.000492956353776598, "loss": 3.1334729194641113, "step": 4161, "token_acc": 0.2856022204558189 }, { "epoch": 2.439753737906772, "grad_norm": 0.2986367662498128, "learning_rate": 0.0004929506415291444, "loss": 3.150852680206299, "step": 4162, "token_acc": 0.2820326165792056 }, { "epoch": 2.4403400762239813, "grad_norm": 0.28561022837885724, "learning_rate": 0.0004929449269994967, "loss": 3.1485679149627686, "step": 4163, "token_acc": 0.28192553661289177 }, { "epoch": 2.4409264145411904, "grad_norm": 0.2579067773451289, "learning_rate": 0.000492939210187709, "loss": 3.1455910205841064, "step": 4164, "token_acc": 0.2817013712544439 }, { "epoch": 2.441512752858399, "grad_norm": 0.2649746034316926, "learning_rate": 0.0004929334910938347, "loss": 3.1592254638671875, "step": 4165, "token_acc": 0.28298098277237543 }, { "epoch": 2.442099091175608, "grad_norm": 0.2969163676399799, "learning_rate": 0.0004929277697179277, "loss": 3.135164976119995, "step": 4166, "token_acc": 0.2843562947070902 }, { "epoch": 2.4426854294928173, "grad_norm": 0.3207941388496579, "learning_rate": 0.0004929220460600417, "loss": 3.202554225921631, "step": 4167, "token_acc": 0.27601026665541656 }, { "epoch": 2.4432717678100264, "grad_norm": 0.2677869631433697, "learning_rate": 0.0004929163201202303, "loss": 3.1436314582824707, "step": 4168, "token_acc": 0.2838059332344015 }, { "epoch": 2.4438581061272355, "grad_norm": 0.242170689723494, "learning_rate": 0.0004929105918985474, "loss": 3.1898326873779297, "step": 4169, "token_acc": 0.2769615970048327 }, { "epoch": 2.4444444444444446, "grad_norm": 0.2917618502161134, "learning_rate": 0.0004929048613950468, "loss": 3.1665778160095215, "step": 4170, "token_acc": 0.2817072114968401 }, { "epoch": 2.4450307827616533, "grad_norm": 0.36972532152077836, "learning_rate": 0.0004928991286097825, "loss": 3.1536307334899902, "step": 4171, "token_acc": 0.28326246857639426 }, { "epoch": 2.4456171210788624, "grad_norm": 0.32769026456770717, "learning_rate": 0.000492893393542808, "loss": 3.1668663024902344, "step": 4172, "token_acc": 0.2794675738344151 }, { "epoch": 2.4462034593960715, "grad_norm": 0.21826115470366664, "learning_rate": 0.0004928876561941776, "loss": 3.165198564529419, "step": 4173, "token_acc": 0.2805083001336672 }, { "epoch": 2.4467897977132806, "grad_norm": 0.2525647905124037, "learning_rate": 0.0004928819165639448, "loss": 3.1652674674987793, "step": 4174, "token_acc": 0.2804764754638174 }, { "epoch": 2.4473761360304898, "grad_norm": 0.24973832293172119, "learning_rate": 0.0004928761746521637, "loss": 3.1039247512817383, "step": 4175, "token_acc": 0.29084482354150576 }, { "epoch": 2.4479624743476984, "grad_norm": 0.27811566943399496, "learning_rate": 0.0004928704304588881, "loss": 3.1877975463867188, "step": 4176, "token_acc": 0.2753458884952452 }, { "epoch": 2.4485488126649075, "grad_norm": 0.3572517789804198, "learning_rate": 0.0004928646839841722, "loss": 3.143075466156006, "step": 4177, "token_acc": 0.28347821056893896 }, { "epoch": 2.4491351509821166, "grad_norm": 0.34927544312848724, "learning_rate": 0.0004928589352280699, "loss": 3.150834798812866, "step": 4178, "token_acc": 0.2818185731890414 }, { "epoch": 2.4497214892993258, "grad_norm": 0.26749069012328264, "learning_rate": 0.0004928531841906352, "loss": 3.1356890201568604, "step": 4179, "token_acc": 0.28432401086011017 }, { "epoch": 2.450307827616535, "grad_norm": 0.3163747417639619, "learning_rate": 0.0004928474308719219, "loss": 3.1242334842681885, "step": 4180, "token_acc": 0.2864676402465141 }, { "epoch": 2.450894165933744, "grad_norm": 0.2295863988942232, "learning_rate": 0.0004928416752719843, "loss": 3.1421024799346924, "step": 4181, "token_acc": 0.28301806010633074 }, { "epoch": 2.4514805042509527, "grad_norm": 0.31023578703748433, "learning_rate": 0.0004928359173908765, "loss": 3.1689114570617676, "step": 4182, "token_acc": 0.2804534725362624 }, { "epoch": 2.4520668425681618, "grad_norm": 0.32132829357731, "learning_rate": 0.0004928301572286524, "loss": 3.1850528717041016, "step": 4183, "token_acc": 0.2764554305004034 }, { "epoch": 2.452653180885371, "grad_norm": 0.24807097322166693, "learning_rate": 0.0004928243947853662, "loss": 3.1628308296203613, "step": 4184, "token_acc": 0.2826047806815952 }, { "epoch": 2.45323951920258, "grad_norm": 0.29537704560204525, "learning_rate": 0.000492818630061072, "loss": 3.127662181854248, "step": 4185, "token_acc": 0.28291572093763284 }, { "epoch": 2.453825857519789, "grad_norm": 0.29488447445114074, "learning_rate": 0.000492812863055824, "loss": 3.083648681640625, "step": 4186, "token_acc": 0.2926736682272555 }, { "epoch": 2.4544121958369978, "grad_norm": 0.34296162287324217, "learning_rate": 0.0004928070937696763, "loss": 3.131272315979004, "step": 4187, "token_acc": 0.2848218012067127 }, { "epoch": 2.454998534154207, "grad_norm": 0.3405302602748392, "learning_rate": 0.0004928013222026832, "loss": 3.198709011077881, "step": 4188, "token_acc": 0.27616023002025747 }, { "epoch": 2.455584872471416, "grad_norm": 0.2578411532327479, "learning_rate": 0.0004927955483548989, "loss": 3.1064698696136475, "step": 4189, "token_acc": 0.2882348467564073 }, { "epoch": 2.456171210788625, "grad_norm": 0.32971478825560063, "learning_rate": 0.0004927897722263774, "loss": 3.1714906692504883, "step": 4190, "token_acc": 0.2794472749013635 }, { "epoch": 2.456757549105834, "grad_norm": 0.26703301123693507, "learning_rate": 0.0004927839938171734, "loss": 3.072561740875244, "step": 4191, "token_acc": 0.2914858358163838 }, { "epoch": 2.4573438874230433, "grad_norm": 0.3187975031701169, "learning_rate": 0.0004927782131273408, "loss": 3.099703550338745, "step": 4192, "token_acc": 0.29018246373310086 }, { "epoch": 2.457930225740252, "grad_norm": 0.27841817030302446, "learning_rate": 0.0004927724301569341, "loss": 3.1477863788604736, "step": 4193, "token_acc": 0.28245284791723274 }, { "epoch": 2.458516564057461, "grad_norm": 0.3107033461527404, "learning_rate": 0.0004927666449060075, "loss": 3.137528419494629, "step": 4194, "token_acc": 0.2839246382982208 }, { "epoch": 2.45910290237467, "grad_norm": 0.2937806579561194, "learning_rate": 0.0004927608573746154, "loss": 3.1533889770507812, "step": 4195, "token_acc": 0.2844599211563732 }, { "epoch": 2.4596892406918793, "grad_norm": 0.31881748413630634, "learning_rate": 0.0004927550675628122, "loss": 3.1699368953704834, "step": 4196, "token_acc": 0.279223266715391 }, { "epoch": 2.460275579009088, "grad_norm": 0.2977123082936681, "learning_rate": 0.0004927492754706522, "loss": 3.148170232772827, "step": 4197, "token_acc": 0.2826829798599349 }, { "epoch": 2.460861917326297, "grad_norm": 0.3130183165660392, "learning_rate": 0.0004927434810981898, "loss": 3.1294474601745605, "step": 4198, "token_acc": 0.28655273925681807 }, { "epoch": 2.4614482556435062, "grad_norm": 0.3155202396776548, "learning_rate": 0.0004927376844454797, "loss": 3.1539344787597656, "step": 4199, "token_acc": 0.28121282792763014 }, { "epoch": 2.4620345939607153, "grad_norm": 0.2801842965345091, "learning_rate": 0.0004927318855125761, "loss": 3.0997154712677, "step": 4200, "token_acc": 0.28903814432176034 }, { "epoch": 2.4626209322779244, "grad_norm": 0.30852818632655105, "learning_rate": 0.0004927260842995335, "loss": 3.1385796070098877, "step": 4201, "token_acc": 0.2850856884382837 }, { "epoch": 2.4632072705951336, "grad_norm": 0.3174216567748999, "learning_rate": 0.0004927202808064063, "loss": 3.1967341899871826, "step": 4202, "token_acc": 0.2761190912556066 }, { "epoch": 2.4637936089123427, "grad_norm": 0.3127201734274958, "learning_rate": 0.0004927144750332493, "loss": 3.210827112197876, "step": 4203, "token_acc": 0.27585854750651356 }, { "epoch": 2.4643799472295513, "grad_norm": 0.3100123033233525, "learning_rate": 0.0004927086669801168, "loss": 3.135878086090088, "step": 4204, "token_acc": 0.28531658371703084 }, { "epoch": 2.4649662855467604, "grad_norm": 0.29123300944516933, "learning_rate": 0.0004927028566470633, "loss": 3.159703254699707, "step": 4205, "token_acc": 0.2810755242958033 }, { "epoch": 2.4655526238639696, "grad_norm": 0.3169398102046388, "learning_rate": 0.0004926970440341436, "loss": 3.1234395503997803, "step": 4206, "token_acc": 0.2856850238539722 }, { "epoch": 2.4661389621811787, "grad_norm": 0.32786407189713923, "learning_rate": 0.0004926912291414123, "loss": 3.140085220336914, "step": 4207, "token_acc": 0.28308418852000505 }, { "epoch": 2.4667253004983873, "grad_norm": 0.31909777654113397, "learning_rate": 0.0004926854119689239, "loss": 3.1785402297973633, "step": 4208, "token_acc": 0.2798355511605634 }, { "epoch": 2.4673116388155965, "grad_norm": 0.2603921936728504, "learning_rate": 0.000492679592516733, "loss": 3.1161351203918457, "step": 4209, "token_acc": 0.2866432697570301 }, { "epoch": 2.4678979771328056, "grad_norm": 0.28566648902168706, "learning_rate": 0.0004926737707848944, "loss": 3.146257162094116, "step": 4210, "token_acc": 0.2831503019631466 }, { "epoch": 2.4684843154500147, "grad_norm": 0.3311792571287571, "learning_rate": 0.0004926679467734627, "loss": 3.1663293838500977, "step": 4211, "token_acc": 0.27972298737071877 }, { "epoch": 2.469070653767224, "grad_norm": 0.2920541350305395, "learning_rate": 0.0004926621204824926, "loss": 3.2038497924804688, "step": 4212, "token_acc": 0.2743212331914827 }, { "epoch": 2.469656992084433, "grad_norm": 0.3169651690225659, "learning_rate": 0.000492656291912039, "loss": 3.1694750785827637, "step": 4213, "token_acc": 0.278409294103823 }, { "epoch": 2.4702433304016416, "grad_norm": 0.367698108143396, "learning_rate": 0.0004926504610621564, "loss": 3.1810524463653564, "step": 4214, "token_acc": 0.2770920333765093 }, { "epoch": 2.4708296687188507, "grad_norm": 0.3222032054448611, "learning_rate": 0.0004926446279328998, "loss": 3.1648154258728027, "step": 4215, "token_acc": 0.28176942680004563 }, { "epoch": 2.47141600703606, "grad_norm": 0.2557004144868029, "learning_rate": 0.0004926387925243237, "loss": 3.114588499069214, "step": 4216, "token_acc": 0.28587690098557533 }, { "epoch": 2.472002345353269, "grad_norm": 0.2637862781168629, "learning_rate": 0.0004926329548364833, "loss": 3.1347289085388184, "step": 4217, "token_acc": 0.2854036415979182 }, { "epoch": 2.472588683670478, "grad_norm": 0.24874164791712872, "learning_rate": 0.0004926271148694332, "loss": 3.107349395751953, "step": 4218, "token_acc": 0.2886656426120885 }, { "epoch": 2.4731750219876867, "grad_norm": 0.2732698878771074, "learning_rate": 0.0004926212726232283, "loss": 3.163975715637207, "step": 4219, "token_acc": 0.27964605978619855 }, { "epoch": 2.473761360304896, "grad_norm": 0.42054354776419856, "learning_rate": 0.0004926154280979234, "loss": 3.1351051330566406, "step": 4220, "token_acc": 0.2862769507866716 }, { "epoch": 2.474347698622105, "grad_norm": 0.45850723675087535, "learning_rate": 0.0004926095812935736, "loss": 3.1475491523742676, "step": 4221, "token_acc": 0.2842240729890349 }, { "epoch": 2.474934036939314, "grad_norm": 0.264343481547017, "learning_rate": 0.0004926037322102336, "loss": 3.129584312438965, "step": 4222, "token_acc": 0.28305626802609596 }, { "epoch": 2.475520375256523, "grad_norm": 0.3217916991597029, "learning_rate": 0.0004925978808479585, "loss": 3.157909631729126, "step": 4223, "token_acc": 0.28130903851287087 }, { "epoch": 2.4761067135737322, "grad_norm": 0.2852363283332628, "learning_rate": 0.0004925920272068031, "loss": 3.1636481285095215, "step": 4224, "token_acc": 0.28172602343851005 }, { "epoch": 2.476693051890941, "grad_norm": 0.30036414136795775, "learning_rate": 0.0004925861712868227, "loss": 3.1593754291534424, "step": 4225, "token_acc": 0.28274572690955946 }, { "epoch": 2.47727939020815, "grad_norm": 0.31368061905501227, "learning_rate": 0.0004925803130880719, "loss": 3.1193454265594482, "step": 4226, "token_acc": 0.2875602100261618 }, { "epoch": 2.477865728525359, "grad_norm": 0.2538877932025326, "learning_rate": 0.0004925744526106061, "loss": 3.14658784866333, "step": 4227, "token_acc": 0.2823971455541269 }, { "epoch": 2.4784520668425682, "grad_norm": 0.2971608514508549, "learning_rate": 0.0004925685898544801, "loss": 3.143190383911133, "step": 4228, "token_acc": 0.28430127837903396 }, { "epoch": 2.4790384051597774, "grad_norm": 0.3101850656008902, "learning_rate": 0.0004925627248197491, "loss": 3.1665735244750977, "step": 4229, "token_acc": 0.2808483898786362 }, { "epoch": 2.479624743476986, "grad_norm": 0.2853338373635848, "learning_rate": 0.0004925568575064683, "loss": 3.0559873580932617, "step": 4230, "token_acc": 0.29455190216415117 }, { "epoch": 2.480211081794195, "grad_norm": 0.3079789439127816, "learning_rate": 0.0004925509879146925, "loss": 3.1213254928588867, "step": 4231, "token_acc": 0.2853008330442616 }, { "epoch": 2.4807974201114043, "grad_norm": 0.23365005078398637, "learning_rate": 0.000492545116044477, "loss": 3.1593503952026367, "step": 4232, "token_acc": 0.2822676743336211 }, { "epoch": 2.4813837584286134, "grad_norm": 0.3401747512749149, "learning_rate": 0.0004925392418958771, "loss": 3.1303277015686035, "step": 4233, "token_acc": 0.2849772310789873 }, { "epoch": 2.4819700967458225, "grad_norm": 0.2976141430883711, "learning_rate": 0.0004925333654689477, "loss": 3.1732425689697266, "step": 4234, "token_acc": 0.27814858144897914 }, { "epoch": 2.4825564350630316, "grad_norm": 0.31204318663521496, "learning_rate": 0.0004925274867637442, "loss": 3.134174346923828, "step": 4235, "token_acc": 0.28627241257580627 }, { "epoch": 2.4831427733802403, "grad_norm": 0.27507313707966113, "learning_rate": 0.0004925216057803218, "loss": 3.1691324710845947, "step": 4236, "token_acc": 0.2797478129866184 }, { "epoch": 2.4837291116974494, "grad_norm": 0.2798317718680493, "learning_rate": 0.0004925157225187357, "loss": 3.1303861141204834, "step": 4237, "token_acc": 0.2858177146272178 }, { "epoch": 2.4843154500146585, "grad_norm": 0.32877360599087635, "learning_rate": 0.0004925098369790412, "loss": 3.1254799365997314, "step": 4238, "token_acc": 0.28797540547293404 }, { "epoch": 2.4849017883318676, "grad_norm": 0.2515607816995969, "learning_rate": 0.0004925039491612935, "loss": 3.148189067840576, "step": 4239, "token_acc": 0.28161157755862326 }, { "epoch": 2.4854881266490767, "grad_norm": 0.30336740299206366, "learning_rate": 0.0004924980590655481, "loss": 3.1954843997955322, "step": 4240, "token_acc": 0.27653655229382107 }, { "epoch": 2.4860744649662854, "grad_norm": 0.2463990404723353, "learning_rate": 0.00049249216669186, "loss": 3.1458182334899902, "step": 4241, "token_acc": 0.2816627113502711 }, { "epoch": 2.4866608032834945, "grad_norm": 0.29452863088120024, "learning_rate": 0.0004924862720402849, "loss": 3.1190481185913086, "step": 4242, "token_acc": 0.28853939121323696 }, { "epoch": 2.4872471416007036, "grad_norm": 0.25048427701672565, "learning_rate": 0.000492480375110878, "loss": 3.1333179473876953, "step": 4243, "token_acc": 0.28418590313163 }, { "epoch": 2.4878334799179127, "grad_norm": 0.273340430114594, "learning_rate": 0.0004924744759036948, "loss": 3.1438660621643066, "step": 4244, "token_acc": 0.2840406654443052 }, { "epoch": 2.488419818235122, "grad_norm": 0.3049434276679553, "learning_rate": 0.0004924685744187906, "loss": 3.152846097946167, "step": 4245, "token_acc": 0.2819012336412522 }, { "epoch": 2.489006156552331, "grad_norm": 0.27403403011737354, "learning_rate": 0.0004924626706562208, "loss": 3.125669240951538, "step": 4246, "token_acc": 0.2855597251072077 }, { "epoch": 2.4895924948695396, "grad_norm": 0.28463700493746086, "learning_rate": 0.000492456764616041, "loss": 3.159043312072754, "step": 4247, "token_acc": 0.28121481304267554 }, { "epoch": 2.4901788331867487, "grad_norm": 0.2521780456704807, "learning_rate": 0.0004924508562983066, "loss": 3.139467239379883, "step": 4248, "token_acc": 0.28392874822392344 }, { "epoch": 2.490765171503958, "grad_norm": 0.3112607097123613, "learning_rate": 0.0004924449457030731, "loss": 3.1357169151306152, "step": 4249, "token_acc": 0.282311327050909 }, { "epoch": 2.491351509821167, "grad_norm": 0.2564671993879261, "learning_rate": 0.0004924390328303961, "loss": 3.1473641395568848, "step": 4250, "token_acc": 0.2823216827780433 }, { "epoch": 2.4919378481383756, "grad_norm": 0.3009530481884104, "learning_rate": 0.000492433117680331, "loss": 3.137119770050049, "step": 4251, "token_acc": 0.2849263957323777 }, { "epoch": 2.4925241864555847, "grad_norm": 0.28251510034457167, "learning_rate": 0.0004924272002529334, "loss": 3.1396188735961914, "step": 4252, "token_acc": 0.2830706051058537 }, { "epoch": 2.493110524772794, "grad_norm": 0.3285647140206444, "learning_rate": 0.0004924212805482589, "loss": 3.1661481857299805, "step": 4253, "token_acc": 0.2789961650760448 }, { "epoch": 2.493696863090003, "grad_norm": 0.25808545466235516, "learning_rate": 0.0004924153585663633, "loss": 3.133260726928711, "step": 4254, "token_acc": 0.2866272430450191 }, { "epoch": 2.494283201407212, "grad_norm": 0.3086101211645659, "learning_rate": 0.000492409434307302, "loss": 3.152337074279785, "step": 4255, "token_acc": 0.2832935928213243 }, { "epoch": 2.494869539724421, "grad_norm": 0.26678250462601166, "learning_rate": 0.0004924035077711308, "loss": 3.131521701812744, "step": 4256, "token_acc": 0.2838843577046315 }, { "epoch": 2.49545587804163, "grad_norm": 0.30354118955796067, "learning_rate": 0.000492397578957905, "loss": 3.139087677001953, "step": 4257, "token_acc": 0.2840618438633327 }, { "epoch": 2.496042216358839, "grad_norm": 0.2420116542544201, "learning_rate": 0.0004923916478676808, "loss": 3.1340725421905518, "step": 4258, "token_acc": 0.28425254221180346 }, { "epoch": 2.496628554676048, "grad_norm": 0.2584900077782168, "learning_rate": 0.0004923857145005137, "loss": 3.087541341781616, "step": 4259, "token_acc": 0.28960529272511587 }, { "epoch": 2.497214892993257, "grad_norm": 0.2436011326871065, "learning_rate": 0.0004923797788564595, "loss": 3.101266384124756, "step": 4260, "token_acc": 0.2907542134317991 }, { "epoch": 2.4978012313104663, "grad_norm": 0.26503644145185634, "learning_rate": 0.0004923738409355737, "loss": 3.1852493286132812, "step": 4261, "token_acc": 0.27827252134337566 }, { "epoch": 2.498387569627675, "grad_norm": 0.25518759515027695, "learning_rate": 0.0004923679007379124, "loss": 3.112393617630005, "step": 4262, "token_acc": 0.28702717523120563 }, { "epoch": 2.498973907944884, "grad_norm": 0.24577736625728883, "learning_rate": 0.0004923619582635311, "loss": 3.143197536468506, "step": 4263, "token_acc": 0.28351113967982156 }, { "epoch": 2.499560246262093, "grad_norm": 0.23761577363047687, "learning_rate": 0.0004923560135124859, "loss": 3.1861720085144043, "step": 4264, "token_acc": 0.27916490043663805 }, { "epoch": 2.5001465845793023, "grad_norm": 0.30169261774841344, "learning_rate": 0.0004923500664848326, "loss": 3.134916067123413, "step": 4265, "token_acc": 0.2849277961528534 }, { "epoch": 2.5007329228965114, "grad_norm": 0.261200152779632, "learning_rate": 0.000492344117180627, "loss": 3.130162239074707, "step": 4266, "token_acc": 0.284659194135722 }, { "epoch": 2.5013192612137205, "grad_norm": 0.2868925459183805, "learning_rate": 0.0004923381655999249, "loss": 3.1220574378967285, "step": 4267, "token_acc": 0.2874931987264784 }, { "epoch": 2.5019055995309296, "grad_norm": 0.2297280022925021, "learning_rate": 0.0004923322117427823, "loss": 3.1427063941955566, "step": 4268, "token_acc": 0.2828639955920246 }, { "epoch": 2.5024919378481383, "grad_norm": 0.269069665462166, "learning_rate": 0.0004923262556092551, "loss": 3.1416192054748535, "step": 4269, "token_acc": 0.28258880946287346 }, { "epoch": 2.5030782761653474, "grad_norm": 0.26701035680832236, "learning_rate": 0.0004923202971993993, "loss": 3.179413318634033, "step": 4270, "token_acc": 0.2786554023368152 }, { "epoch": 2.5036646144825565, "grad_norm": 0.2803470675948156, "learning_rate": 0.0004923143365132708, "loss": 3.161494731903076, "step": 4271, "token_acc": 0.2817550447032257 }, { "epoch": 2.5042509527997656, "grad_norm": 0.3277185157340391, "learning_rate": 0.0004923083735509257, "loss": 3.1134235858917236, "step": 4272, "token_acc": 0.2863776215155609 }, { "epoch": 2.5048372911169743, "grad_norm": 0.375287007803871, "learning_rate": 0.0004923024083124199, "loss": 3.1536192893981934, "step": 4273, "token_acc": 0.27926387922533624 }, { "epoch": 2.5054236294341834, "grad_norm": 0.2811102389120282, "learning_rate": 0.0004922964407978094, "loss": 3.1751699447631836, "step": 4274, "token_acc": 0.28195738660539527 }, { "epoch": 2.5060099677513925, "grad_norm": 0.30788889426938676, "learning_rate": 0.0004922904710071505, "loss": 3.1252379417419434, "step": 4275, "token_acc": 0.2863395134054725 }, { "epoch": 2.5065963060686016, "grad_norm": 0.29832754044621906, "learning_rate": 0.000492284498940499, "loss": 3.1817221641540527, "step": 4276, "token_acc": 0.2792992159825311 }, { "epoch": 2.5071826443858107, "grad_norm": 0.29164510630326, "learning_rate": 0.0004922785245979112, "loss": 3.1445181369781494, "step": 4277, "token_acc": 0.2827581154517364 }, { "epoch": 2.50776898270302, "grad_norm": 0.31886656251645734, "learning_rate": 0.000492272547979443, "loss": 3.1670188903808594, "step": 4278, "token_acc": 0.2802357396225322 }, { "epoch": 2.5083553210202285, "grad_norm": 0.2598135189672792, "learning_rate": 0.0004922665690851508, "loss": 3.1491293907165527, "step": 4279, "token_acc": 0.2836048409853921 }, { "epoch": 2.5089416593374376, "grad_norm": 0.2674432332351087, "learning_rate": 0.0004922605879150906, "loss": 3.148040771484375, "step": 4280, "token_acc": 0.28228450886609996 }, { "epoch": 2.5095279976546467, "grad_norm": 0.26942678389811003, "learning_rate": 0.0004922546044693187, "loss": 3.153386354446411, "step": 4281, "token_acc": 0.28247137257813865 }, { "epoch": 2.510114335971856, "grad_norm": 0.261081268127297, "learning_rate": 0.0004922486187478912, "loss": 3.1450023651123047, "step": 4282, "token_acc": 0.28330050904551274 }, { "epoch": 2.5107006742890645, "grad_norm": 0.21410881575339583, "learning_rate": 0.0004922426307508642, "loss": 3.139354944229126, "step": 4283, "token_acc": 0.2834795960802295 }, { "epoch": 2.5112870126062736, "grad_norm": 0.29260667410661473, "learning_rate": 0.0004922366404782943, "loss": 3.158104658126831, "step": 4284, "token_acc": 0.28026898881279244 }, { "epoch": 2.5118733509234827, "grad_norm": 0.29965306649839285, "learning_rate": 0.0004922306479302375, "loss": 3.1704249382019043, "step": 4285, "token_acc": 0.28118961272732085 }, { "epoch": 2.512459689240692, "grad_norm": 0.2567423758958786, "learning_rate": 0.0004922246531067502, "loss": 3.154134511947632, "step": 4286, "token_acc": 0.2834724052922717 }, { "epoch": 2.513046027557901, "grad_norm": 0.26475541496539967, "learning_rate": 0.0004922186560078887, "loss": 3.1556499004364014, "step": 4287, "token_acc": 0.2832360749509433 }, { "epoch": 2.51363236587511, "grad_norm": 0.27687315686815755, "learning_rate": 0.0004922126566337093, "loss": 3.1589155197143555, "step": 4288, "token_acc": 0.2811336162465361 }, { "epoch": 2.514218704192319, "grad_norm": 0.27852264206280625, "learning_rate": 0.0004922066549842683, "loss": 3.1473474502563477, "step": 4289, "token_acc": 0.2828106137775904 }, { "epoch": 2.514805042509528, "grad_norm": 0.2671077484038438, "learning_rate": 0.0004922006510596223, "loss": 3.1173739433288574, "step": 4290, "token_acc": 0.28637902734620985 }, { "epoch": 2.515391380826737, "grad_norm": 0.2326095306515351, "learning_rate": 0.0004921946448598275, "loss": 3.132035255432129, "step": 4291, "token_acc": 0.28393412248548117 }, { "epoch": 2.515977719143946, "grad_norm": 0.22992825667665331, "learning_rate": 0.0004921886363849404, "loss": 3.1505091190338135, "step": 4292, "token_acc": 0.2833363469635523 }, { "epoch": 2.516564057461155, "grad_norm": 0.3042372486612273, "learning_rate": 0.0004921826256350173, "loss": 3.1512975692749023, "step": 4293, "token_acc": 0.2823877258299201 }, { "epoch": 2.517150395778364, "grad_norm": 0.35882623625851784, "learning_rate": 0.0004921766126101149, "loss": 3.1563854217529297, "step": 4294, "token_acc": 0.28189638993980964 }, { "epoch": 2.517736734095573, "grad_norm": 0.2780683516642048, "learning_rate": 0.0004921705973102894, "loss": 3.1225974559783936, "step": 4295, "token_acc": 0.28589753889826197 }, { "epoch": 2.518323072412782, "grad_norm": 0.23816444056334407, "learning_rate": 0.0004921645797355976, "loss": 3.1350255012512207, "step": 4296, "token_acc": 0.2840119983011255 }, { "epoch": 2.518909410729991, "grad_norm": 0.27219288665958735, "learning_rate": 0.000492158559886096, "loss": 3.1376705169677734, "step": 4297, "token_acc": 0.28444319661042905 }, { "epoch": 2.5194957490472003, "grad_norm": 0.23858972281133586, "learning_rate": 0.0004921525377618408, "loss": 3.179051637649536, "step": 4298, "token_acc": 0.27872398362079154 }, { "epoch": 2.5200820873644094, "grad_norm": 0.22971999576466592, "learning_rate": 0.0004921465133628889, "loss": 3.157120704650879, "step": 4299, "token_acc": 0.28111675153510096 }, { "epoch": 2.5206684256816185, "grad_norm": 0.2586473207886974, "learning_rate": 0.0004921404866892969, "loss": 3.1460561752319336, "step": 4300, "token_acc": 0.2825990525027059 }, { "epoch": 2.521254763998827, "grad_norm": 0.2858821864097338, "learning_rate": 0.0004921344577411212, "loss": 3.1245875358581543, "step": 4301, "token_acc": 0.28530609828937953 }, { "epoch": 2.5218411023160363, "grad_norm": 0.30615352040388594, "learning_rate": 0.0004921284265184186, "loss": 3.133049964904785, "step": 4302, "token_acc": 0.2838908506250773 }, { "epoch": 2.5224274406332454, "grad_norm": 0.3007266941146918, "learning_rate": 0.0004921223930212458, "loss": 3.1369903087615967, "step": 4303, "token_acc": 0.2836628372962864 }, { "epoch": 2.5230137789504545, "grad_norm": 0.29510494899516504, "learning_rate": 0.0004921163572496592, "loss": 3.1203293800354004, "step": 4304, "token_acc": 0.28575063682852553 }, { "epoch": 2.523600117267663, "grad_norm": 0.307605869683673, "learning_rate": 0.0004921103192037158, "loss": 3.133833885192871, "step": 4305, "token_acc": 0.2847668324495579 }, { "epoch": 2.5241864555848723, "grad_norm": 0.33189622168709265, "learning_rate": 0.0004921042788834721, "loss": 3.1483256816864014, "step": 4306, "token_acc": 0.28336904862938134 }, { "epoch": 2.5247727939020814, "grad_norm": 0.3084929990974992, "learning_rate": 0.000492098236288985, "loss": 3.1353297233581543, "step": 4307, "token_acc": 0.2829878897732945 }, { "epoch": 2.5253591322192905, "grad_norm": 0.257706446408307, "learning_rate": 0.0004920921914203112, "loss": 3.1635162830352783, "step": 4308, "token_acc": 0.2801346407588288 }, { "epoch": 2.5259454705364996, "grad_norm": 0.3116828843149226, "learning_rate": 0.0004920861442775076, "loss": 3.160191535949707, "step": 4309, "token_acc": 0.28130631334437467 }, { "epoch": 2.5265318088537088, "grad_norm": 0.27230023589998287, "learning_rate": 0.0004920800948606306, "loss": 3.151123523712158, "step": 4310, "token_acc": 0.28097180614868167 }, { "epoch": 2.527118147170918, "grad_norm": 0.3355134609466817, "learning_rate": 0.0004920740431697375, "loss": 3.1164069175720215, "step": 4311, "token_acc": 0.28757651989384275 }, { "epoch": 2.5277044854881265, "grad_norm": 0.3041376186267956, "learning_rate": 0.000492067989204885, "loss": 3.1578617095947266, "step": 4312, "token_acc": 0.28154340251315 }, { "epoch": 2.5282908238053357, "grad_norm": 0.24019866704515144, "learning_rate": 0.0004920619329661299, "loss": 3.129883289337158, "step": 4313, "token_acc": 0.28478083310645247 }, { "epoch": 2.5288771621225448, "grad_norm": 0.3131737085272038, "learning_rate": 0.0004920558744535291, "loss": 3.2153759002685547, "step": 4314, "token_acc": 0.27452384022823034 }, { "epoch": 2.529463500439754, "grad_norm": 0.26758665022490724, "learning_rate": 0.0004920498136671396, "loss": 3.164632797241211, "step": 4315, "token_acc": 0.2815751667521806 }, { "epoch": 2.5300498387569625, "grad_norm": 0.23056215055359155, "learning_rate": 0.0004920437506070182, "loss": 3.144793748855591, "step": 4316, "token_acc": 0.2825833181332569 }, { "epoch": 2.5306361770741717, "grad_norm": 0.30941235034930364, "learning_rate": 0.0004920376852732219, "loss": 3.199553966522217, "step": 4317, "token_acc": 0.27772342451636467 }, { "epoch": 2.5312225153913808, "grad_norm": 0.2523939704555795, "learning_rate": 0.0004920316176658077, "loss": 3.1641182899475098, "step": 4318, "token_acc": 0.28056233904922195 }, { "epoch": 2.53180885370859, "grad_norm": 0.2728309696824672, "learning_rate": 0.0004920255477848327, "loss": 3.166947364807129, "step": 4319, "token_acc": 0.28012700098007187 }, { "epoch": 2.532395192025799, "grad_norm": 0.3066075386046518, "learning_rate": 0.0004920194756303537, "loss": 3.1164848804473877, "step": 4320, "token_acc": 0.28752298541093324 }, { "epoch": 2.532981530343008, "grad_norm": 0.24796660321348632, "learning_rate": 0.0004920134012024279, "loss": 3.1212098598480225, "step": 4321, "token_acc": 0.2860366749728452 }, { "epoch": 2.533567868660217, "grad_norm": 0.251387110834701, "learning_rate": 0.0004920073245011123, "loss": 3.1223530769348145, "step": 4322, "token_acc": 0.28597904590265355 }, { "epoch": 2.534154206977426, "grad_norm": 0.28108959707435394, "learning_rate": 0.000492001245526464, "loss": 3.1054224967956543, "step": 4323, "token_acc": 0.28950577004567685 }, { "epoch": 2.534740545294635, "grad_norm": 0.2644107329424148, "learning_rate": 0.00049199516427854, "loss": 3.155257225036621, "step": 4324, "token_acc": 0.2809893368336828 }, { "epoch": 2.535326883611844, "grad_norm": 0.30989145078981273, "learning_rate": 0.0004919890807573977, "loss": 3.115139961242676, "step": 4325, "token_acc": 0.28814040756762394 }, { "epoch": 2.535913221929053, "grad_norm": 0.2977263044324904, "learning_rate": 0.000491982994963094, "loss": 3.1034939289093018, "step": 4326, "token_acc": 0.2877057090613572 }, { "epoch": 2.536499560246262, "grad_norm": 0.25112086160966424, "learning_rate": 0.0004919769068956861, "loss": 3.1442034244537354, "step": 4327, "token_acc": 0.2845234549998058 }, { "epoch": 2.537085898563471, "grad_norm": 0.31922932359624206, "learning_rate": 0.0004919708165552312, "loss": 3.122307777404785, "step": 4328, "token_acc": 0.28650051247453845 }, { "epoch": 2.53767223688068, "grad_norm": 0.26706980693055804, "learning_rate": 0.0004919647239417866, "loss": 3.1440606117248535, "step": 4329, "token_acc": 0.28275191048588166 }, { "epoch": 2.538258575197889, "grad_norm": 0.2865069420660975, "learning_rate": 0.0004919586290554095, "loss": 3.1797022819519043, "step": 4330, "token_acc": 0.2780855468029681 }, { "epoch": 2.5388449135150983, "grad_norm": 0.3025230886770115, "learning_rate": 0.000491952531896157, "loss": 3.170433521270752, "step": 4331, "token_acc": 0.2793197264837018 }, { "epoch": 2.5394312518323074, "grad_norm": 0.3101695780173901, "learning_rate": 0.0004919464324640866, "loss": 3.1348390579223633, "step": 4332, "token_acc": 0.28437338149559765 }, { "epoch": 2.540017590149516, "grad_norm": 0.24404758732667856, "learning_rate": 0.0004919403307592554, "loss": 3.09989857673645, "step": 4333, "token_acc": 0.28917855599894676 }, { "epoch": 2.5406039284667252, "grad_norm": 0.28855930517661665, "learning_rate": 0.000491934226781721, "loss": 3.152952194213867, "step": 4334, "token_acc": 0.28245550232173255 }, { "epoch": 2.5411902667839343, "grad_norm": 0.29319788305875377, "learning_rate": 0.0004919281205315405, "loss": 3.1324281692504883, "step": 4335, "token_acc": 0.2852023473576193 }, { "epoch": 2.5417766051011434, "grad_norm": 0.2703004688269168, "learning_rate": 0.0004919220120087711, "loss": 3.1648545265197754, "step": 4336, "token_acc": 0.2789870135906686 }, { "epoch": 2.542362943418352, "grad_norm": 0.2600531071819018, "learning_rate": 0.0004919159012134706, "loss": 3.1146669387817383, "step": 4337, "token_acc": 0.2864504712170046 }, { "epoch": 2.5429492817355612, "grad_norm": 0.23898266094751827, "learning_rate": 0.0004919097881456962, "loss": 3.1948297023773193, "step": 4338, "token_acc": 0.2776180339327699 }, { "epoch": 2.5435356200527703, "grad_norm": 0.2822356106364129, "learning_rate": 0.0004919036728055052, "loss": 3.1372992992401123, "step": 4339, "token_acc": 0.28257266814603343 }, { "epoch": 2.5441219583699795, "grad_norm": 0.3289886647941623, "learning_rate": 0.0004918975551929552, "loss": 3.126556396484375, "step": 4340, "token_acc": 0.2858095921187254 }, { "epoch": 2.5447082966871886, "grad_norm": 0.3096503368096305, "learning_rate": 0.0004918914353081036, "loss": 3.1287105083465576, "step": 4341, "token_acc": 0.2855772104509776 }, { "epoch": 2.5452946350043977, "grad_norm": 0.2427602573518604, "learning_rate": 0.000491885313151008, "loss": 3.1497249603271484, "step": 4342, "token_acc": 0.28301823863388553 }, { "epoch": 2.545880973321607, "grad_norm": 0.2649246447511227, "learning_rate": 0.0004918791887217258, "loss": 3.15787410736084, "step": 4343, "token_acc": 0.2806290879921017 }, { "epoch": 2.5464673116388155, "grad_norm": 0.3688002878427219, "learning_rate": 0.0004918730620203145, "loss": 3.137702465057373, "step": 4344, "token_acc": 0.2836198924895081 }, { "epoch": 2.5470536499560246, "grad_norm": 0.392547880010125, "learning_rate": 0.0004918669330468318, "loss": 3.130627393722534, "step": 4345, "token_acc": 0.2848764703899693 }, { "epoch": 2.5476399882732337, "grad_norm": 0.29446639611721936, "learning_rate": 0.0004918608018013352, "loss": 3.1768155097961426, "step": 4346, "token_acc": 0.2789353937940636 }, { "epoch": 2.548226326590443, "grad_norm": 0.291551473048645, "learning_rate": 0.0004918546682838822, "loss": 3.210007667541504, "step": 4347, "token_acc": 0.27479441616307004 }, { "epoch": 2.5488126649076515, "grad_norm": 0.27398888875997945, "learning_rate": 0.0004918485324945305, "loss": 3.166940689086914, "step": 4348, "token_acc": 0.2788000719268848 }, { "epoch": 2.5493990032248606, "grad_norm": 0.23234282120035715, "learning_rate": 0.0004918423944333378, "loss": 3.1179120540618896, "step": 4349, "token_acc": 0.2852429490609624 }, { "epoch": 2.5499853415420697, "grad_norm": 0.32550691217598576, "learning_rate": 0.0004918362541003616, "loss": 3.1616291999816895, "step": 4350, "token_acc": 0.2825448280125688 }, { "epoch": 2.550571679859279, "grad_norm": 0.29218977882083025, "learning_rate": 0.0004918301114956597, "loss": 3.1603875160217285, "step": 4351, "token_acc": 0.27943635187068316 }, { "epoch": 2.551158018176488, "grad_norm": 0.27048560077512346, "learning_rate": 0.0004918239666192898, "loss": 3.149303913116455, "step": 4352, "token_acc": 0.2831882286648519 }, { "epoch": 2.551744356493697, "grad_norm": 0.28268715558857466, "learning_rate": 0.0004918178194713096, "loss": 3.125880479812622, "step": 4353, "token_acc": 0.28484007427782687 }, { "epoch": 2.552330694810906, "grad_norm": 0.2531251929638439, "learning_rate": 0.0004918116700517767, "loss": 3.127319097518921, "step": 4354, "token_acc": 0.2846300730131456 }, { "epoch": 2.552917033128115, "grad_norm": 0.29445173254114065, "learning_rate": 0.0004918055183607492, "loss": 3.1871931552886963, "step": 4355, "token_acc": 0.2765448368475908 }, { "epoch": 2.553503371445324, "grad_norm": 0.295090610181979, "learning_rate": 0.0004917993643982846, "loss": 3.1349101066589355, "step": 4356, "token_acc": 0.28367573378176664 }, { "epoch": 2.554089709762533, "grad_norm": 0.3102836198682593, "learning_rate": 0.0004917932081644408, "loss": 3.1398916244506836, "step": 4357, "token_acc": 0.2838782306862423 }, { "epoch": 2.554676048079742, "grad_norm": 0.3197076737471242, "learning_rate": 0.0004917870496592756, "loss": 3.1072349548339844, "step": 4358, "token_acc": 0.28849589371917683 }, { "epoch": 2.555262386396951, "grad_norm": 0.28351448872883833, "learning_rate": 0.000491780888882847, "loss": 3.167807102203369, "step": 4359, "token_acc": 0.2801222651390479 }, { "epoch": 2.55584872471416, "grad_norm": 0.32487168369859104, "learning_rate": 0.0004917747258352126, "loss": 3.1269595623016357, "step": 4360, "token_acc": 0.2838945916392414 }, { "epoch": 2.556435063031369, "grad_norm": 0.3970360988267621, "learning_rate": 0.0004917685605164306, "loss": 3.1138930320739746, "step": 4361, "token_acc": 0.28848098371484465 }, { "epoch": 2.557021401348578, "grad_norm": 0.3180796567733429, "learning_rate": 0.0004917623929265587, "loss": 3.1905202865600586, "step": 4362, "token_acc": 0.2761942215929946 }, { "epoch": 2.5576077396657872, "grad_norm": 0.30116545051003024, "learning_rate": 0.0004917562230656548, "loss": 3.1863551139831543, "step": 4363, "token_acc": 0.2761850431829338 }, { "epoch": 2.5581940779829964, "grad_norm": 0.3121216005670908, "learning_rate": 0.0004917500509337772, "loss": 3.1712117195129395, "step": 4364, "token_acc": 0.27866050093872674 }, { "epoch": 2.5587804163002055, "grad_norm": 0.26634649353292333, "learning_rate": 0.0004917438765309834, "loss": 3.137951612472534, "step": 4365, "token_acc": 0.283734290453863 }, { "epoch": 2.559366754617414, "grad_norm": 0.24120987487077813, "learning_rate": 0.0004917376998573316, "loss": 3.17258882522583, "step": 4366, "token_acc": 0.27733272450379975 }, { "epoch": 2.5599530929346233, "grad_norm": 0.2567590142408795, "learning_rate": 0.00049173152091288, "loss": 3.128509998321533, "step": 4367, "token_acc": 0.2842827044627466 }, { "epoch": 2.5605394312518324, "grad_norm": 0.24021257412820615, "learning_rate": 0.0004917253396976865, "loss": 3.163233757019043, "step": 4368, "token_acc": 0.2794036898748078 }, { "epoch": 2.5611257695690415, "grad_norm": 0.2520996973701851, "learning_rate": 0.0004917191562118091, "loss": 3.1782047748565674, "step": 4369, "token_acc": 0.28018436873747493 }, { "epoch": 2.56171210788625, "grad_norm": 0.24363076301500283, "learning_rate": 0.0004917129704553059, "loss": 3.178955554962158, "step": 4370, "token_acc": 0.27945157395933606 }, { "epoch": 2.5622984462034593, "grad_norm": 0.22057607909545476, "learning_rate": 0.0004917067824282352, "loss": 3.144303798675537, "step": 4371, "token_acc": 0.28172306033924305 }, { "epoch": 2.5628847845206684, "grad_norm": 0.25323519492898083, "learning_rate": 0.0004917005921306549, "loss": 3.144503355026245, "step": 4372, "token_acc": 0.2823282095781166 }, { "epoch": 2.5634711228378775, "grad_norm": 0.2689643226080944, "learning_rate": 0.0004916943995626232, "loss": 3.168996810913086, "step": 4373, "token_acc": 0.2808877528979768 }, { "epoch": 2.5640574611550866, "grad_norm": 0.26419153182321536, "learning_rate": 0.0004916882047241984, "loss": 3.151325225830078, "step": 4374, "token_acc": 0.2825032859206434 }, { "epoch": 2.5646437994722957, "grad_norm": 0.25188691483540815, "learning_rate": 0.0004916820076154386, "loss": 3.123016834259033, "step": 4375, "token_acc": 0.285173876694759 }, { "epoch": 2.565230137789505, "grad_norm": 0.2808546357122756, "learning_rate": 0.0004916758082364019, "loss": 3.184295177459717, "step": 4376, "token_acc": 0.27758669504944267 }, { "epoch": 2.5658164761067135, "grad_norm": 0.32797327680642174, "learning_rate": 0.0004916696065871466, "loss": 3.1421308517456055, "step": 4377, "token_acc": 0.283045190226603 }, { "epoch": 2.5664028144239226, "grad_norm": 0.2875666664334995, "learning_rate": 0.0004916634026677311, "loss": 3.1357574462890625, "step": 4378, "token_acc": 0.28307711084779724 }, { "epoch": 2.5669891527411317, "grad_norm": 0.24565212969135664, "learning_rate": 0.0004916571964782136, "loss": 3.1166491508483887, "step": 4379, "token_acc": 0.2866507009660146 }, { "epoch": 2.567575491058341, "grad_norm": 0.28666651684881717, "learning_rate": 0.0004916509880186524, "loss": 3.1321258544921875, "step": 4380, "token_acc": 0.28443416449217446 }, { "epoch": 2.5681618293755495, "grad_norm": 0.25411362451029323, "learning_rate": 0.0004916447772891058, "loss": 3.180919647216797, "step": 4381, "token_acc": 0.27737039412150877 }, { "epoch": 2.5687481676927586, "grad_norm": 0.2654262243211257, "learning_rate": 0.000491638564289632, "loss": 3.1119225025177, "step": 4382, "token_acc": 0.28647425706249235 }, { "epoch": 2.5693345060099677, "grad_norm": 0.2956362442028171, "learning_rate": 0.0004916323490202895, "loss": 3.140455722808838, "step": 4383, "token_acc": 0.2842919337950291 }, { "epoch": 2.569920844327177, "grad_norm": 0.2787255526271086, "learning_rate": 0.0004916261314811368, "loss": 3.159057378768921, "step": 4384, "token_acc": 0.28144780727569363 }, { "epoch": 2.570507182644386, "grad_norm": 0.29825703158711053, "learning_rate": 0.0004916199116722322, "loss": 3.140352249145508, "step": 4385, "token_acc": 0.28457807049122164 }, { "epoch": 2.571093520961595, "grad_norm": 0.3370955933004504, "learning_rate": 0.000491613689593634, "loss": 3.1583242416381836, "step": 4386, "token_acc": 0.28399039757628664 }, { "epoch": 2.5716798592788037, "grad_norm": 0.3459928664116189, "learning_rate": 0.0004916074652454009, "loss": 3.162193536758423, "step": 4387, "token_acc": 0.27937162908571056 }, { "epoch": 2.572266197596013, "grad_norm": 0.2754947627012599, "learning_rate": 0.0004916012386275913, "loss": 3.155233383178711, "step": 4388, "token_acc": 0.282756577819121 }, { "epoch": 2.572852535913222, "grad_norm": 0.30894625579211454, "learning_rate": 0.0004915950097402633, "loss": 3.135875701904297, "step": 4389, "token_acc": 0.28430345653149786 }, { "epoch": 2.573438874230431, "grad_norm": 0.2916061934671588, "learning_rate": 0.000491588778583476, "loss": 3.167969226837158, "step": 4390, "token_acc": 0.2800733775453164 }, { "epoch": 2.5740252125476397, "grad_norm": 0.2746631274025691, "learning_rate": 0.0004915825451572877, "loss": 3.141489267349243, "step": 4391, "token_acc": 0.28375773432080054 }, { "epoch": 2.574611550864849, "grad_norm": 0.35073951974094186, "learning_rate": 0.0004915763094617566, "loss": 3.14787220954895, "step": 4392, "token_acc": 0.2813030989294992 }, { "epoch": 2.575197889182058, "grad_norm": 0.23869780654082637, "learning_rate": 0.0004915700714969419, "loss": 3.1046342849731445, "step": 4393, "token_acc": 0.2888057037309147 }, { "epoch": 2.575784227499267, "grad_norm": 0.3491423395944009, "learning_rate": 0.0004915638312629019, "loss": 3.1581149101257324, "step": 4394, "token_acc": 0.2826405829199384 }, { "epoch": 2.576370565816476, "grad_norm": 0.2584341283336134, "learning_rate": 0.0004915575887596952, "loss": 3.1420466899871826, "step": 4395, "token_acc": 0.283997259309204 }, { "epoch": 2.5769569041336853, "grad_norm": 0.3204523717019629, "learning_rate": 0.0004915513439873804, "loss": 3.17549467086792, "step": 4396, "token_acc": 0.27775441331842565 }, { "epoch": 2.5775432424508944, "grad_norm": 0.2666313238784185, "learning_rate": 0.0004915450969460161, "loss": 3.2244763374328613, "step": 4397, "token_acc": 0.27085624133479946 }, { "epoch": 2.578129580768103, "grad_norm": 0.33805956710405566, "learning_rate": 0.0004915388476356612, "loss": 3.1516635417938232, "step": 4398, "token_acc": 0.2814144346770011 }, { "epoch": 2.578715919085312, "grad_norm": 0.284274750803578, "learning_rate": 0.0004915325960563743, "loss": 3.1569743156433105, "step": 4399, "token_acc": 0.2820393220338983 }, { "epoch": 2.5793022574025213, "grad_norm": 0.28096357487119394, "learning_rate": 0.0004915263422082143, "loss": 3.160341739654541, "step": 4400, "token_acc": 0.281220405978507 }, { "epoch": 2.5798885957197304, "grad_norm": 0.2524062721918786, "learning_rate": 0.0004915200860912396, "loss": 3.1140899658203125, "step": 4401, "token_acc": 0.2871440185104121 }, { "epoch": 2.580474934036939, "grad_norm": 0.26410048382613177, "learning_rate": 0.0004915138277055091, "loss": 3.1460094451904297, "step": 4402, "token_acc": 0.2843677027658139 }, { "epoch": 2.581061272354148, "grad_norm": 0.26842801518707626, "learning_rate": 0.0004915075670510817, "loss": 3.15571665763855, "step": 4403, "token_acc": 0.2813728956751519 }, { "epoch": 2.5816476106713573, "grad_norm": 0.22599120906159506, "learning_rate": 0.0004915013041280162, "loss": 3.145167827606201, "step": 4404, "token_acc": 0.28293628526245984 }, { "epoch": 2.5822339489885664, "grad_norm": 0.2680456036466516, "learning_rate": 0.0004914950389363713, "loss": 3.0857837200164795, "step": 4405, "token_acc": 0.2903688240815301 }, { "epoch": 2.5828202873057755, "grad_norm": 0.25367921515288877, "learning_rate": 0.0004914887714762059, "loss": 3.1026771068573, "step": 4406, "token_acc": 0.28792235801581595 }, { "epoch": 2.5834066256229846, "grad_norm": 0.224020983410606, "learning_rate": 0.0004914825017475789, "loss": 3.1019716262817383, "step": 4407, "token_acc": 0.2892546379031572 }, { "epoch": 2.5839929639401937, "grad_norm": 0.35348210079093173, "learning_rate": 0.0004914762297505493, "loss": 3.1458561420440674, "step": 4408, "token_acc": 0.2848810867578234 }, { "epoch": 2.5845793022574024, "grad_norm": 0.35172599239333713, "learning_rate": 0.0004914699554851759, "loss": 3.116210460662842, "step": 4409, "token_acc": 0.28672572216816844 }, { "epoch": 2.5851656405746115, "grad_norm": 0.27237675230111746, "learning_rate": 0.0004914636789515177, "loss": 3.184718608856201, "step": 4410, "token_acc": 0.27708282555082936 }, { "epoch": 2.5857519788918206, "grad_norm": 0.35901935134222934, "learning_rate": 0.0004914574001496334, "loss": 3.087034225463867, "step": 4411, "token_acc": 0.28986774452243463 }, { "epoch": 2.5863383172090297, "grad_norm": 0.27319016904507337, "learning_rate": 0.0004914511190795824, "loss": 3.1788759231567383, "step": 4412, "token_acc": 0.2789790825864739 }, { "epoch": 2.5869246555262384, "grad_norm": 0.3054525436587608, "learning_rate": 0.0004914448357414234, "loss": 3.113358736038208, "step": 4413, "token_acc": 0.287545482465501 }, { "epoch": 2.5875109938434475, "grad_norm": 0.2946710647765237, "learning_rate": 0.0004914385501352156, "loss": 3.1616268157958984, "step": 4414, "token_acc": 0.28040992323810926 }, { "epoch": 2.5880973321606566, "grad_norm": 0.24425756015108044, "learning_rate": 0.0004914322622610178, "loss": 3.122652053833008, "step": 4415, "token_acc": 0.28602584490385 }, { "epoch": 2.5886836704778657, "grad_norm": 0.27362701657532407, "learning_rate": 0.0004914259721188894, "loss": 3.119075298309326, "step": 4416, "token_acc": 0.28688217128352167 }, { "epoch": 2.589270008795075, "grad_norm": 0.3248267226259104, "learning_rate": 0.0004914196797088892, "loss": 3.1667394638061523, "step": 4417, "token_acc": 0.28041669281455917 }, { "epoch": 2.589856347112284, "grad_norm": 0.2881265303388993, "learning_rate": 0.0004914133850310765, "loss": 3.193389892578125, "step": 4418, "token_acc": 0.27687412797141703 }, { "epoch": 2.590442685429493, "grad_norm": 0.46019423683328586, "learning_rate": 0.0004914070880855103, "loss": 3.122049331665039, "step": 4419, "token_acc": 0.2854857207950065 }, { "epoch": 2.5910290237467017, "grad_norm": 0.3019953627677614, "learning_rate": 0.0004914007888722498, "loss": 3.1416196823120117, "step": 4420, "token_acc": 0.28437696644197974 }, { "epoch": 2.591615362063911, "grad_norm": 0.30044737737454075, "learning_rate": 0.0004913944873913543, "loss": 3.133695602416992, "step": 4421, "token_acc": 0.28447698418879974 }, { "epoch": 2.59220170038112, "grad_norm": 0.29566621773957935, "learning_rate": 0.0004913881836428827, "loss": 3.142026901245117, "step": 4422, "token_acc": 0.28393396389565123 }, { "epoch": 2.592788038698329, "grad_norm": 0.26370836990969015, "learning_rate": 0.0004913818776268946, "loss": 3.0946197509765625, "step": 4423, "token_acc": 0.2903073821192994 }, { "epoch": 2.5933743770155377, "grad_norm": 0.23893545713360817, "learning_rate": 0.0004913755693434489, "loss": 3.1701855659484863, "step": 4424, "token_acc": 0.2790072950790258 }, { "epoch": 2.593960715332747, "grad_norm": 0.2880617578520256, "learning_rate": 0.0004913692587926049, "loss": 3.151846408843994, "step": 4425, "token_acc": 0.28204308826480184 }, { "epoch": 2.594547053649956, "grad_norm": 0.3080166873136557, "learning_rate": 0.0004913629459744221, "loss": 3.1573777198791504, "step": 4426, "token_acc": 0.282134699066657 }, { "epoch": 2.595133391967165, "grad_norm": 0.4263050394717753, "learning_rate": 0.0004913566308889596, "loss": 3.1153945922851562, "step": 4427, "token_acc": 0.2855707562885224 }, { "epoch": 2.595719730284374, "grad_norm": 0.4198320197813092, "learning_rate": 0.0004913503135362768, "loss": 3.1211605072021484, "step": 4428, "token_acc": 0.2853663867923315 }, { "epoch": 2.5963060686015833, "grad_norm": 0.3121289616724346, "learning_rate": 0.000491343993916433, "loss": 3.1287055015563965, "step": 4429, "token_acc": 0.2866549661817965 }, { "epoch": 2.5968924069187924, "grad_norm": 0.37980093114027763, "learning_rate": 0.0004913376720294876, "loss": 3.154170513153076, "step": 4430, "token_acc": 0.282718697612616 }, { "epoch": 2.597478745236001, "grad_norm": 0.32986146997532834, "learning_rate": 0.0004913313478755, "loss": 3.160292148590088, "step": 4431, "token_acc": 0.2803236878846377 }, { "epoch": 2.59806508355321, "grad_norm": 0.30078869465495656, "learning_rate": 0.0004913250214545296, "loss": 3.174564838409424, "step": 4432, "token_acc": 0.27713408403630896 }, { "epoch": 2.5986514218704193, "grad_norm": 0.2927607369519846, "learning_rate": 0.0004913186927666359, "loss": 3.1422476768493652, "step": 4433, "token_acc": 0.2825182240600222 }, { "epoch": 2.5992377601876284, "grad_norm": 0.28119368962595204, "learning_rate": 0.0004913123618118781, "loss": 3.1367149353027344, "step": 4434, "token_acc": 0.28194494974036977 }, { "epoch": 2.599824098504837, "grad_norm": 0.34073429017926676, "learning_rate": 0.0004913060285903159, "loss": 3.13165020942688, "step": 4435, "token_acc": 0.28559732568845736 }, { "epoch": 2.600410436822046, "grad_norm": 0.28835942904131184, "learning_rate": 0.0004912996931020087, "loss": 3.1303820610046387, "step": 4436, "token_acc": 0.2839435110373109 }, { "epoch": 2.6009967751392553, "grad_norm": 0.33422063794693274, "learning_rate": 0.0004912933553470161, "loss": 3.1056551933288574, "step": 4437, "token_acc": 0.2876120082230906 }, { "epoch": 2.6015831134564644, "grad_norm": 0.26221060762296367, "learning_rate": 0.0004912870153253975, "loss": 3.14431095123291, "step": 4438, "token_acc": 0.2829661102996884 }, { "epoch": 2.6021694517736735, "grad_norm": 0.26298528222746703, "learning_rate": 0.0004912806730372126, "loss": 3.122894763946533, "step": 4439, "token_acc": 0.2842636195811974 }, { "epoch": 2.6027557900908826, "grad_norm": 0.2723007755709516, "learning_rate": 0.0004912743284825209, "loss": 3.1369009017944336, "step": 4440, "token_acc": 0.2831264802393087 }, { "epoch": 2.6033421284080913, "grad_norm": 0.28391584247640217, "learning_rate": 0.0004912679816613819, "loss": 3.1534619331359863, "step": 4441, "token_acc": 0.2831015915435194 }, { "epoch": 2.6039284667253004, "grad_norm": 0.3486956423617713, "learning_rate": 0.0004912616325738554, "loss": 3.181530714035034, "step": 4442, "token_acc": 0.27839960801727176 }, { "epoch": 2.6045148050425095, "grad_norm": 0.24043476879509992, "learning_rate": 0.000491255281220001, "loss": 3.1551170349121094, "step": 4443, "token_acc": 0.28177229649577573 }, { "epoch": 2.6051011433597187, "grad_norm": 0.2691725427432372, "learning_rate": 0.0004912489275998783, "loss": 3.121718168258667, "step": 4444, "token_acc": 0.28634862070444894 }, { "epoch": 2.6056874816769273, "grad_norm": 0.24070049599901105, "learning_rate": 0.000491242571713547, "loss": 3.171643018722534, "step": 4445, "token_acc": 0.27780545060488676 }, { "epoch": 2.6062738199941364, "grad_norm": 0.25732795886890575, "learning_rate": 0.0004912362135610668, "loss": 3.1481659412384033, "step": 4446, "token_acc": 0.28013366836774345 }, { "epoch": 2.6068601583113455, "grad_norm": 0.2791351599449322, "learning_rate": 0.0004912298531424974, "loss": 3.1447200775146484, "step": 4447, "token_acc": 0.2829630944951182 }, { "epoch": 2.6074464966285547, "grad_norm": 0.28419388709359794, "learning_rate": 0.0004912234904578987, "loss": 3.1735267639160156, "step": 4448, "token_acc": 0.2799769963241822 }, { "epoch": 2.6080328349457638, "grad_norm": 0.25093391769960843, "learning_rate": 0.0004912171255073303, "loss": 3.111140251159668, "step": 4449, "token_acc": 0.28830905426765696 }, { "epoch": 2.608619173262973, "grad_norm": 0.30633934772606325, "learning_rate": 0.000491210758290852, "loss": 3.118685007095337, "step": 4450, "token_acc": 0.28502878829232786 }, { "epoch": 2.609205511580182, "grad_norm": 0.29473364943918695, "learning_rate": 0.0004912043888085238, "loss": 3.1386444568634033, "step": 4451, "token_acc": 0.2846476812888272 }, { "epoch": 2.6097918498973907, "grad_norm": 0.22865924863579715, "learning_rate": 0.0004911980170604054, "loss": 3.1420087814331055, "step": 4452, "token_acc": 0.28462879213700426 }, { "epoch": 2.6103781882145998, "grad_norm": 0.3062297392443326, "learning_rate": 0.0004911916430465565, "loss": 3.132032871246338, "step": 4453, "token_acc": 0.28367317195966985 }, { "epoch": 2.610964526531809, "grad_norm": 0.27935605673554037, "learning_rate": 0.0004911852667670373, "loss": 3.1432809829711914, "step": 4454, "token_acc": 0.28329692368298537 }, { "epoch": 2.611550864849018, "grad_norm": 0.2606252275923648, "learning_rate": 0.0004911788882219074, "loss": 3.1206002235412598, "step": 4455, "token_acc": 0.2868433651725518 }, { "epoch": 2.6121372031662267, "grad_norm": 0.3117186211315846, "learning_rate": 0.0004911725074112268, "loss": 3.11965274810791, "step": 4456, "token_acc": 0.28574816892548605 }, { "epoch": 2.6127235414834358, "grad_norm": 0.22470593157282528, "learning_rate": 0.0004911661243350555, "loss": 3.1618447303771973, "step": 4457, "token_acc": 0.2801963846649009 }, { "epoch": 2.613309879800645, "grad_norm": 0.2632122378022257, "learning_rate": 0.0004911597389934535, "loss": 3.1404547691345215, "step": 4458, "token_acc": 0.28278369041660173 }, { "epoch": 2.613896218117854, "grad_norm": 0.35880362859860043, "learning_rate": 0.0004911533513864806, "loss": 3.1570546627044678, "step": 4459, "token_acc": 0.2813147297254671 }, { "epoch": 2.614482556435063, "grad_norm": 0.2977747547382781, "learning_rate": 0.0004911469615141971, "loss": 3.1947293281555176, "step": 4460, "token_acc": 0.2768346140230316 }, { "epoch": 2.615068894752272, "grad_norm": 0.24002059815786428, "learning_rate": 0.0004911405693766627, "loss": 3.151607036590576, "step": 4461, "token_acc": 0.2811408059973317 }, { "epoch": 2.6156552330694813, "grad_norm": 0.304247785226504, "learning_rate": 0.0004911341749739376, "loss": 3.1220548152923584, "step": 4462, "token_acc": 0.28436413204036465 }, { "epoch": 2.61624157138669, "grad_norm": 0.25596117557087283, "learning_rate": 0.000491127778306082, "loss": 3.101673126220703, "step": 4463, "token_acc": 0.2883029202095894 }, { "epoch": 2.616827909703899, "grad_norm": 0.3132204177191324, "learning_rate": 0.0004911213793731557, "loss": 3.0839123725891113, "step": 4464, "token_acc": 0.29173899469247583 }, { "epoch": 2.6174142480211082, "grad_norm": 0.3007098071653364, "learning_rate": 0.000491114978175219, "loss": 3.160425901412964, "step": 4465, "token_acc": 0.2801504728925216 }, { "epoch": 2.6180005863383173, "grad_norm": 0.22513742705747958, "learning_rate": 0.000491108574712332, "loss": 3.1495208740234375, "step": 4466, "token_acc": 0.28447036498729233 }, { "epoch": 2.618586924655526, "grad_norm": 0.22017635742880062, "learning_rate": 0.0004911021689845549, "loss": 3.159933090209961, "step": 4467, "token_acc": 0.2820490794032586 }, { "epoch": 2.619173262972735, "grad_norm": 0.2315654701100227, "learning_rate": 0.0004910957609919476, "loss": 3.1120617389678955, "step": 4468, "token_acc": 0.2877209897039034 }, { "epoch": 2.6197596012899442, "grad_norm": 0.23733525453108914, "learning_rate": 0.0004910893507345707, "loss": 3.1168088912963867, "step": 4469, "token_acc": 0.28717847815976816 }, { "epoch": 2.6203459396071533, "grad_norm": 0.2677714802000277, "learning_rate": 0.0004910829382124842, "loss": 3.130603551864624, "step": 4470, "token_acc": 0.28436955014392873 }, { "epoch": 2.6209322779243625, "grad_norm": 0.25160877364280415, "learning_rate": 0.0004910765234257483, "loss": 3.113950252532959, "step": 4471, "token_acc": 0.28772502755439444 }, { "epoch": 2.6215186162415716, "grad_norm": 0.29844949998234555, "learning_rate": 0.0004910701063744233, "loss": 3.1047306060791016, "step": 4472, "token_acc": 0.2892114949000129 }, { "epoch": 2.6221049545587807, "grad_norm": 0.2557769128204677, "learning_rate": 0.0004910636870585697, "loss": 3.136532783508301, "step": 4473, "token_acc": 0.2848639523389279 }, { "epoch": 2.6226912928759893, "grad_norm": 0.28125834517679876, "learning_rate": 0.0004910572654782474, "loss": 3.191502571105957, "step": 4474, "token_acc": 0.2778229019406246 }, { "epoch": 2.6232776311931985, "grad_norm": 0.3083182493921153, "learning_rate": 0.0004910508416335168, "loss": 3.1188063621520996, "step": 4475, "token_acc": 0.286374023106807 }, { "epoch": 2.6238639695104076, "grad_norm": 0.35976571815258557, "learning_rate": 0.0004910444155244386, "loss": 3.1428544521331787, "step": 4476, "token_acc": 0.2816737451429604 }, { "epoch": 2.6244503078276167, "grad_norm": 0.29486408886399273, "learning_rate": 0.0004910379871510728, "loss": 3.1631898880004883, "step": 4477, "token_acc": 0.2803576952874016 }, { "epoch": 2.6250366461448253, "grad_norm": 0.30970291992441107, "learning_rate": 0.00049103155651348, "loss": 3.103642702102661, "step": 4478, "token_acc": 0.28936055198861754 }, { "epoch": 2.6256229844620345, "grad_norm": 0.3844130125718855, "learning_rate": 0.0004910251236117205, "loss": 3.139253616333008, "step": 4479, "token_acc": 0.2838493683858395 }, { "epoch": 2.6262093227792436, "grad_norm": 0.2741551505095675, "learning_rate": 0.0004910186884458548, "loss": 3.124657154083252, "step": 4480, "token_acc": 0.2861188779651844 }, { "epoch": 2.6267956610964527, "grad_norm": 0.3487253124713831, "learning_rate": 0.0004910122510159431, "loss": 3.1688432693481445, "step": 4481, "token_acc": 0.28037247745813654 }, { "epoch": 2.627381999413662, "grad_norm": 0.27541561221725075, "learning_rate": 0.0004910058113220462, "loss": 3.126811981201172, "step": 4482, "token_acc": 0.2853793730600316 }, { "epoch": 2.627968337730871, "grad_norm": 0.28073892105092135, "learning_rate": 0.0004909993693642245, "loss": 3.119013786315918, "step": 4483, "token_acc": 0.28459656530107524 }, { "epoch": 2.62855467604808, "grad_norm": 0.30762876799147587, "learning_rate": 0.0004909929251425384, "loss": 3.1422858238220215, "step": 4484, "token_acc": 0.2828757375550569 }, { "epoch": 2.6291410143652887, "grad_norm": 0.2680298117940531, "learning_rate": 0.0004909864786570486, "loss": 3.1751420497894287, "step": 4485, "token_acc": 0.27877686780578304 }, { "epoch": 2.629727352682498, "grad_norm": 0.32606785842895286, "learning_rate": 0.0004909800299078155, "loss": 3.134639263153076, "step": 4486, "token_acc": 0.2844809220200886 }, { "epoch": 2.630313690999707, "grad_norm": 0.2682071001707932, "learning_rate": 0.0004909735788948998, "loss": 3.130958080291748, "step": 4487, "token_acc": 0.28611310926122524 }, { "epoch": 2.630900029316916, "grad_norm": 0.2816885342904494, "learning_rate": 0.000490967125618362, "loss": 3.1387391090393066, "step": 4488, "token_acc": 0.28391692417943903 }, { "epoch": 2.6314863676341247, "grad_norm": 0.2583700103019815, "learning_rate": 0.0004909606700782628, "loss": 3.1681928634643555, "step": 4489, "token_acc": 0.2795932397745884 }, { "epoch": 2.632072705951334, "grad_norm": 0.2766910374733494, "learning_rate": 0.0004909542122746627, "loss": 3.098881959915161, "step": 4490, "token_acc": 0.2877975518015923 }, { "epoch": 2.632659044268543, "grad_norm": 0.2534875242029691, "learning_rate": 0.0004909477522076225, "loss": 3.1765336990356445, "step": 4491, "token_acc": 0.2798009304338418 }, { "epoch": 2.633245382585752, "grad_norm": 0.2725296466325662, "learning_rate": 0.000490941289877203, "loss": 3.2209014892578125, "step": 4492, "token_acc": 0.2736889231313836 }, { "epoch": 2.633831720902961, "grad_norm": 0.26538549354139856, "learning_rate": 0.0004909348252834646, "loss": 3.117403507232666, "step": 4493, "token_acc": 0.2869374032710085 }, { "epoch": 2.6344180592201702, "grad_norm": 0.2516765555451947, "learning_rate": 0.0004909283584264683, "loss": 3.129274606704712, "step": 4494, "token_acc": 0.2839846720067092 }, { "epoch": 2.635004397537379, "grad_norm": 0.24847335417218613, "learning_rate": 0.0004909218893062745, "loss": 3.1543307304382324, "step": 4495, "token_acc": 0.2817410233234806 }, { "epoch": 2.635590735854588, "grad_norm": 0.24730646124994307, "learning_rate": 0.0004909154179229444, "loss": 3.1373813152313232, "step": 4496, "token_acc": 0.28473536689454254 }, { "epoch": 2.636177074171797, "grad_norm": 0.23717947376181753, "learning_rate": 0.0004909089442765385, "loss": 3.1270108222961426, "step": 4497, "token_acc": 0.28454830174415463 }, { "epoch": 2.6367634124890063, "grad_norm": 0.25913042795801317, "learning_rate": 0.0004909024683671178, "loss": 3.1273627281188965, "step": 4498, "token_acc": 0.28646070500193854 }, { "epoch": 2.637349750806215, "grad_norm": 0.280644386101006, "learning_rate": 0.0004908959901947428, "loss": 3.142911195755005, "step": 4499, "token_acc": 0.2829405019747739 }, { "epoch": 2.637936089123424, "grad_norm": 0.2214587242617182, "learning_rate": 0.0004908895097594749, "loss": 3.1260838508605957, "step": 4500, "token_acc": 0.28624982668728105 }, { "epoch": 2.638522427440633, "grad_norm": 0.25632378980001186, "learning_rate": 0.0004908830270613744, "loss": 3.1828646659851074, "step": 4501, "token_acc": 0.2787432326878013 }, { "epoch": 2.6391087657578423, "grad_norm": 0.2555062690669222, "learning_rate": 0.0004908765421005026, "loss": 3.1138601303100586, "step": 4502, "token_acc": 0.287056427597072 }, { "epoch": 2.6396951040750514, "grad_norm": 0.2222247197526058, "learning_rate": 0.0004908700548769202, "loss": 3.1409080028533936, "step": 4503, "token_acc": 0.28470590667896956 }, { "epoch": 2.6402814423922605, "grad_norm": 0.25779015462960087, "learning_rate": 0.0004908635653906882, "loss": 3.103044033050537, "step": 4504, "token_acc": 0.2884303317734016 }, { "epoch": 2.6408677807094696, "grad_norm": 0.30082323503877456, "learning_rate": 0.0004908570736418676, "loss": 3.1434521675109863, "step": 4505, "token_acc": 0.2826840757915941 }, { "epoch": 2.6414541190266783, "grad_norm": 0.3376745401628351, "learning_rate": 0.0004908505796305194, "loss": 3.1410765647888184, "step": 4506, "token_acc": 0.28173394586661005 }, { "epoch": 2.6420404573438874, "grad_norm": 0.28979422282689443, "learning_rate": 0.0004908440833567045, "loss": 3.1467819213867188, "step": 4507, "token_acc": 0.28107786514332966 }, { "epoch": 2.6426267956610965, "grad_norm": 0.24807991038313268, "learning_rate": 0.000490837584820484, "loss": 3.1270833015441895, "step": 4508, "token_acc": 0.28527554218575274 }, { "epoch": 2.6432131339783056, "grad_norm": 0.27778995883019647, "learning_rate": 0.0004908310840219189, "loss": 3.161132335662842, "step": 4509, "token_acc": 0.2799487815613621 }, { "epoch": 2.6437994722955143, "grad_norm": 0.268342976070708, "learning_rate": 0.0004908245809610703, "loss": 3.1399142742156982, "step": 4510, "token_acc": 0.28241428560238335 }, { "epoch": 2.6443858106127234, "grad_norm": 0.25109664777641616, "learning_rate": 0.0004908180756379993, "loss": 3.1628198623657227, "step": 4511, "token_acc": 0.28025493874007873 }, { "epoch": 2.6449721489299325, "grad_norm": 0.23209720426909075, "learning_rate": 0.0004908115680527669, "loss": 3.1590206623077393, "step": 4512, "token_acc": 0.28098680759043393 }, { "epoch": 2.6455584872471416, "grad_norm": 0.26099288299564416, "learning_rate": 0.0004908050582054344, "loss": 3.161895751953125, "step": 4513, "token_acc": 0.28008871725026874 }, { "epoch": 2.6461448255643507, "grad_norm": 0.3045617781248633, "learning_rate": 0.0004907985460960629, "loss": 3.1411805152893066, "step": 4514, "token_acc": 0.2816012266033438 }, { "epoch": 2.64673116388156, "grad_norm": 0.29232976335955796, "learning_rate": 0.0004907920317247134, "loss": 3.1482903957366943, "step": 4515, "token_acc": 0.2811057809490903 }, { "epoch": 2.647317502198769, "grad_norm": 0.26193951830338924, "learning_rate": 0.0004907855150914473, "loss": 3.163036584854126, "step": 4516, "token_acc": 0.2807180900592201 }, { "epoch": 2.6479038405159776, "grad_norm": 0.3172406383327414, "learning_rate": 0.0004907789961963258, "loss": 3.1367228031158447, "step": 4517, "token_acc": 0.2838660108156319 }, { "epoch": 2.6484901788331867, "grad_norm": 0.3356648337440435, "learning_rate": 0.00049077247503941, "loss": 3.155352830886841, "step": 4518, "token_acc": 0.28238370303673127 }, { "epoch": 2.649076517150396, "grad_norm": 0.28835283834228015, "learning_rate": 0.0004907659516207614, "loss": 3.153813600540161, "step": 4519, "token_acc": 0.2808702361234362 }, { "epoch": 2.649662855467605, "grad_norm": 0.249357663473599, "learning_rate": 0.000490759425940441, "loss": 3.129187822341919, "step": 4520, "token_acc": 0.2851964559688678 }, { "epoch": 2.6502491937848136, "grad_norm": 0.2513732061273853, "learning_rate": 0.0004907528979985103, "loss": 3.1271328926086426, "step": 4521, "token_acc": 0.285695754931134 }, { "epoch": 2.6508355321020227, "grad_norm": 0.26490295100569117, "learning_rate": 0.0004907463677950305, "loss": 3.1428842544555664, "step": 4522, "token_acc": 0.28249287509623083 }, { "epoch": 2.651421870419232, "grad_norm": 0.32061621389090744, "learning_rate": 0.0004907398353300628, "loss": 3.1661429405212402, "step": 4523, "token_acc": 0.2799314402326207 }, { "epoch": 2.652008208736441, "grad_norm": 0.3562661299822593, "learning_rate": 0.000490733300603669, "loss": 3.120821952819824, "step": 4524, "token_acc": 0.28763738171948217 }, { "epoch": 2.65259454705365, "grad_norm": 0.30949062056630505, "learning_rate": 0.0004907267636159102, "loss": 3.143404722213745, "step": 4525, "token_acc": 0.28132522064723187 }, { "epoch": 2.653180885370859, "grad_norm": 0.29257220407784473, "learning_rate": 0.0004907202243668477, "loss": 3.1127982139587402, "step": 4526, "token_acc": 0.28731092167670813 }, { "epoch": 2.6537672236880683, "grad_norm": 0.4171464378019305, "learning_rate": 0.0004907136828565432, "loss": 3.166630268096924, "step": 4527, "token_acc": 0.28050423890506293 }, { "epoch": 2.654353562005277, "grad_norm": 0.3093010026319293, "learning_rate": 0.000490707139085058, "loss": 3.1947901248931885, "step": 4528, "token_acc": 0.2760164947143863 }, { "epoch": 2.654939900322486, "grad_norm": 0.30458489838463393, "learning_rate": 0.0004907005930524536, "loss": 3.1313133239746094, "step": 4529, "token_acc": 0.2837124737230479 }, { "epoch": 2.655526238639695, "grad_norm": 0.26862086438482335, "learning_rate": 0.0004906940447587914, "loss": 3.140035629272461, "step": 4530, "token_acc": 0.2858398388381949 }, { "epoch": 2.6561125769569043, "grad_norm": 0.26053079928759193, "learning_rate": 0.000490687494204133, "loss": 3.1293764114379883, "step": 4531, "token_acc": 0.2839868643028419 }, { "epoch": 2.656698915274113, "grad_norm": 0.29246436654778357, "learning_rate": 0.0004906809413885399, "loss": 3.1862375736236572, "step": 4532, "token_acc": 0.27907232329297355 }, { "epoch": 2.657285253591322, "grad_norm": 0.22689675109451865, "learning_rate": 0.0004906743863120737, "loss": 3.149813175201416, "step": 4533, "token_acc": 0.28147393897219286 }, { "epoch": 2.657871591908531, "grad_norm": 0.25834291866456627, "learning_rate": 0.0004906678289747959, "loss": 3.1429402828216553, "step": 4534, "token_acc": 0.28302363293191907 }, { "epoch": 2.6584579302257403, "grad_norm": 0.21242558766448594, "learning_rate": 0.0004906612693767683, "loss": 3.176623582839966, "step": 4535, "token_acc": 0.2799581496390007 }, { "epoch": 2.6590442685429494, "grad_norm": 0.23368878507318006, "learning_rate": 0.0004906547075180523, "loss": 3.102163314819336, "step": 4536, "token_acc": 0.2891551334547957 }, { "epoch": 2.6596306068601585, "grad_norm": 0.21627783741282997, "learning_rate": 0.0004906481433987096, "loss": 3.152431011199951, "step": 4537, "token_acc": 0.28096652832866253 }, { "epoch": 2.660216945177367, "grad_norm": 0.23332895128180198, "learning_rate": 0.0004906415770188019, "loss": 3.1362998485565186, "step": 4538, "token_acc": 0.28523859138425156 }, { "epoch": 2.6608032834945763, "grad_norm": 0.27227170624125596, "learning_rate": 0.0004906350083783907, "loss": 3.1955220699310303, "step": 4539, "token_acc": 0.27691811160892404 }, { "epoch": 2.6613896218117854, "grad_norm": 0.28780642673917695, "learning_rate": 0.000490628437477538, "loss": 3.1884398460388184, "step": 4540, "token_acc": 0.2774921809433601 }, { "epoch": 2.6619759601289945, "grad_norm": 0.2975992585428022, "learning_rate": 0.0004906218643163054, "loss": 3.144155263900757, "step": 4541, "token_acc": 0.2835971296249173 }, { "epoch": 2.6625622984462036, "grad_norm": 0.24173241915494628, "learning_rate": 0.0004906152888947545, "loss": 3.1317138671875, "step": 4542, "token_acc": 0.2846501195131679 }, { "epoch": 2.6631486367634123, "grad_norm": 0.23947610065546424, "learning_rate": 0.0004906087112129474, "loss": 3.094043731689453, "step": 4543, "token_acc": 0.28916481600630956 }, { "epoch": 2.6637349750806214, "grad_norm": 0.24447339786300398, "learning_rate": 0.0004906021312709455, "loss": 3.1463050842285156, "step": 4544, "token_acc": 0.2831048531906726 }, { "epoch": 2.6643213133978305, "grad_norm": 0.2389278386984395, "learning_rate": 0.0004905955490688108, "loss": 3.164994955062866, "step": 4545, "token_acc": 0.2803373551674138 }, { "epoch": 2.6649076517150396, "grad_norm": 0.25485866213899294, "learning_rate": 0.0004905889646066052, "loss": 3.1323490142822266, "step": 4546, "token_acc": 0.2866416998714396 }, { "epoch": 2.6654939900322487, "grad_norm": 0.2814253221596995, "learning_rate": 0.0004905823778843905, "loss": 3.167964220046997, "step": 4547, "token_acc": 0.27959950331690847 }, { "epoch": 2.666080328349458, "grad_norm": 0.28802634302210167, "learning_rate": 0.0004905757889022284, "loss": 3.1675868034362793, "step": 4548, "token_acc": 0.2797365161464191 }, { "epoch": 2.6666666666666665, "grad_norm": 0.2531289993130524, "learning_rate": 0.0004905691976601811, "loss": 3.1019842624664307, "step": 4549, "token_acc": 0.2889767596246556 }, { "epoch": 2.6672530049838756, "grad_norm": 0.23377109164904225, "learning_rate": 0.0004905626041583103, "loss": 3.1520907878875732, "step": 4550, "token_acc": 0.281039814706986 }, { "epoch": 2.6678393433010847, "grad_norm": 0.3563028375675292, "learning_rate": 0.0004905560083966781, "loss": 3.1506714820861816, "step": 4551, "token_acc": 0.2815287429598057 }, { "epoch": 2.668425681618294, "grad_norm": 0.36833642449254844, "learning_rate": 0.0004905494103753462, "loss": 3.1473548412323, "step": 4552, "token_acc": 0.2816553328705106 }, { "epoch": 2.6690120199355025, "grad_norm": 0.350467542647802, "learning_rate": 0.0004905428100943767, "loss": 3.1355807781219482, "step": 4553, "token_acc": 0.2826411200863908 }, { "epoch": 2.6695983582527116, "grad_norm": 0.26992650613140834, "learning_rate": 0.0004905362075538317, "loss": 3.1598753929138184, "step": 4554, "token_acc": 0.28076013013901213 }, { "epoch": 2.6701846965699207, "grad_norm": 0.22727169119500837, "learning_rate": 0.0004905296027537732, "loss": 3.1340034008026123, "step": 4555, "token_acc": 0.28252492599798623 }, { "epoch": 2.67077103488713, "grad_norm": 0.29201391013962075, "learning_rate": 0.0004905229956942632, "loss": 3.1311614513397217, "step": 4556, "token_acc": 0.28352003012250715 }, { "epoch": 2.671357373204339, "grad_norm": 0.29073266274637083, "learning_rate": 0.0004905163863753638, "loss": 3.180220365524292, "step": 4557, "token_acc": 0.27701957290765666 }, { "epoch": 2.671943711521548, "grad_norm": 0.28905171135397717, "learning_rate": 0.000490509774797137, "loss": 3.122464179992676, "step": 4558, "token_acc": 0.2874867031894316 }, { "epoch": 2.672530049838757, "grad_norm": 0.26325935928183564, "learning_rate": 0.0004905031609596449, "loss": 3.1714792251586914, "step": 4559, "token_acc": 0.27866096271025603 }, { "epoch": 2.673116388155966, "grad_norm": 0.2375904715772175, "learning_rate": 0.0004904965448629497, "loss": 3.125209093093872, "step": 4560, "token_acc": 0.2841763703917682 }, { "epoch": 2.673702726473175, "grad_norm": 0.23241632054675374, "learning_rate": 0.0004904899265071136, "loss": 3.161773681640625, "step": 4561, "token_acc": 0.2812694291293833 }, { "epoch": 2.674289064790384, "grad_norm": 0.3073570840791119, "learning_rate": 0.0004904833058921987, "loss": 3.1765480041503906, "step": 4562, "token_acc": 0.2782499826482507 }, { "epoch": 2.674875403107593, "grad_norm": 0.38672690543600824, "learning_rate": 0.0004904766830182672, "loss": 3.187138080596924, "step": 4563, "token_acc": 0.2763471387647178 }, { "epoch": 2.675461741424802, "grad_norm": 0.2825192954051753, "learning_rate": 0.0004904700578853813, "loss": 3.1743669509887695, "step": 4564, "token_acc": 0.28068595043425376 }, { "epoch": 2.676048079742011, "grad_norm": 0.2615401041372074, "learning_rate": 0.0004904634304936031, "loss": 3.1787073612213135, "step": 4565, "token_acc": 0.2784521762974994 }, { "epoch": 2.67663441805922, "grad_norm": 0.33596458728121636, "learning_rate": 0.0004904568008429951, "loss": 3.1512062549591064, "step": 4566, "token_acc": 0.28083209509658247 }, { "epoch": 2.677220756376429, "grad_norm": 0.24547796900119753, "learning_rate": 0.0004904501689336195, "loss": 3.1426455974578857, "step": 4567, "token_acc": 0.283611214953271 }, { "epoch": 2.6778070946936383, "grad_norm": 0.2746946277535087, "learning_rate": 0.0004904435347655386, "loss": 3.2009925842285156, "step": 4568, "token_acc": 0.27325966447467914 }, { "epoch": 2.6783934330108474, "grad_norm": 0.2846392281299993, "learning_rate": 0.0004904368983388147, "loss": 3.1447372436523438, "step": 4569, "token_acc": 0.28445944856281213 }, { "epoch": 2.6789797713280565, "grad_norm": 0.25004107825723876, "learning_rate": 0.0004904302596535101, "loss": 3.1360507011413574, "step": 4570, "token_acc": 0.2825544050703015 }, { "epoch": 2.679566109645265, "grad_norm": 0.3093188081340262, "learning_rate": 0.0004904236187096871, "loss": 3.1342620849609375, "step": 4571, "token_acc": 0.2833793690681398 }, { "epoch": 2.6801524479624743, "grad_norm": 0.2983771198475082, "learning_rate": 0.0004904169755074083, "loss": 3.1560590267181396, "step": 4572, "token_acc": 0.2817252867186055 }, { "epoch": 2.6807387862796834, "grad_norm": 0.2611308246275719, "learning_rate": 0.000490410330046736, "loss": 3.109617233276367, "step": 4573, "token_acc": 0.2881494254743429 }, { "epoch": 2.6813251245968925, "grad_norm": 0.27114589782545484, "learning_rate": 0.0004904036823277326, "loss": 3.0936973094940186, "step": 4574, "token_acc": 0.2890335292028972 }, { "epoch": 2.681911462914101, "grad_norm": 0.302814370706426, "learning_rate": 0.0004903970323504604, "loss": 3.0958058834075928, "step": 4575, "token_acc": 0.28960546201494286 }, { "epoch": 2.6824978012313103, "grad_norm": 0.26994595165391616, "learning_rate": 0.0004903903801149822, "loss": 3.1496384143829346, "step": 4576, "token_acc": 0.28364542112351604 }, { "epoch": 2.6830841395485194, "grad_norm": 0.2281576336401177, "learning_rate": 0.0004903837256213603, "loss": 3.1455273628234863, "step": 4577, "token_acc": 0.2829372112360179 }, { "epoch": 2.6836704778657285, "grad_norm": 0.2955263179322018, "learning_rate": 0.000490377068869657, "loss": 3.111093044281006, "step": 4578, "token_acc": 0.2876562908245966 }, { "epoch": 2.6842568161829377, "grad_norm": 0.2849666885021753, "learning_rate": 0.0004903704098599352, "loss": 3.1576688289642334, "step": 4579, "token_acc": 0.2804136094211035 }, { "epoch": 2.6848431545001468, "grad_norm": 0.26541886628902084, "learning_rate": 0.0004903637485922574, "loss": 3.169520139694214, "step": 4580, "token_acc": 0.27816290705741376 }, { "epoch": 2.685429492817356, "grad_norm": 0.33017831212644083, "learning_rate": 0.000490357085066686, "loss": 3.171154499053955, "step": 4581, "token_acc": 0.27867526398631576 }, { "epoch": 2.6860158311345645, "grad_norm": 0.2925579502614907, "learning_rate": 0.0004903504192832836, "loss": 3.2160425186157227, "step": 4582, "token_acc": 0.2737966590411443 }, { "epoch": 2.6866021694517737, "grad_norm": 0.258739454691168, "learning_rate": 0.000490343751242113, "loss": 3.1443850994110107, "step": 4583, "token_acc": 0.28243598265438546 }, { "epoch": 2.6871885077689828, "grad_norm": 0.2750951372524947, "learning_rate": 0.0004903370809432366, "loss": 3.1276283264160156, "step": 4584, "token_acc": 0.2859750997297645 }, { "epoch": 2.687774846086192, "grad_norm": 0.28674035075631255, "learning_rate": 0.0004903304083867173, "loss": 3.131162643432617, "step": 4585, "token_acc": 0.2853075519688919 }, { "epoch": 2.6883611844034006, "grad_norm": 0.324133447793151, "learning_rate": 0.0004903237335726177, "loss": 3.173985481262207, "step": 4586, "token_acc": 0.2799420759839349 }, { "epoch": 2.6889475227206097, "grad_norm": 0.3442888658921696, "learning_rate": 0.0004903170565010003, "loss": 3.1555871963500977, "step": 4587, "token_acc": 0.2800369874630892 }, { "epoch": 2.6895338610378188, "grad_norm": 0.2159301624043389, "learning_rate": 0.000490310377171928, "loss": 3.1447908878326416, "step": 4588, "token_acc": 0.2826173820989704 }, { "epoch": 2.690120199355028, "grad_norm": 0.28127233528871115, "learning_rate": 0.0004903036955854637, "loss": 3.107553005218506, "step": 4589, "token_acc": 0.28865429811139903 }, { "epoch": 2.690706537672237, "grad_norm": 0.21470824304125144, "learning_rate": 0.0004902970117416697, "loss": 3.15939998626709, "step": 4590, "token_acc": 0.2798994241469104 }, { "epoch": 2.691292875989446, "grad_norm": 0.29766054153090554, "learning_rate": 0.0004902903256406093, "loss": 3.1213431358337402, "step": 4591, "token_acc": 0.2871884084984325 }, { "epoch": 2.6918792143066548, "grad_norm": 0.2256292701692895, "learning_rate": 0.000490283637282345, "loss": 3.1266729831695557, "step": 4592, "token_acc": 0.28570079093968126 }, { "epoch": 2.692465552623864, "grad_norm": 0.28789147900489287, "learning_rate": 0.0004902769466669398, "loss": 3.156118392944336, "step": 4593, "token_acc": 0.28119190152687157 }, { "epoch": 2.693051890941073, "grad_norm": 0.23820974197935682, "learning_rate": 0.0004902702537944565, "loss": 3.078068494796753, "step": 4594, "token_acc": 0.29140178943094863 }, { "epoch": 2.693638229258282, "grad_norm": 0.27718561567821953, "learning_rate": 0.0004902635586649578, "loss": 3.116590738296509, "step": 4595, "token_acc": 0.2861796729055213 }, { "epoch": 2.6942245675754912, "grad_norm": 0.2979731067236039, "learning_rate": 0.0004902568612785067, "loss": 3.1874887943267822, "step": 4596, "token_acc": 0.2770002344522482 }, { "epoch": 2.6948109058927, "grad_norm": 0.2677133286999762, "learning_rate": 0.0004902501616351663, "loss": 3.160142660140991, "step": 4597, "token_acc": 0.2797148151731426 }, { "epoch": 2.695397244209909, "grad_norm": 0.3073354786713019, "learning_rate": 0.0004902434597349993, "loss": 3.149329900741577, "step": 4598, "token_acc": 0.2825338782533878 }, { "epoch": 2.695983582527118, "grad_norm": 0.2414383481175744, "learning_rate": 0.0004902367555780688, "loss": 3.155968427658081, "step": 4599, "token_acc": 0.28039113456688974 }, { "epoch": 2.6965699208443272, "grad_norm": 0.3153028703425432, "learning_rate": 0.0004902300491644376, "loss": 3.101053476333618, "step": 4600, "token_acc": 0.2883945882265929 }, { "epoch": 2.6971562591615363, "grad_norm": 0.23440077949971044, "learning_rate": 0.0004902233404941688, "loss": 3.142268657684326, "step": 4601, "token_acc": 0.28147943276677057 }, { "epoch": 2.6977425974787455, "grad_norm": 0.29717901517589745, "learning_rate": 0.0004902166295673255, "loss": 3.1640563011169434, "step": 4602, "token_acc": 0.2795592460842005 }, { "epoch": 2.698328935795954, "grad_norm": 0.28572570348458276, "learning_rate": 0.0004902099163839706, "loss": 3.1878528594970703, "step": 4603, "token_acc": 0.27611366033003676 }, { "epoch": 2.6989152741131632, "grad_norm": 0.2597955684897106, "learning_rate": 0.0004902032009441672, "loss": 3.1218440532684326, "step": 4604, "token_acc": 0.2876744446279847 }, { "epoch": 2.6995016124303723, "grad_norm": 0.2844928918649005, "learning_rate": 0.0004901964832479785, "loss": 3.07356595993042, "step": 4605, "token_acc": 0.2941308197062806 }, { "epoch": 2.7000879507475815, "grad_norm": 0.24699663085655166, "learning_rate": 0.0004901897632954673, "loss": 3.1109657287597656, "step": 4606, "token_acc": 0.2858095687831323 }, { "epoch": 2.70067428906479, "grad_norm": 0.26351736270608545, "learning_rate": 0.0004901830410866972, "loss": 3.118032455444336, "step": 4607, "token_acc": 0.2863909863923088 }, { "epoch": 2.7012606273819992, "grad_norm": 0.23599287588106688, "learning_rate": 0.0004901763166217309, "loss": 3.1285157203674316, "step": 4608, "token_acc": 0.28603870819373955 }, { "epoch": 2.7018469656992083, "grad_norm": 0.22522743040455692, "learning_rate": 0.0004901695899006319, "loss": 3.149538040161133, "step": 4609, "token_acc": 0.28281165967516403 }, { "epoch": 2.7024333040164175, "grad_norm": 0.22455951050007034, "learning_rate": 0.0004901628609234631, "loss": 3.0960569381713867, "step": 4610, "token_acc": 0.28850787022165114 }, { "epoch": 2.7030196423336266, "grad_norm": 0.23532155706212635, "learning_rate": 0.0004901561296902879, "loss": 3.1284103393554688, "step": 4611, "token_acc": 0.284315905425679 }, { "epoch": 2.7036059806508357, "grad_norm": 0.25515369741528093, "learning_rate": 0.0004901493962011694, "loss": 3.1549153327941895, "step": 4612, "token_acc": 0.2805373151299485 }, { "epoch": 2.704192318968045, "grad_norm": 0.24353652049388982, "learning_rate": 0.000490142660456171, "loss": 3.084451675415039, "step": 4613, "token_acc": 0.2913050298023633 }, { "epoch": 2.7047786572852535, "grad_norm": 0.26363219320596737, "learning_rate": 0.0004901359224553559, "loss": 3.133878707885742, "step": 4614, "token_acc": 0.2825645355233198 }, { "epoch": 2.7053649956024626, "grad_norm": 0.25453575566896036, "learning_rate": 0.0004901291821987875, "loss": 3.1414904594421387, "step": 4615, "token_acc": 0.28200538795362584 }, { "epoch": 2.7059513339196717, "grad_norm": 0.24546837521932802, "learning_rate": 0.0004901224396865288, "loss": 3.144693374633789, "step": 4616, "token_acc": 0.2831186992548684 }, { "epoch": 2.706537672236881, "grad_norm": 0.26714083224545776, "learning_rate": 0.0004901156949186434, "loss": 3.184706211090088, "step": 4617, "token_acc": 0.27719609766703857 }, { "epoch": 2.7071240105540895, "grad_norm": 0.2884246925258864, "learning_rate": 0.0004901089478951947, "loss": 3.179894208908081, "step": 4618, "token_acc": 0.2783122233422448 }, { "epoch": 2.7077103488712986, "grad_norm": 0.3870330747852714, "learning_rate": 0.0004901021986162459, "loss": 3.176281452178955, "step": 4619, "token_acc": 0.2770651549532268 }, { "epoch": 2.7082966871885077, "grad_norm": 0.4220802230232499, "learning_rate": 0.0004900954470818606, "loss": 3.1220810413360596, "step": 4620, "token_acc": 0.28444305160894445 }, { "epoch": 2.708883025505717, "grad_norm": 0.28847719772337294, "learning_rate": 0.0004900886932921021, "loss": 3.123826503753662, "step": 4621, "token_acc": 0.2850696876358798 }, { "epoch": 2.709469363822926, "grad_norm": 0.26718665787498486, "learning_rate": 0.0004900819372470336, "loss": 3.14339280128479, "step": 4622, "token_acc": 0.28174138540725313 }, { "epoch": 2.710055702140135, "grad_norm": 0.2683354956939335, "learning_rate": 0.0004900751789467191, "loss": 3.125194549560547, "step": 4623, "token_acc": 0.2853502367418167 }, { "epoch": 2.710642040457344, "grad_norm": 0.24074950324163233, "learning_rate": 0.0004900684183912217, "loss": 3.138235330581665, "step": 4624, "token_acc": 0.28499834360299475 }, { "epoch": 2.711228378774553, "grad_norm": 0.27774818337033985, "learning_rate": 0.000490061655580605, "loss": 3.133533477783203, "step": 4625, "token_acc": 0.28299663211485376 }, { "epoch": 2.711814717091762, "grad_norm": 0.2904127894817428, "learning_rate": 0.0004900548905149325, "loss": 3.175708293914795, "step": 4626, "token_acc": 0.27870902986177276 }, { "epoch": 2.712401055408971, "grad_norm": 0.2859313859396692, "learning_rate": 0.0004900481231942678, "loss": 3.1801204681396484, "step": 4627, "token_acc": 0.2763924753963368 }, { "epoch": 2.71298739372618, "grad_norm": 0.32398416472042446, "learning_rate": 0.0004900413536186746, "loss": 3.144838809967041, "step": 4628, "token_acc": 0.2836153848099409 }, { "epoch": 2.713573732043389, "grad_norm": 0.2796564939939551, "learning_rate": 0.0004900345817882161, "loss": 3.1292924880981445, "step": 4629, "token_acc": 0.28484225219556814 }, { "epoch": 2.714160070360598, "grad_norm": 0.30077931824830456, "learning_rate": 0.0004900278077029563, "loss": 3.1165008544921875, "step": 4630, "token_acc": 0.28470951509606585 }, { "epoch": 2.714746408677807, "grad_norm": 0.2949413818316535, "learning_rate": 0.0004900210313629587, "loss": 3.131425619125366, "step": 4631, "token_acc": 0.2845090283657069 }, { "epoch": 2.715332746995016, "grad_norm": 0.26149296501499775, "learning_rate": 0.0004900142527682869, "loss": 3.1609621047973633, "step": 4632, "token_acc": 0.28026562142212524 }, { "epoch": 2.7159190853122253, "grad_norm": 0.2542989178347821, "learning_rate": 0.0004900074719190045, "loss": 3.1593732833862305, "step": 4633, "token_acc": 0.28187610153353726 }, { "epoch": 2.7165054236294344, "grad_norm": 0.2808530973569041, "learning_rate": 0.0004900006888151755, "loss": 3.152329206466675, "step": 4634, "token_acc": 0.28108684375838994 }, { "epoch": 2.7170917619466435, "grad_norm": 0.2890188446726456, "learning_rate": 0.0004899939034568633, "loss": 3.1970624923706055, "step": 4635, "token_acc": 0.27567696310138873 }, { "epoch": 2.717678100263852, "grad_norm": 0.29683707155963884, "learning_rate": 0.0004899871158441319, "loss": 3.1192421913146973, "step": 4636, "token_acc": 0.2858852418251896 }, { "epoch": 2.7182644385810613, "grad_norm": 0.29741058546887494, "learning_rate": 0.0004899803259770448, "loss": 3.1668338775634766, "step": 4637, "token_acc": 0.27941674165527625 }, { "epoch": 2.7188507768982704, "grad_norm": 0.29610847521435885, "learning_rate": 0.000489973533855666, "loss": 3.113586187362671, "step": 4638, "token_acc": 0.28603497930119165 }, { "epoch": 2.7194371152154795, "grad_norm": 0.3366273867720583, "learning_rate": 0.0004899667394800592, "loss": 3.1647963523864746, "step": 4639, "token_acc": 0.28175258860240826 }, { "epoch": 2.720023453532688, "grad_norm": 0.34250426163279474, "learning_rate": 0.0004899599428502883, "loss": 3.168461322784424, "step": 4640, "token_acc": 0.27693266395642757 }, { "epoch": 2.7206097918498973, "grad_norm": 0.2750664984680904, "learning_rate": 0.000489953143966417, "loss": 3.1305861473083496, "step": 4641, "token_acc": 0.2839380693281164 }, { "epoch": 2.7211961301671064, "grad_norm": 0.2470801226380168, "learning_rate": 0.0004899463428285093, "loss": 3.1503121852874756, "step": 4642, "token_acc": 0.28222943422051605 }, { "epoch": 2.7217824684843155, "grad_norm": 0.34691840004262736, "learning_rate": 0.000489939539436629, "loss": 3.1506032943725586, "step": 4643, "token_acc": 0.28079444004972315 }, { "epoch": 2.7223688068015246, "grad_norm": 0.3516749888249259, "learning_rate": 0.0004899327337908402, "loss": 3.141904354095459, "step": 4644, "token_acc": 0.28399655038547617 }, { "epoch": 2.7229551451187337, "grad_norm": 0.252177892636462, "learning_rate": 0.0004899259258912065, "loss": 3.148179531097412, "step": 4645, "token_acc": 0.28268363014295006 }, { "epoch": 2.7235414834359424, "grad_norm": 0.29009563463127364, "learning_rate": 0.000489919115737792, "loss": 3.148155689239502, "step": 4646, "token_acc": 0.2819458073294867 }, { "epoch": 2.7241278217531515, "grad_norm": 0.3206674979347932, "learning_rate": 0.0004899123033306608, "loss": 3.1484107971191406, "step": 4647, "token_acc": 0.28285912452153233 }, { "epoch": 2.7247141600703606, "grad_norm": 0.23996238680679377, "learning_rate": 0.0004899054886698768, "loss": 3.128343105316162, "step": 4648, "token_acc": 0.28616290379322473 }, { "epoch": 2.7253004983875697, "grad_norm": 0.27142357675306694, "learning_rate": 0.000489898671755504, "loss": 3.0969607830047607, "step": 4649, "token_acc": 0.2903362726950791 }, { "epoch": 2.7258868367047784, "grad_norm": 0.2900547923061283, "learning_rate": 0.0004898918525876064, "loss": 3.091984748840332, "step": 4650, "token_acc": 0.29108085914646786 }, { "epoch": 2.7264731750219875, "grad_norm": 0.22992646377485954, "learning_rate": 0.0004898850311662482, "loss": 3.0915329456329346, "step": 4651, "token_acc": 0.2891348308320256 }, { "epoch": 2.7270595133391966, "grad_norm": 0.23601135815877283, "learning_rate": 0.0004898782074914933, "loss": 3.1307356357574463, "step": 4652, "token_acc": 0.28399524961789835 }, { "epoch": 2.7276458516564057, "grad_norm": 0.23617799372227247, "learning_rate": 0.0004898713815634059, "loss": 3.1133649349212646, "step": 4653, "token_acc": 0.2859813932865183 }, { "epoch": 2.728232189973615, "grad_norm": 0.23585246452634517, "learning_rate": 0.0004898645533820502, "loss": 3.166503429412842, "step": 4654, "token_acc": 0.28026843984071426 }, { "epoch": 2.728818528290824, "grad_norm": 0.21685252388318935, "learning_rate": 0.0004898577229474901, "loss": 3.170147657394409, "step": 4655, "token_acc": 0.27952401306750524 }, { "epoch": 2.729404866608033, "grad_norm": 0.31649556891255637, "learning_rate": 0.0004898508902597899, "loss": 3.083954334259033, "step": 4656, "token_acc": 0.2900733946426995 }, { "epoch": 2.7299912049252417, "grad_norm": 0.3637913677699276, "learning_rate": 0.0004898440553190139, "loss": 3.147120952606201, "step": 4657, "token_acc": 0.28143762682789064 }, { "epoch": 2.730577543242451, "grad_norm": 0.36166290170017024, "learning_rate": 0.0004898372181252261, "loss": 3.122004747390747, "step": 4658, "token_acc": 0.28577327505036637 }, { "epoch": 2.73116388155966, "grad_norm": 0.23316732792858197, "learning_rate": 0.0004898303786784909, "loss": 3.108412504196167, "step": 4659, "token_acc": 0.2881640377889195 }, { "epoch": 2.731750219876869, "grad_norm": 0.29814462268874253, "learning_rate": 0.0004898235369788724, "loss": 3.1333141326904297, "step": 4660, "token_acc": 0.28487127040673144 }, { "epoch": 2.7323365581940777, "grad_norm": 0.2509210591832417, "learning_rate": 0.000489816693026435, "loss": 3.128150463104248, "step": 4661, "token_acc": 0.2850359496323651 }, { "epoch": 2.732922896511287, "grad_norm": 0.23572388201185432, "learning_rate": 0.0004898098468212428, "loss": 3.081077814102173, "step": 4662, "token_acc": 0.29224525070649554 }, { "epoch": 2.733509234828496, "grad_norm": 0.29483905246628833, "learning_rate": 0.0004898029983633604, "loss": 3.1461424827575684, "step": 4663, "token_acc": 0.2812102110250684 }, { "epoch": 2.734095573145705, "grad_norm": 0.20071425180809044, "learning_rate": 0.0004897961476528519, "loss": 3.1102137565612793, "step": 4664, "token_acc": 0.28715215199926686 }, { "epoch": 2.734681911462914, "grad_norm": 0.29503383800872274, "learning_rate": 0.0004897892946897817, "loss": 3.1386547088623047, "step": 4665, "token_acc": 0.2839117938936217 }, { "epoch": 2.7352682497801233, "grad_norm": 0.3034404017055964, "learning_rate": 0.0004897824394742142, "loss": 3.144566297531128, "step": 4666, "token_acc": 0.2834482413410647 }, { "epoch": 2.7358545880973324, "grad_norm": 0.21482594888669135, "learning_rate": 0.0004897755820062139, "loss": 3.1447761058807373, "step": 4667, "token_acc": 0.2832826142228443 }, { "epoch": 2.736440926414541, "grad_norm": 0.2793304598202907, "learning_rate": 0.0004897687222858449, "loss": 3.0738322734832764, "step": 4668, "token_acc": 0.29108797202432213 }, { "epoch": 2.73702726473175, "grad_norm": 0.22536273070786247, "learning_rate": 0.000489761860313172, "loss": 3.1731812953948975, "step": 4669, "token_acc": 0.27899361272854617 }, { "epoch": 2.7376136030489593, "grad_norm": 0.2599278124761073, "learning_rate": 0.0004897549960882594, "loss": 3.139139175415039, "step": 4670, "token_acc": 0.28286789153244957 }, { "epoch": 2.7381999413661684, "grad_norm": 0.22082128100025292, "learning_rate": 0.0004897481296111718, "loss": 3.098876476287842, "step": 4671, "token_acc": 0.28846851062845036 }, { "epoch": 2.738786279683377, "grad_norm": 0.26358662258456256, "learning_rate": 0.0004897412608819736, "loss": 3.1468658447265625, "step": 4672, "token_acc": 0.2808954277120144 }, { "epoch": 2.739372618000586, "grad_norm": 0.29519735422107757, "learning_rate": 0.0004897343899007291, "loss": 3.1617846488952637, "step": 4673, "token_acc": 0.2805192346128357 }, { "epoch": 2.7399589563177953, "grad_norm": 0.24847913735160262, "learning_rate": 0.0004897275166675033, "loss": 3.1215832233428955, "step": 4674, "token_acc": 0.28753905136767166 }, { "epoch": 2.7405452946350044, "grad_norm": 0.3282242401468146, "learning_rate": 0.0004897206411823604, "loss": 3.1645286083221436, "step": 4675, "token_acc": 0.2789327597523269 }, { "epoch": 2.7411316329522135, "grad_norm": 0.28710659048661724, "learning_rate": 0.0004897137634453651, "loss": 3.145056962966919, "step": 4676, "token_acc": 0.2833787614776619 }, { "epoch": 2.7417179712694226, "grad_norm": 0.2508308232827155, "learning_rate": 0.0004897068834565821, "loss": 3.127073287963867, "step": 4677, "token_acc": 0.2846760196702343 }, { "epoch": 2.7423043095866317, "grad_norm": 0.30398951874752256, "learning_rate": 0.0004897000012160759, "loss": 3.1692733764648438, "step": 4678, "token_acc": 0.2789690947614525 }, { "epoch": 2.7428906479038404, "grad_norm": 0.2953313476242557, "learning_rate": 0.0004896931167239111, "loss": 3.1600899696350098, "step": 4679, "token_acc": 0.28037793612554546 }, { "epoch": 2.7434769862210495, "grad_norm": 0.2468342939084421, "learning_rate": 0.0004896862299801526, "loss": 3.1086108684539795, "step": 4680, "token_acc": 0.2871841909532683 }, { "epoch": 2.7440633245382586, "grad_norm": 0.26562090552312306, "learning_rate": 0.0004896793409848648, "loss": 3.18031907081604, "step": 4681, "token_acc": 0.27814727879001344 }, { "epoch": 2.7446496628554677, "grad_norm": 0.23660266954358344, "learning_rate": 0.0004896724497381127, "loss": 3.1466221809387207, "step": 4682, "token_acc": 0.2816064040061401 }, { "epoch": 2.7452360011726764, "grad_norm": 0.23469188574664585, "learning_rate": 0.0004896655562399608, "loss": 3.0906708240509033, "step": 4683, "token_acc": 0.2874018080965005 }, { "epoch": 2.7458223394898855, "grad_norm": 0.23105903296739963, "learning_rate": 0.000489658660490474, "loss": 3.108341693878174, "step": 4684, "token_acc": 0.28833244089461924 }, { "epoch": 2.7464086778070946, "grad_norm": 0.23723851449580355, "learning_rate": 0.000489651762489717, "loss": 3.1508374214172363, "step": 4685, "token_acc": 0.2804935379411232 }, { "epoch": 2.7469950161243037, "grad_norm": 0.25478314914994793, "learning_rate": 0.0004896448622377546, "loss": 3.132269859313965, "step": 4686, "token_acc": 0.28439411159820316 }, { "epoch": 2.747581354441513, "grad_norm": 0.2585053572222541, "learning_rate": 0.0004896379597346517, "loss": 3.0948827266693115, "step": 4687, "token_acc": 0.2917751699849616 }, { "epoch": 2.748167692758722, "grad_norm": 0.24682417857298933, "learning_rate": 0.000489631054980473, "loss": 3.1234304904937744, "step": 4688, "token_acc": 0.28512647756060777 }, { "epoch": 2.748754031075931, "grad_norm": 0.24527136084390572, "learning_rate": 0.0004896241479752835, "loss": 3.154219627380371, "step": 4689, "token_acc": 0.28161860729631816 }, { "epoch": 2.7493403693931397, "grad_norm": 0.23419750139671655, "learning_rate": 0.0004896172387191481, "loss": 3.1275951862335205, "step": 4690, "token_acc": 0.2865577030638245 }, { "epoch": 2.749926707710349, "grad_norm": 0.23597819088616398, "learning_rate": 0.0004896103272121315, "loss": 3.116631031036377, "step": 4691, "token_acc": 0.28638339149166675 }, { "epoch": 2.750513046027558, "grad_norm": 0.25151531147525114, "learning_rate": 0.0004896034134542989, "loss": 3.1337738037109375, "step": 4692, "token_acc": 0.2840357459510196 }, { "epoch": 2.751099384344767, "grad_norm": 0.2735195578784183, "learning_rate": 0.000489596497445715, "loss": 3.167222499847412, "step": 4693, "token_acc": 0.27862573515284444 }, { "epoch": 2.7516857226619758, "grad_norm": 0.3585359974120776, "learning_rate": 0.0004895895791864449, "loss": 3.1827821731567383, "step": 4694, "token_acc": 0.27783848165505814 }, { "epoch": 2.752272060979185, "grad_norm": 0.5008751756653854, "learning_rate": 0.0004895826586765535, "loss": 3.138991594314575, "step": 4695, "token_acc": 0.28352407574814187 }, { "epoch": 2.752858399296394, "grad_norm": 0.3484440637997365, "learning_rate": 0.000489575735916106, "loss": 3.103484630584717, "step": 4696, "token_acc": 0.28816345524024367 }, { "epoch": 2.753444737613603, "grad_norm": 0.27816780930070484, "learning_rate": 0.0004895688109051671, "loss": 3.1396255493164062, "step": 4697, "token_acc": 0.28212498196628527 }, { "epoch": 2.754031075930812, "grad_norm": 0.3227452600003064, "learning_rate": 0.0004895618836438022, "loss": 3.185189962387085, "step": 4698, "token_acc": 0.2767657440876856 }, { "epoch": 2.7546174142480213, "grad_norm": 0.31382178378118714, "learning_rate": 0.0004895549541320762, "loss": 3.1338088512420654, "step": 4699, "token_acc": 0.2838295169941415 }, { "epoch": 2.75520375256523, "grad_norm": 0.23049711230321435, "learning_rate": 0.0004895480223700542, "loss": 3.142578601837158, "step": 4700, "token_acc": 0.2820820480558487 }, { "epoch": 2.755790090882439, "grad_norm": 0.31489964839209, "learning_rate": 0.0004895410883578012, "loss": 3.1371493339538574, "step": 4701, "token_acc": 0.2836753013593229 }, { "epoch": 2.756376429199648, "grad_norm": 0.2686036582480531, "learning_rate": 0.0004895341520953826, "loss": 3.1444923877716064, "step": 4702, "token_acc": 0.2824188603446453 }, { "epoch": 2.7569627675168573, "grad_norm": 0.2981379247343524, "learning_rate": 0.0004895272135828634, "loss": 3.1441211700439453, "step": 4703, "token_acc": 0.2825166085996841 }, { "epoch": 2.757549105834066, "grad_norm": 0.2601346819378336, "learning_rate": 0.0004895202728203088, "loss": 3.164188861846924, "step": 4704, "token_acc": 0.27944430027902695 }, { "epoch": 2.758135444151275, "grad_norm": 0.29451487480838134, "learning_rate": 0.000489513329807784, "loss": 3.1282501220703125, "step": 4705, "token_acc": 0.2865070585756426 }, { "epoch": 2.758721782468484, "grad_norm": 0.21241281390617445, "learning_rate": 0.0004895063845453541, "loss": 3.1644952297210693, "step": 4706, "token_acc": 0.2815866784284742 }, { "epoch": 2.7593081207856933, "grad_norm": 0.31226325523463877, "learning_rate": 0.0004894994370330845, "loss": 3.1245813369750977, "step": 4707, "token_acc": 0.28736516274270707 }, { "epoch": 2.7598944591029024, "grad_norm": 0.23877780682356678, "learning_rate": 0.0004894924872710406, "loss": 3.114713191986084, "step": 4708, "token_acc": 0.2862852482568282 }, { "epoch": 2.7604807974201115, "grad_norm": 0.2979531407373359, "learning_rate": 0.0004894855352592873, "loss": 3.1398301124572754, "step": 4709, "token_acc": 0.28429895651808773 }, { "epoch": 2.7610671357373207, "grad_norm": 0.27890177073715405, "learning_rate": 0.0004894785809978902, "loss": 3.1423983573913574, "step": 4710, "token_acc": 0.28212466796917873 }, { "epoch": 2.7616534740545293, "grad_norm": 0.28840477872329, "learning_rate": 0.0004894716244869144, "loss": 3.1969165802001953, "step": 4711, "token_acc": 0.2763451022750287 }, { "epoch": 2.7622398123717384, "grad_norm": 0.30006077725123764, "learning_rate": 0.0004894646657264256, "loss": 3.1501541137695312, "step": 4712, "token_acc": 0.28159570310037446 }, { "epoch": 2.7628261506889475, "grad_norm": 0.2574056239584595, "learning_rate": 0.0004894577047164887, "loss": 3.1437087059020996, "step": 4713, "token_acc": 0.2819671620966257 }, { "epoch": 2.7634124890061567, "grad_norm": 0.32790668941460493, "learning_rate": 0.0004894507414571695, "loss": 3.1586780548095703, "step": 4714, "token_acc": 0.2810597499414368 }, { "epoch": 2.7639988273233653, "grad_norm": 0.3137578036505134, "learning_rate": 0.0004894437759485332, "loss": 3.1431007385253906, "step": 4715, "token_acc": 0.281324909056098 }, { "epoch": 2.7645851656405744, "grad_norm": 0.273846201435247, "learning_rate": 0.0004894368081906454, "loss": 3.1213245391845703, "step": 4716, "token_acc": 0.285061088095464 }, { "epoch": 2.7651715039577835, "grad_norm": 0.2501106641797382, "learning_rate": 0.0004894298381835713, "loss": 3.1490840911865234, "step": 4717, "token_acc": 0.2831556850515342 }, { "epoch": 2.7657578422749927, "grad_norm": 0.22216573064279863, "learning_rate": 0.0004894228659273765, "loss": 3.126720905303955, "step": 4718, "token_acc": 0.2867807137919293 }, { "epoch": 2.7663441805922018, "grad_norm": 0.28318810862495697, "learning_rate": 0.0004894158914221265, "loss": 3.1314051151275635, "step": 4719, "token_acc": 0.28491857545991867 }, { "epoch": 2.766930518909411, "grad_norm": 0.2560482405355049, "learning_rate": 0.0004894089146678869, "loss": 3.1204075813293457, "step": 4720, "token_acc": 0.2870677545652889 }, { "epoch": 2.76751685722662, "grad_norm": 0.22524016270833025, "learning_rate": 0.0004894019356647231, "loss": 3.071620225906372, "step": 4721, "token_acc": 0.29332311230356456 }, { "epoch": 2.7681031955438287, "grad_norm": 0.272433271011628, "learning_rate": 0.0004893949544127008, "loss": 3.117757797241211, "step": 4722, "token_acc": 0.28637140867855754 }, { "epoch": 2.7686895338610378, "grad_norm": 0.3305912445740114, "learning_rate": 0.0004893879709118853, "loss": 3.1081907749176025, "step": 4723, "token_acc": 0.28894129328534196 }, { "epoch": 2.769275872178247, "grad_norm": 0.266289648154714, "learning_rate": 0.0004893809851623425, "loss": 3.1260695457458496, "step": 4724, "token_acc": 0.2857022663932271 }, { "epoch": 2.769862210495456, "grad_norm": 0.25903067837290356, "learning_rate": 0.0004893739971641379, "loss": 3.134542465209961, "step": 4725, "token_acc": 0.2837237881546508 }, { "epoch": 2.7704485488126647, "grad_norm": 0.31142384520845673, "learning_rate": 0.0004893670069173371, "loss": 3.160855531692505, "step": 4726, "token_acc": 0.2806040532494046 }, { "epoch": 2.771034887129874, "grad_norm": 0.24921372353066618, "learning_rate": 0.0004893600144220059, "loss": 3.1608972549438477, "step": 4727, "token_acc": 0.28055226393912935 }, { "epoch": 2.771621225447083, "grad_norm": 0.3406261263035801, "learning_rate": 0.00048935301967821, "loss": 3.1286606788635254, "step": 4728, "token_acc": 0.28479483816761414 }, { "epoch": 2.772207563764292, "grad_norm": 0.243917088945951, "learning_rate": 0.0004893460226860149, "loss": 3.1958253383636475, "step": 4729, "token_acc": 0.276421516586247 }, { "epoch": 2.772793902081501, "grad_norm": 0.33389709848379723, "learning_rate": 0.0004893390234454864, "loss": 3.1602582931518555, "step": 4730, "token_acc": 0.2792872350414012 }, { "epoch": 2.7733802403987102, "grad_norm": 0.31752815670651213, "learning_rate": 0.0004893320219566904, "loss": 3.095024347305298, "step": 4731, "token_acc": 0.2899858897742364 }, { "epoch": 2.7739665787159193, "grad_norm": 0.2852714205903597, "learning_rate": 0.0004893250182196924, "loss": 3.1193761825561523, "step": 4732, "token_acc": 0.2867515307852735 }, { "epoch": 2.774552917033128, "grad_norm": 0.29605496102111417, "learning_rate": 0.0004893180122345585, "loss": 3.1618452072143555, "step": 4733, "token_acc": 0.27959368970604925 }, { "epoch": 2.775139255350337, "grad_norm": 0.22579600572916014, "learning_rate": 0.0004893110040013543, "loss": 3.1116764545440674, "step": 4734, "token_acc": 0.28650340451482514 }, { "epoch": 2.7757255936675462, "grad_norm": 0.2535650210233001, "learning_rate": 0.0004893039935201458, "loss": 3.1361196041107178, "step": 4735, "token_acc": 0.2833049839917056 }, { "epoch": 2.7763119319847553, "grad_norm": 0.2198855230716398, "learning_rate": 0.0004892969807909986, "loss": 3.13472318649292, "step": 4736, "token_acc": 0.2840570958143328 }, { "epoch": 2.776898270301964, "grad_norm": 0.3185649597506807, "learning_rate": 0.0004892899658139788, "loss": 3.137021064758301, "step": 4737, "token_acc": 0.28396280218020264 }, { "epoch": 2.777484608619173, "grad_norm": 0.2423267319933476, "learning_rate": 0.0004892829485891522, "loss": 3.1698858737945557, "step": 4738, "token_acc": 0.27865405681930794 }, { "epoch": 2.7780709469363822, "grad_norm": 0.2486845724927719, "learning_rate": 0.0004892759291165847, "loss": 3.0912961959838867, "step": 4739, "token_acc": 0.2905722047824101 }, { "epoch": 2.7786572852535913, "grad_norm": 0.2258235165814888, "learning_rate": 0.0004892689073963423, "loss": 3.1047024726867676, "step": 4740, "token_acc": 0.2876001199057563 }, { "epoch": 2.7792436235708005, "grad_norm": 0.21219839238692192, "learning_rate": 0.0004892618834284909, "loss": 3.172163486480713, "step": 4741, "token_acc": 0.27929837076824626 }, { "epoch": 2.7798299618880096, "grad_norm": 0.24686201524315263, "learning_rate": 0.0004892548572130966, "loss": 3.1514899730682373, "step": 4742, "token_acc": 0.2816319848661234 }, { "epoch": 2.7804163002052187, "grad_norm": 0.22372404989621758, "learning_rate": 0.0004892478287502252, "loss": 3.1281960010528564, "step": 4743, "token_acc": 0.28440663419879847 }, { "epoch": 2.7810026385224274, "grad_norm": 0.2470546516877714, "learning_rate": 0.0004892407980399429, "loss": 3.128087043762207, "step": 4744, "token_acc": 0.283678534526177 }, { "epoch": 2.7815889768396365, "grad_norm": 0.23988344132263434, "learning_rate": 0.0004892337650823157, "loss": 3.113142490386963, "step": 4745, "token_acc": 0.2880073158217938 }, { "epoch": 2.7821753151568456, "grad_norm": 0.2126166486788284, "learning_rate": 0.0004892267298774096, "loss": 3.162776470184326, "step": 4746, "token_acc": 0.2809240175222968 }, { "epoch": 2.7827616534740547, "grad_norm": 0.24419246115066431, "learning_rate": 0.0004892196924252908, "loss": 3.1414027214050293, "step": 4747, "token_acc": 0.28242271641925826 }, { "epoch": 2.7833479917912634, "grad_norm": 0.29457160394824694, "learning_rate": 0.0004892126527260253, "loss": 3.1456260681152344, "step": 4748, "token_acc": 0.28155473506156736 }, { "epoch": 2.7839343301084725, "grad_norm": 0.3212524312582994, "learning_rate": 0.0004892056107796793, "loss": 3.147282600402832, "step": 4749, "token_acc": 0.280877109987605 }, { "epoch": 2.7845206684256816, "grad_norm": 0.3658091207040734, "learning_rate": 0.0004891985665863189, "loss": 3.112868547439575, "step": 4750, "token_acc": 0.28801362425244065 }, { "epoch": 2.7851070067428907, "grad_norm": 0.23358527190445688, "learning_rate": 0.0004891915201460103, "loss": 3.1768479347229004, "step": 4751, "token_acc": 0.2800974847646615 }, { "epoch": 2.7856933450601, "grad_norm": 0.40776014724302473, "learning_rate": 0.0004891844714588196, "loss": 3.1268935203552246, "step": 4752, "token_acc": 0.285677277041065 }, { "epoch": 2.786279683377309, "grad_norm": 0.33237398735911294, "learning_rate": 0.0004891774205248133, "loss": 3.120698928833008, "step": 4753, "token_acc": 0.2839557856669096 }, { "epoch": 2.7868660216945176, "grad_norm": 0.3027464081842109, "learning_rate": 0.0004891703673440572, "loss": 3.119347095489502, "step": 4754, "token_acc": 0.28429170467699527 }, { "epoch": 2.7874523600117267, "grad_norm": 0.3535387774499992, "learning_rate": 0.0004891633119166179, "loss": 3.1623423099517822, "step": 4755, "token_acc": 0.27947276863374815 }, { "epoch": 2.788038698328936, "grad_norm": 0.2852322447383937, "learning_rate": 0.0004891562542425615, "loss": 3.197279691696167, "step": 4756, "token_acc": 0.2746762601012241 }, { "epoch": 2.788625036646145, "grad_norm": 0.3025865257725178, "learning_rate": 0.0004891491943219545, "loss": 3.1487555503845215, "step": 4757, "token_acc": 0.2813431230165134 }, { "epoch": 2.7892113749633536, "grad_norm": 0.26241815870430996, "learning_rate": 0.0004891421321548629, "loss": 3.1276917457580566, "step": 4758, "token_acc": 0.28311717937883496 }, { "epoch": 2.7897977132805627, "grad_norm": 0.2570204720041931, "learning_rate": 0.0004891350677413534, "loss": 3.1447300910949707, "step": 4759, "token_acc": 0.2803467297911538 }, { "epoch": 2.790384051597772, "grad_norm": 0.2715480239119731, "learning_rate": 0.000489128001081492, "loss": 3.118472099304199, "step": 4760, "token_acc": 0.286356869080427 }, { "epoch": 2.790970389914981, "grad_norm": 0.26125443731071396, "learning_rate": 0.0004891209321753454, "loss": 3.0976710319519043, "step": 4761, "token_acc": 0.28823498585053975 }, { "epoch": 2.79155672823219, "grad_norm": 0.2540348084330082, "learning_rate": 0.0004891138610229797, "loss": 3.1242332458496094, "step": 4762, "token_acc": 0.2863199968650809 }, { "epoch": 2.792143066549399, "grad_norm": 0.2966605158790345, "learning_rate": 0.0004891067876244616, "loss": 3.1267249584198, "step": 4763, "token_acc": 0.285043304990485 }, { "epoch": 2.7927294048666083, "grad_norm": 0.26760063200786866, "learning_rate": 0.0004890997119798574, "loss": 3.11531138420105, "step": 4764, "token_acc": 0.2867565740968297 }, { "epoch": 2.793315743183817, "grad_norm": 0.2755861163539987, "learning_rate": 0.0004890926340892337, "loss": 3.1764094829559326, "step": 4765, "token_acc": 0.27950152121059135 }, { "epoch": 2.793902081501026, "grad_norm": 0.2501730141152054, "learning_rate": 0.0004890855539526567, "loss": 3.1275932788848877, "step": 4766, "token_acc": 0.2851521061326907 }, { "epoch": 2.794488419818235, "grad_norm": 0.21243290555023056, "learning_rate": 0.0004890784715701933, "loss": 3.1366515159606934, "step": 4767, "token_acc": 0.28415683890261617 }, { "epoch": 2.7950747581354443, "grad_norm": 0.24661673490560215, "learning_rate": 0.0004890713869419097, "loss": 3.15203857421875, "step": 4768, "token_acc": 0.2808588189566189 }, { "epoch": 2.795661096452653, "grad_norm": 0.2272899537043174, "learning_rate": 0.0004890643000678725, "loss": 3.1364526748657227, "step": 4769, "token_acc": 0.28260287774314413 }, { "epoch": 2.796247434769862, "grad_norm": 0.2599063054148905, "learning_rate": 0.0004890572109481485, "loss": 3.1675479412078857, "step": 4770, "token_acc": 0.2796374963048929 }, { "epoch": 2.796833773087071, "grad_norm": 0.2830031105603673, "learning_rate": 0.0004890501195828039, "loss": 3.178873062133789, "step": 4771, "token_acc": 0.27720935625054344 }, { "epoch": 2.7974201114042803, "grad_norm": 0.2432977124445569, "learning_rate": 0.0004890430259719058, "loss": 3.15281343460083, "step": 4772, "token_acc": 0.2817670969898106 }, { "epoch": 2.7980064497214894, "grad_norm": 0.2681937681555498, "learning_rate": 0.0004890359301155205, "loss": 3.1359658241271973, "step": 4773, "token_acc": 0.2841669557664145 }, { "epoch": 2.7985927880386985, "grad_norm": 0.2744038421293905, "learning_rate": 0.0004890288320137147, "loss": 3.155168056488037, "step": 4774, "token_acc": 0.2806380692966581 }, { "epoch": 2.7991791263559076, "grad_norm": 0.2725403939948629, "learning_rate": 0.0004890217316665552, "loss": 3.1517701148986816, "step": 4775, "token_acc": 0.28120835642390424 }, { "epoch": 2.7997654646731163, "grad_norm": 0.21723623310345797, "learning_rate": 0.0004890146290741086, "loss": 3.130082368850708, "step": 4776, "token_acc": 0.2843410521252288 }, { "epoch": 2.8003518029903254, "grad_norm": 0.20437789964757797, "learning_rate": 0.0004890075242364415, "loss": 3.1507952213287354, "step": 4777, "token_acc": 0.2810131615357543 }, { "epoch": 2.8009381413075345, "grad_norm": 0.26381051439657055, "learning_rate": 0.000489000417153621, "loss": 3.163231134414673, "step": 4778, "token_acc": 0.2783015543832803 }, { "epoch": 2.8015244796247436, "grad_norm": 0.2637321701196472, "learning_rate": 0.0004889933078257134, "loss": 3.1180357933044434, "step": 4779, "token_acc": 0.28675394850656216 }, { "epoch": 2.8021108179419523, "grad_norm": 0.22620770781136632, "learning_rate": 0.000488986196252786, "loss": 3.1661171913146973, "step": 4780, "token_acc": 0.2811723376387216 }, { "epoch": 2.8026971562591614, "grad_norm": 0.24031152012616444, "learning_rate": 0.0004889790824349051, "loss": 3.1385445594787598, "step": 4781, "token_acc": 0.2838351694462471 }, { "epoch": 2.8032834945763705, "grad_norm": 0.32767669290900836, "learning_rate": 0.0004889719663721378, "loss": 3.116870880126953, "step": 4782, "token_acc": 0.28447842034541976 }, { "epoch": 2.8038698328935796, "grad_norm": 0.4466538165430876, "learning_rate": 0.000488964848064551, "loss": 3.1482481956481934, "step": 4783, "token_acc": 0.28074214732178177 }, { "epoch": 2.8044561712107887, "grad_norm": 0.3965562260027191, "learning_rate": 0.0004889577275122113, "loss": 3.1394331455230713, "step": 4784, "token_acc": 0.2827249396814971 }, { "epoch": 2.805042509527998, "grad_norm": 0.26538141496841916, "learning_rate": 0.0004889506047151858, "loss": 3.1387925148010254, "step": 4785, "token_acc": 0.28311314216942346 }, { "epoch": 2.805628847845207, "grad_norm": 0.3218794602907884, "learning_rate": 0.0004889434796735415, "loss": 3.11043381690979, "step": 4786, "token_acc": 0.2868546126660438 }, { "epoch": 2.8062151861624156, "grad_norm": 0.2788341858801879, "learning_rate": 0.000488936352387345, "loss": 3.107247829437256, "step": 4787, "token_acc": 0.28618070798266015 }, { "epoch": 2.8068015244796247, "grad_norm": 0.2812183411656589, "learning_rate": 0.0004889292228566635, "loss": 3.1420722007751465, "step": 4788, "token_acc": 0.28230795038740186 }, { "epoch": 2.807387862796834, "grad_norm": 0.2996716694326207, "learning_rate": 0.000488922091081564, "loss": 3.142725944519043, "step": 4789, "token_acc": 0.2827711701391617 }, { "epoch": 2.807974201114043, "grad_norm": 0.2803489956355042, "learning_rate": 0.0004889149570621133, "loss": 3.1395883560180664, "step": 4790, "token_acc": 0.28265799916539014 }, { "epoch": 2.8085605394312516, "grad_norm": 0.29353423634237885, "learning_rate": 0.0004889078207983785, "loss": 3.131152629852295, "step": 4791, "token_acc": 0.28405726715198737 }, { "epoch": 2.8091468777484607, "grad_norm": 0.24898360870191433, "learning_rate": 0.0004889006822904268, "loss": 3.1402578353881836, "step": 4792, "token_acc": 0.2809907985973793 }, { "epoch": 2.80973321606567, "grad_norm": 0.2201221802705435, "learning_rate": 0.000488893541538325, "loss": 3.1404025554656982, "step": 4793, "token_acc": 0.28434244397640435 }, { "epoch": 2.810319554382879, "grad_norm": 0.26767496824734044, "learning_rate": 0.0004888863985421403, "loss": 3.138634204864502, "step": 4794, "token_acc": 0.282695086246518 }, { "epoch": 2.810905892700088, "grad_norm": 0.21128748436489886, "learning_rate": 0.0004888792533019398, "loss": 3.1629817485809326, "step": 4795, "token_acc": 0.2790732630688607 }, { "epoch": 2.811492231017297, "grad_norm": 0.2497509363202678, "learning_rate": 0.0004888721058177905, "loss": 3.114626884460449, "step": 4796, "token_acc": 0.2859989712918042 }, { "epoch": 2.8120785693345063, "grad_norm": 0.3002243938230459, "learning_rate": 0.0004888649560897599, "loss": 3.0750510692596436, "step": 4797, "token_acc": 0.2924205562805785 }, { "epoch": 2.812664907651715, "grad_norm": 0.26098159775598884, "learning_rate": 0.0004888578041179147, "loss": 3.166616916656494, "step": 4798, "token_acc": 0.2789171986134225 }, { "epoch": 2.813251245968924, "grad_norm": 0.221920530249086, "learning_rate": 0.0004888506499023224, "loss": 3.1472902297973633, "step": 4799, "token_acc": 0.28272709234078996 }, { "epoch": 2.813837584286133, "grad_norm": 0.2930919568299991, "learning_rate": 0.00048884349344305, "loss": 3.136810302734375, "step": 4800, "token_acc": 0.28461438503021247 }, { "epoch": 2.8144239226033423, "grad_norm": 0.29130884409643953, "learning_rate": 0.0004888363347401649, "loss": 3.1434431076049805, "step": 4801, "token_acc": 0.28241469949846437 }, { "epoch": 2.815010260920551, "grad_norm": 0.2553519462451598, "learning_rate": 0.0004888291737937343, "loss": 3.117910861968994, "step": 4802, "token_acc": 0.28457531540716197 }, { "epoch": 2.81559659923776, "grad_norm": 0.27663672539583145, "learning_rate": 0.0004888220106038254, "loss": 3.13838529586792, "step": 4803, "token_acc": 0.2851304366039356 }, { "epoch": 2.816182937554969, "grad_norm": 0.2830561135618201, "learning_rate": 0.0004888148451705055, "loss": 3.092414617538452, "step": 4804, "token_acc": 0.28795570512387064 }, { "epoch": 2.8167692758721783, "grad_norm": 0.259522735853877, "learning_rate": 0.000488807677493842, "loss": 3.1096653938293457, "step": 4805, "token_acc": 0.28804866907520454 }, { "epoch": 2.8173556141893874, "grad_norm": 0.2950226067418885, "learning_rate": 0.0004888005075739021, "loss": 3.1235463619232178, "step": 4806, "token_acc": 0.2838370191649467 }, { "epoch": 2.8179419525065965, "grad_norm": 0.2906369915166881, "learning_rate": 0.0004887933354107532, "loss": 3.1253035068511963, "step": 4807, "token_acc": 0.2860261006594012 }, { "epoch": 2.818528290823805, "grad_norm": 0.2727124794917412, "learning_rate": 0.0004887861610044628, "loss": 3.177701234817505, "step": 4808, "token_acc": 0.27767760451236523 }, { "epoch": 2.8191146291410143, "grad_norm": 0.2405233197068983, "learning_rate": 0.0004887789843550981, "loss": 3.103774070739746, "step": 4809, "token_acc": 0.2872932277019881 }, { "epoch": 2.8197009674582234, "grad_norm": 0.2369185188193357, "learning_rate": 0.0004887718054627266, "loss": 3.108470916748047, "step": 4810, "token_acc": 0.2888774175966711 }, { "epoch": 2.8202873057754325, "grad_norm": 0.21363715161438507, "learning_rate": 0.0004887646243274158, "loss": 3.1599059104919434, "step": 4811, "token_acc": 0.2792772883835767 }, { "epoch": 2.820873644092641, "grad_norm": 0.21772661749902666, "learning_rate": 0.000488757440949233, "loss": 3.1554532051086426, "step": 4812, "token_acc": 0.28124307146552363 }, { "epoch": 2.8214599824098503, "grad_norm": 0.3022079624029741, "learning_rate": 0.0004887502553282459, "loss": 3.1413803100585938, "step": 4813, "token_acc": 0.28335416300856253 }, { "epoch": 2.8220463207270594, "grad_norm": 0.36893414015354886, "learning_rate": 0.0004887430674645218, "loss": 3.139862537384033, "step": 4814, "token_acc": 0.28398390434700305 }, { "epoch": 2.8226326590442685, "grad_norm": 0.2524035940052976, "learning_rate": 0.0004887358773581283, "loss": 3.1565520763397217, "step": 4815, "token_acc": 0.28279112691514574 }, { "epoch": 2.8232189973614776, "grad_norm": 0.2266042514209356, "learning_rate": 0.0004887286850091329, "loss": 3.140373945236206, "step": 4816, "token_acc": 0.28330737540192924 }, { "epoch": 2.8238053356786867, "grad_norm": 0.2962740093235684, "learning_rate": 0.0004887214904176032, "loss": 3.134408950805664, "step": 4817, "token_acc": 0.28433784337843376 }, { "epoch": 2.824391673995896, "grad_norm": 0.22089683755830478, "learning_rate": 0.0004887142935836069, "loss": 3.138504981994629, "step": 4818, "token_acc": 0.2825005670160189 }, { "epoch": 2.8249780123131045, "grad_norm": 0.2637003735275535, "learning_rate": 0.0004887070945072113, "loss": 3.1459946632385254, "step": 4819, "token_acc": 0.28209072359127296 }, { "epoch": 2.8255643506303136, "grad_norm": 0.24770239416044743, "learning_rate": 0.0004886998931884843, "loss": 3.13864803314209, "step": 4820, "token_acc": 0.2836980760255147 }, { "epoch": 2.8261506889475227, "grad_norm": 0.23454315326146682, "learning_rate": 0.0004886926896274934, "loss": 3.113605499267578, "step": 4821, "token_acc": 0.28490206600482965 }, { "epoch": 2.826737027264732, "grad_norm": 0.2651431563404158, "learning_rate": 0.0004886854838243065, "loss": 3.0919008255004883, "step": 4822, "token_acc": 0.2896715411902811 }, { "epoch": 2.8273233655819405, "grad_norm": 0.24038376264708838, "learning_rate": 0.0004886782757789909, "loss": 3.102668285369873, "step": 4823, "token_acc": 0.28718365066017865 }, { "epoch": 2.8279097038991496, "grad_norm": 0.24144183990414794, "learning_rate": 0.0004886710654916146, "loss": 3.1367955207824707, "step": 4824, "token_acc": 0.2845948606362035 }, { "epoch": 2.8284960422163588, "grad_norm": 0.2755659530292548, "learning_rate": 0.0004886638529622453, "loss": 3.163353204727173, "step": 4825, "token_acc": 0.27903680057562147 }, { "epoch": 2.829082380533568, "grad_norm": 0.25532211776843117, "learning_rate": 0.0004886566381909507, "loss": 3.1461715698242188, "step": 4826, "token_acc": 0.28233120901450076 }, { "epoch": 2.829668718850777, "grad_norm": 0.23827733094897113, "learning_rate": 0.0004886494211777984, "loss": 3.1009912490844727, "step": 4827, "token_acc": 0.28933638183225835 }, { "epoch": 2.830255057167986, "grad_norm": 0.2877732760477564, "learning_rate": 0.0004886422019228565, "loss": 3.1174845695495605, "step": 4828, "token_acc": 0.28412332439678284 }, { "epoch": 2.830841395485195, "grad_norm": 0.31576323982471777, "learning_rate": 0.0004886349804261928, "loss": 3.1727046966552734, "step": 4829, "token_acc": 0.27816600798719143 }, { "epoch": 2.831427733802404, "grad_norm": 0.2745040491637979, "learning_rate": 0.000488627756687875, "loss": 3.0889248847961426, "step": 4830, "token_acc": 0.29038323758386403 }, { "epoch": 2.832014072119613, "grad_norm": 0.2527200821652121, "learning_rate": 0.0004886205307079708, "loss": 3.1660141944885254, "step": 4831, "token_acc": 0.2809941638699069 }, { "epoch": 2.832600410436822, "grad_norm": 0.24994001644372724, "learning_rate": 0.0004886133024865483, "loss": 3.097322702407837, "step": 4832, "token_acc": 0.2899228869719839 }, { "epoch": 2.833186748754031, "grad_norm": 0.2772194207028575, "learning_rate": 0.0004886060720236755, "loss": 3.113114595413208, "step": 4833, "token_acc": 0.287156201779759 }, { "epoch": 2.83377308707124, "grad_norm": 0.3463963017644551, "learning_rate": 0.0004885988393194201, "loss": 3.1332573890686035, "step": 4834, "token_acc": 0.28390824478147597 }, { "epoch": 2.834359425388449, "grad_norm": 0.3308777573161761, "learning_rate": 0.00048859160437385, "loss": 3.1541991233825684, "step": 4835, "token_acc": 0.2819822501912031 }, { "epoch": 2.834945763705658, "grad_norm": 0.27038624227132807, "learning_rate": 0.0004885843671870335, "loss": 3.08933687210083, "step": 4836, "token_acc": 0.2914752679145297 }, { "epoch": 2.835532102022867, "grad_norm": 0.28604992831551707, "learning_rate": 0.0004885771277590383, "loss": 3.156747817993164, "step": 4837, "token_acc": 0.28124876517029457 }, { "epoch": 2.8361184403400763, "grad_norm": 0.30495858998986786, "learning_rate": 0.0004885698860899324, "loss": 3.1565260887145996, "step": 4838, "token_acc": 0.2806578504697083 }, { "epoch": 2.8367047786572854, "grad_norm": 0.2666780861802044, "learning_rate": 0.000488562642179784, "loss": 3.1229500770568848, "step": 4839, "token_acc": 0.2853679921605034 }, { "epoch": 2.8372911169744945, "grad_norm": 0.27107225996601786, "learning_rate": 0.0004885553960286609, "loss": 3.1419596672058105, "step": 4840, "token_acc": 0.2810470033896171 }, { "epoch": 2.837877455291703, "grad_norm": 0.34543296921476974, "learning_rate": 0.0004885481476366314, "loss": 3.133160352706909, "step": 4841, "token_acc": 0.28292354944876696 }, { "epoch": 2.8384637936089123, "grad_norm": 0.25788836693040357, "learning_rate": 0.0004885408970037636, "loss": 3.1340599060058594, "step": 4842, "token_acc": 0.28384641269323324 }, { "epoch": 2.8390501319261214, "grad_norm": 0.3177809911161217, "learning_rate": 0.0004885336441301253, "loss": 3.1265523433685303, "step": 4843, "token_acc": 0.28330160714697467 }, { "epoch": 2.8396364702433305, "grad_norm": 0.25404191643852275, "learning_rate": 0.000488526389015785, "loss": 3.0653738975524902, "step": 4844, "token_acc": 0.2939909337612625 }, { "epoch": 2.840222808560539, "grad_norm": 0.3173571984748071, "learning_rate": 0.0004885191316608106, "loss": 3.137275457382202, "step": 4845, "token_acc": 0.2822794376726634 }, { "epoch": 2.8408091468777483, "grad_norm": 0.30475774595429855, "learning_rate": 0.0004885118720652704, "loss": 3.1285552978515625, "step": 4846, "token_acc": 0.28453220075093605 }, { "epoch": 2.8413954851949574, "grad_norm": 0.2824073090441585, "learning_rate": 0.0004885046102292327, "loss": 3.094118595123291, "step": 4847, "token_acc": 0.29059716111378864 }, { "epoch": 2.8419818235121665, "grad_norm": 0.2827726405057913, "learning_rate": 0.0004884973461527654, "loss": 3.171477794647217, "step": 4848, "token_acc": 0.278738587223939 }, { "epoch": 2.8425681618293757, "grad_norm": 0.27057062522085346, "learning_rate": 0.000488490079835937, "loss": 3.169370412826538, "step": 4849, "token_acc": 0.2782568996184032 }, { "epoch": 2.8431545001465848, "grad_norm": 0.29739606626571946, "learning_rate": 0.0004884828112788155, "loss": 3.1766932010650635, "step": 4850, "token_acc": 0.27792062306625415 }, { "epoch": 2.843740838463794, "grad_norm": 0.27094101805440496, "learning_rate": 0.0004884755404814695, "loss": 3.1282691955566406, "step": 4851, "token_acc": 0.28499901107862874 }, { "epoch": 2.8443271767810026, "grad_norm": 0.2721023355385571, "learning_rate": 0.0004884682674439672, "loss": 3.154409885406494, "step": 4852, "token_acc": 0.28127260874567 }, { "epoch": 2.8449135150982117, "grad_norm": 0.27513560094676087, "learning_rate": 0.0004884609921663767, "loss": 3.104313850402832, "step": 4853, "token_acc": 0.28854601863651275 }, { "epoch": 2.8454998534154208, "grad_norm": 0.23135231606183784, "learning_rate": 0.0004884537146487666, "loss": 3.135647773742676, "step": 4854, "token_acc": 0.2824167587687539 }, { "epoch": 2.84608619173263, "grad_norm": 0.28390059504146725, "learning_rate": 0.0004884464348912052, "loss": 3.1599316596984863, "step": 4855, "token_acc": 0.28203571409797457 }, { "epoch": 2.8466725300498386, "grad_norm": 0.2602915882825771, "learning_rate": 0.0004884391528937608, "loss": 3.12893009185791, "step": 4856, "token_acc": 0.28457589959697005 }, { "epoch": 2.8472588683670477, "grad_norm": 0.29280848821246713, "learning_rate": 0.0004884318686565019, "loss": 3.1245017051696777, "step": 4857, "token_acc": 0.2845064718536167 }, { "epoch": 2.847845206684257, "grad_norm": 0.20905816049308512, "learning_rate": 0.0004884245821794969, "loss": 3.1666455268859863, "step": 4858, "token_acc": 0.2779463493169673 }, { "epoch": 2.848431545001466, "grad_norm": 0.3349393279738938, "learning_rate": 0.0004884172934628142, "loss": 3.1396267414093018, "step": 4859, "token_acc": 0.2836357776515317 }, { "epoch": 2.849017883318675, "grad_norm": 0.25368987618105154, "learning_rate": 0.0004884100025065223, "loss": 3.1463000774383545, "step": 4860, "token_acc": 0.28435287640641116 }, { "epoch": 2.849604221635884, "grad_norm": 0.2991508483751611, "learning_rate": 0.0004884027093106896, "loss": 3.1236701011657715, "step": 4861, "token_acc": 0.2827968164178123 }, { "epoch": 2.850190559953093, "grad_norm": 0.29225931230625884, "learning_rate": 0.0004883954138753849, "loss": 3.1588516235351562, "step": 4862, "token_acc": 0.2795737076556511 }, { "epoch": 2.850776898270302, "grad_norm": 0.2603450766875072, "learning_rate": 0.0004883881162006763, "loss": 3.139554500579834, "step": 4863, "token_acc": 0.284038429764378 }, { "epoch": 2.851363236587511, "grad_norm": 0.27184290452018395, "learning_rate": 0.0004883808162866328, "loss": 3.087451934814453, "step": 4864, "token_acc": 0.2896082729163154 }, { "epoch": 2.85194957490472, "grad_norm": 0.2381407118896836, "learning_rate": 0.0004883735141333227, "loss": 3.1246142387390137, "step": 4865, "token_acc": 0.28369183378519514 }, { "epoch": 2.852535913221929, "grad_norm": 0.2615941572052395, "learning_rate": 0.0004883662097408145, "loss": 3.1363883018493652, "step": 4866, "token_acc": 0.28562646170910744 }, { "epoch": 2.853122251539138, "grad_norm": 0.22284654722782624, "learning_rate": 0.0004883589031091771, "loss": 3.078174114227295, "step": 4867, "token_acc": 0.29076180115588834 }, { "epoch": 2.853708589856347, "grad_norm": 0.27795248716023313, "learning_rate": 0.0004883515942384789, "loss": 3.164367198944092, "step": 4868, "token_acc": 0.2793844975098966 }, { "epoch": 2.854294928173556, "grad_norm": 0.225008912028625, "learning_rate": 0.0004883442831287888, "loss": 3.1500251293182373, "step": 4869, "token_acc": 0.28182562685965584 }, { "epoch": 2.8548812664907652, "grad_norm": 0.23970016918237333, "learning_rate": 0.0004883369697801751, "loss": 3.2006447315216064, "step": 4870, "token_acc": 0.2736985012226259 }, { "epoch": 2.8554676048079743, "grad_norm": 0.2327864579120954, "learning_rate": 0.000488329654192707, "loss": 3.172611713409424, "step": 4871, "token_acc": 0.2788710984199706 }, { "epoch": 2.8560539431251835, "grad_norm": 0.2664719904586091, "learning_rate": 0.0004883223363664527, "loss": 3.1084370613098145, "step": 4872, "token_acc": 0.2863302503753478 }, { "epoch": 2.856640281442392, "grad_norm": 0.24426386549479084, "learning_rate": 0.0004883150163014814, "loss": 3.13490629196167, "step": 4873, "token_acc": 0.28431476326068317 }, { "epoch": 2.8572266197596012, "grad_norm": 0.2399171842575517, "learning_rate": 0.0004883076939978616, "loss": 3.113438606262207, "step": 4874, "token_acc": 0.28580018419243286 }, { "epoch": 2.8578129580768104, "grad_norm": 0.30837687459020247, "learning_rate": 0.000488300369455662, "loss": 3.1295883655548096, "step": 4875, "token_acc": 0.2841399843246172 }, { "epoch": 2.8583992963940195, "grad_norm": 0.24295209598079248, "learning_rate": 0.0004882930426749517, "loss": 3.1296138763427734, "step": 4876, "token_acc": 0.28432564099956265 }, { "epoch": 2.858985634711228, "grad_norm": 0.2721329931576096, "learning_rate": 0.0004882857136557994, "loss": 3.153696298599243, "step": 4877, "token_acc": 0.28128367678382604 }, { "epoch": 2.8595719730284372, "grad_norm": 0.29408264410763685, "learning_rate": 0.00048827838239827383, "loss": 3.162203788757324, "step": 4878, "token_acc": 0.2799060719642149 }, { "epoch": 2.8601583113456464, "grad_norm": 0.24398074797271935, "learning_rate": 0.0004882710489024439, "loss": 3.126344919204712, "step": 4879, "token_acc": 0.28400916516104757 }, { "epoch": 2.8607446496628555, "grad_norm": 0.30150785669185504, "learning_rate": 0.0004882637131683786, "loss": 3.1272635459899902, "step": 4880, "token_acc": 0.28365464140311936 }, { "epoch": 2.8613309879800646, "grad_norm": 0.28953445745000006, "learning_rate": 0.00048825637519614673, "loss": 3.1855874061584473, "step": 4881, "token_acc": 0.2761914046033218 }, { "epoch": 2.8619173262972737, "grad_norm": 0.30295388905733067, "learning_rate": 0.00048824903498581736, "loss": 3.138740062713623, "step": 4882, "token_acc": 0.28357544858637357 }, { "epoch": 2.862503664614483, "grad_norm": 0.28710693378579333, "learning_rate": 0.00048824169253745933, "loss": 3.1260299682617188, "step": 4883, "token_acc": 0.2864039045718571 }, { "epoch": 2.8630900029316915, "grad_norm": 0.23524056348061323, "learning_rate": 0.0004882343478511415, "loss": 3.1516289710998535, "step": 4884, "token_acc": 0.2826598219049642 }, { "epoch": 2.8636763412489006, "grad_norm": 0.2561964033380634, "learning_rate": 0.00048822700092693316, "loss": 3.118824005126953, "step": 4885, "token_acc": 0.2851107170184072 }, { "epoch": 2.8642626795661097, "grad_norm": 0.3338957719049952, "learning_rate": 0.00048821965176490314, "loss": 3.142913818359375, "step": 4886, "token_acc": 0.28275287789019454 }, { "epoch": 2.864849017883319, "grad_norm": 0.31284440494613225, "learning_rate": 0.00048821230036512044, "loss": 3.1092324256896973, "step": 4887, "token_acc": 0.2874816034904722 }, { "epoch": 2.8654353562005275, "grad_norm": 0.21716681335491536, "learning_rate": 0.0004882049467276541, "loss": 3.117408275604248, "step": 4888, "token_acc": 0.28670610642350214 }, { "epoch": 2.8660216945177366, "grad_norm": 0.2608137208737408, "learning_rate": 0.0004881975908525734, "loss": 3.1200599670410156, "step": 4889, "token_acc": 0.2869746508350062 }, { "epoch": 2.8666080328349457, "grad_norm": 0.2750466535009853, "learning_rate": 0.00048819023273994727, "loss": 3.1784353256225586, "step": 4890, "token_acc": 0.2776120829267802 }, { "epoch": 2.867194371152155, "grad_norm": 0.24547870942754935, "learning_rate": 0.00048818287238984486, "loss": 3.1103293895721436, "step": 4891, "token_acc": 0.287287465707776 }, { "epoch": 2.867780709469364, "grad_norm": 0.313609054127639, "learning_rate": 0.00048817550980233536, "loss": 3.0979578495025635, "step": 4892, "token_acc": 0.28923173720982054 }, { "epoch": 2.868367047786573, "grad_norm": 0.2509927512284993, "learning_rate": 0.00048816814497748784, "loss": 3.160936117172241, "step": 4893, "token_acc": 0.28152552641602524 }, { "epoch": 2.868953386103782, "grad_norm": 0.29532063041266643, "learning_rate": 0.00048816077791537157, "loss": 3.127889633178711, "step": 4894, "token_acc": 0.283574443046707 }, { "epoch": 2.869539724420991, "grad_norm": 0.34693096371133986, "learning_rate": 0.0004881534086160557, "loss": 3.173649787902832, "step": 4895, "token_acc": 0.27964687219808265 }, { "epoch": 2.8701260627382, "grad_norm": 0.2428215841349258, "learning_rate": 0.00048814603707960947, "loss": 3.0773868560791016, "step": 4896, "token_acc": 0.29024613872987265 }, { "epoch": 2.870712401055409, "grad_norm": 0.3115948784738602, "learning_rate": 0.00048813866330610215, "loss": 3.1171956062316895, "step": 4897, "token_acc": 0.286990333713322 }, { "epoch": 2.871298739372618, "grad_norm": 0.2715649954485524, "learning_rate": 0.000488131287295603, "loss": 3.122842788696289, "step": 4898, "token_acc": 0.28530024170006174 }, { "epoch": 2.871885077689827, "grad_norm": 0.28934515336328087, "learning_rate": 0.00048812390904818116, "loss": 3.139252185821533, "step": 4899, "token_acc": 0.28268319432237543 }, { "epoch": 2.872471416007036, "grad_norm": 0.2863952705746134, "learning_rate": 0.0004881165285639062, "loss": 3.126121997833252, "step": 4900, "token_acc": 0.28395641900761354 }, { "epoch": 2.873057754324245, "grad_norm": 0.27340161269909075, "learning_rate": 0.00048810914584284726, "loss": 3.115375518798828, "step": 4901, "token_acc": 0.2864196073275249 }, { "epoch": 2.873644092641454, "grad_norm": 0.268884494905015, "learning_rate": 0.0004881017608850738, "loss": 3.163297414779663, "step": 4902, "token_acc": 0.27963908352231703 }, { "epoch": 2.8742304309586633, "grad_norm": 0.22514243882692994, "learning_rate": 0.00048809437369065514, "loss": 3.101938247680664, "step": 4903, "token_acc": 0.28692334850149065 }, { "epoch": 2.8748167692758724, "grad_norm": 0.2569857420088353, "learning_rate": 0.00048808698425966063, "loss": 3.15704083442688, "step": 4904, "token_acc": 0.27955448601238064 }, { "epoch": 2.875403107593081, "grad_norm": 0.21552991499180688, "learning_rate": 0.00048807959259215974, "loss": 3.1478776931762695, "step": 4905, "token_acc": 0.28106563152227376 }, { "epoch": 2.87598944591029, "grad_norm": 0.2544848470115608, "learning_rate": 0.00048807219868822195, "loss": 3.157792568206787, "step": 4906, "token_acc": 0.2804688787197517 }, { "epoch": 2.8765757842274993, "grad_norm": 0.22163938403224986, "learning_rate": 0.0004880648025479166, "loss": 3.1324574947357178, "step": 4907, "token_acc": 0.2848131722353002 }, { "epoch": 2.8771621225447084, "grad_norm": 0.2822877488634309, "learning_rate": 0.00048805740417131325, "loss": 3.1124801635742188, "step": 4908, "token_acc": 0.28740142325192275 }, { "epoch": 2.8777484608619175, "grad_norm": 0.24468603546085294, "learning_rate": 0.00048805000355848133, "loss": 3.1252307891845703, "step": 4909, "token_acc": 0.2847210107907394 }, { "epoch": 2.878334799179126, "grad_norm": 0.28729574012784703, "learning_rate": 0.00048804260070949045, "loss": 3.103818893432617, "step": 4910, "token_acc": 0.2882280472222081 }, { "epoch": 2.8789211374963353, "grad_norm": 0.26472334428852384, "learning_rate": 0.0004880351956244101, "loss": 3.154768466949463, "step": 4911, "token_acc": 0.2803519462818833 }, { "epoch": 2.8795074758135444, "grad_norm": 0.27072707398898543, "learning_rate": 0.00048802778830330987, "loss": 3.1232645511627197, "step": 4912, "token_acc": 0.28609724705612744 }, { "epoch": 2.8800938141307535, "grad_norm": 0.34702788946768703, "learning_rate": 0.00048802037874625927, "loss": 3.1395516395568848, "step": 4913, "token_acc": 0.2826941719337702 }, { "epoch": 2.8806801524479626, "grad_norm": 0.24268875024728914, "learning_rate": 0.00048801296695332797, "loss": 3.122776985168457, "step": 4914, "token_acc": 0.2837166115492241 }, { "epoch": 2.8812664907651717, "grad_norm": 0.29519130281033223, "learning_rate": 0.0004880055529245855, "loss": 3.0927324295043945, "step": 4915, "token_acc": 0.2906697390378863 }, { "epoch": 2.8818528290823804, "grad_norm": 0.25951170370563165, "learning_rate": 0.00048799813666010165, "loss": 3.134528875350952, "step": 4916, "token_acc": 0.2842230838055473 }, { "epoch": 2.8824391673995895, "grad_norm": 0.2433983553476355, "learning_rate": 0.000487990718159946, "loss": 3.153985023498535, "step": 4917, "token_acc": 0.28151600036144425 }, { "epoch": 2.8830255057167986, "grad_norm": 0.22552680240463832, "learning_rate": 0.00048798329742418824, "loss": 3.1615052223205566, "step": 4918, "token_acc": 0.28131498835719804 }, { "epoch": 2.8836118440340077, "grad_norm": 0.23457797177383471, "learning_rate": 0.00048797587445289814, "loss": 3.1286332607269287, "step": 4919, "token_acc": 0.2851543245482481 }, { "epoch": 2.8841981823512164, "grad_norm": 0.24432487782522214, "learning_rate": 0.0004879684492461453, "loss": 3.1091256141662598, "step": 4920, "token_acc": 0.2865839175675928 }, { "epoch": 2.8847845206684255, "grad_norm": 0.2377113188607445, "learning_rate": 0.0004879610218039996, "loss": 3.1517481803894043, "step": 4921, "token_acc": 0.2838878407190956 }, { "epoch": 2.8853708589856346, "grad_norm": 0.24372504589064947, "learning_rate": 0.00048795359212653076, "loss": 3.0923757553100586, "step": 4922, "token_acc": 0.29036334598717367 }, { "epoch": 2.8859571973028437, "grad_norm": 0.23310574223944144, "learning_rate": 0.00048794616021380854, "loss": 3.1291494369506836, "step": 4923, "token_acc": 0.2847905045015342 }, { "epoch": 2.886543535620053, "grad_norm": 0.2249346197215438, "learning_rate": 0.0004879387260659027, "loss": 3.116410255432129, "step": 4924, "token_acc": 0.28687137453996797 }, { "epoch": 2.887129873937262, "grad_norm": 0.2991269378866682, "learning_rate": 0.0004879312896828833, "loss": 3.1162405014038086, "step": 4925, "token_acc": 0.2849651999377705 }, { "epoch": 2.887716212254471, "grad_norm": 0.2552031973321904, "learning_rate": 0.00048792385106481993, "loss": 3.137662172317505, "step": 4926, "token_acc": 0.283719781882669 }, { "epoch": 2.8883025505716797, "grad_norm": 0.22674399464041056, "learning_rate": 0.0004879164102117827, "loss": 3.1396563053131104, "step": 4927, "token_acc": 0.28465568139403163 }, { "epoch": 2.888888888888889, "grad_norm": 0.2886787981477102, "learning_rate": 0.00048790896712384136, "loss": 3.117295265197754, "step": 4928, "token_acc": 0.28683486749842524 }, { "epoch": 2.889475227206098, "grad_norm": 0.27655948163577637, "learning_rate": 0.00048790152180106585, "loss": 3.1172399520874023, "step": 4929, "token_acc": 0.28440871350726593 }, { "epoch": 2.890061565523307, "grad_norm": 0.2521089063548159, "learning_rate": 0.00048789407424352604, "loss": 3.1061902046203613, "step": 4930, "token_acc": 0.2872061168794341 }, { "epoch": 2.8906479038405157, "grad_norm": 0.3298256128425193, "learning_rate": 0.00048788662445129204, "loss": 3.1459975242614746, "step": 4931, "token_acc": 0.28299710276505696 }, { "epoch": 2.891234242157725, "grad_norm": 0.30398850844897296, "learning_rate": 0.0004878791724244338, "loss": 3.144563674926758, "step": 4932, "token_acc": 0.28111557467382414 }, { "epoch": 2.891820580474934, "grad_norm": 0.32186584283597364, "learning_rate": 0.0004878717181630212, "loss": 3.109895706176758, "step": 4933, "token_acc": 0.28757185227958737 }, { "epoch": 2.892406918792143, "grad_norm": 0.3443025871349975, "learning_rate": 0.0004878642616671244, "loss": 3.1630849838256836, "step": 4934, "token_acc": 0.2814193635299116 }, { "epoch": 2.892993257109352, "grad_norm": 0.2301029925206164, "learning_rate": 0.0004878568029368134, "loss": 3.132241725921631, "step": 4935, "token_acc": 0.2832746393940856 }, { "epoch": 2.8935795954265613, "grad_norm": 0.2620890795734399, "learning_rate": 0.00048784934197215827, "loss": 3.125356435775757, "step": 4936, "token_acc": 0.2857983164331912 }, { "epoch": 2.8941659337437704, "grad_norm": 0.22218595847011413, "learning_rate": 0.0004878418787732289, "loss": 3.1670401096343994, "step": 4937, "token_acc": 0.2795852374045365 }, { "epoch": 2.894752272060979, "grad_norm": 0.2584335746268995, "learning_rate": 0.0004878344133400958, "loss": 3.1224045753479004, "step": 4938, "token_acc": 0.2843456352812928 }, { "epoch": 2.895338610378188, "grad_norm": 0.251835323765681, "learning_rate": 0.00048782694567282874, "loss": 3.1495532989501953, "step": 4939, "token_acc": 0.28241576672438634 }, { "epoch": 2.8959249486953973, "grad_norm": 0.23854720296066906, "learning_rate": 0.00048781947577149806, "loss": 3.0989975929260254, "step": 4940, "token_acc": 0.2916883357619884 }, { "epoch": 2.8965112870126064, "grad_norm": 0.2500097892330433, "learning_rate": 0.00048781200363617384, "loss": 3.1113104820251465, "step": 4941, "token_acc": 0.28744871583930875 }, { "epoch": 2.897097625329815, "grad_norm": 0.24953324638278887, "learning_rate": 0.0004878045292669263, "loss": 3.1418986320495605, "step": 4942, "token_acc": 0.28371842669769576 }, { "epoch": 2.897683963647024, "grad_norm": 0.24820076041688197, "learning_rate": 0.00048779705266382566, "loss": 3.081324338912964, "step": 4943, "token_acc": 0.29091045830457124 }, { "epoch": 2.8982703019642333, "grad_norm": 0.2243659998028146, "learning_rate": 0.00048778957382694215, "loss": 3.1396548748016357, "step": 4944, "token_acc": 0.2848575198792626 }, { "epoch": 2.8988566402814424, "grad_norm": 0.2207393730603077, "learning_rate": 0.00048778209275634603, "loss": 3.143639087677002, "step": 4945, "token_acc": 0.2830619183965641 }, { "epoch": 2.8994429785986515, "grad_norm": 0.30214378413488546, "learning_rate": 0.00048777460945210755, "loss": 3.134450912475586, "step": 4946, "token_acc": 0.284913070746327 }, { "epoch": 2.9000293169158606, "grad_norm": 0.3012189256467185, "learning_rate": 0.000487767123914297, "loss": 3.1279685497283936, "step": 4947, "token_acc": 0.2827419230799829 }, { "epoch": 2.9006156552330697, "grad_norm": 0.21262570409533066, "learning_rate": 0.0004877596361429848, "loss": 3.10392427444458, "step": 4948, "token_acc": 0.28851040552972473 }, { "epoch": 2.9012019935502784, "grad_norm": 0.31257265836380166, "learning_rate": 0.00048775214613824114, "loss": 3.184384346008301, "step": 4949, "token_acc": 0.27766400211402525 }, { "epoch": 2.9017883318674875, "grad_norm": 0.34238082151502763, "learning_rate": 0.00048774465390013643, "loss": 3.1837613582611084, "step": 4950, "token_acc": 0.2784403871052689 }, { "epoch": 2.9023746701846966, "grad_norm": 0.242457552158686, "learning_rate": 0.00048773715942874107, "loss": 3.1589243412017822, "step": 4951, "token_acc": 0.2813106662579677 }, { "epoch": 2.9029610085019057, "grad_norm": 0.3005332064815135, "learning_rate": 0.0004877296627241254, "loss": 3.12115216255188, "step": 4952, "token_acc": 0.2860999072423892 }, { "epoch": 2.9035473468191144, "grad_norm": 0.21735373005870992, "learning_rate": 0.00048772216378636, "loss": 3.1634509563446045, "step": 4953, "token_acc": 0.2804573349470225 }, { "epoch": 2.9041336851363235, "grad_norm": 0.264352902620328, "learning_rate": 0.0004877146626155152, "loss": 3.0979161262512207, "step": 4954, "token_acc": 0.2897223608342499 }, { "epoch": 2.9047200234535326, "grad_norm": 0.29044381708577865, "learning_rate": 0.0004877071592116614, "loss": 3.152224063873291, "step": 4955, "token_acc": 0.279707410375042 }, { "epoch": 2.9053063617707418, "grad_norm": 0.332172293547952, "learning_rate": 0.00048769965357486916, "loss": 3.1629724502563477, "step": 4956, "token_acc": 0.2812857354005088 }, { "epoch": 2.905892700087951, "grad_norm": 0.28792439526554087, "learning_rate": 0.00048769214570520904, "loss": 3.1459832191467285, "step": 4957, "token_acc": 0.2817521322522902 }, { "epoch": 2.90647903840516, "grad_norm": 0.243877731574557, "learning_rate": 0.0004876846356027514, "loss": 3.1482694149017334, "step": 4958, "token_acc": 0.28087909356422314 }, { "epoch": 2.9070653767223686, "grad_norm": 0.24062871732834393, "learning_rate": 0.00048767712326756694, "loss": 3.1307859420776367, "step": 4959, "token_acc": 0.2844771601322176 }, { "epoch": 2.9076517150395778, "grad_norm": 0.24555808404074167, "learning_rate": 0.00048766960869972624, "loss": 3.1613354682922363, "step": 4960, "token_acc": 0.2794603496326324 }, { "epoch": 2.908238053356787, "grad_norm": 0.24587791851756013, "learning_rate": 0.0004876620918992998, "loss": 3.1440720558166504, "step": 4961, "token_acc": 0.28235912693962884 }, { "epoch": 2.908824391673996, "grad_norm": 0.23536638324006415, "learning_rate": 0.00048765457286635826, "loss": 3.1673424243927, "step": 4962, "token_acc": 0.2788081499660811 }, { "epoch": 2.909410729991205, "grad_norm": 0.23296239241269304, "learning_rate": 0.0004876470516009722, "loss": 3.1183199882507324, "step": 4963, "token_acc": 0.2860383372388748 }, { "epoch": 2.9099970683084138, "grad_norm": 0.2325040259701827, "learning_rate": 0.0004876395281032124, "loss": 3.125436305999756, "step": 4964, "token_acc": 0.2854846778307751 }, { "epoch": 2.910583406625623, "grad_norm": 0.24309006947276676, "learning_rate": 0.0004876320023731494, "loss": 3.1273741722106934, "step": 4965, "token_acc": 0.2858567641450445 }, { "epoch": 2.911169744942832, "grad_norm": 0.21467208252067768, "learning_rate": 0.000487624474410854, "loss": 3.1292076110839844, "step": 4966, "token_acc": 0.28431792871595657 }, { "epoch": 2.911756083260041, "grad_norm": 0.22260149417323666, "learning_rate": 0.0004876169442163968, "loss": 3.100684642791748, "step": 4967, "token_acc": 0.28478081246825704 }, { "epoch": 2.91234242157725, "grad_norm": 0.25475399156640255, "learning_rate": 0.00048760941178984865, "loss": 3.1385393142700195, "step": 4968, "token_acc": 0.2827265086395817 }, { "epoch": 2.9129287598944593, "grad_norm": 0.2689112418370151, "learning_rate": 0.00048760187713128026, "loss": 3.155050277709961, "step": 4969, "token_acc": 0.2811823812435206 }, { "epoch": 2.913515098211668, "grad_norm": 0.2706116503433038, "learning_rate": 0.0004875943402407624, "loss": 3.0980143547058105, "step": 4970, "token_acc": 0.28876156505457606 }, { "epoch": 2.914101436528877, "grad_norm": 0.2307981305182796, "learning_rate": 0.00048758680111836585, "loss": 3.1046605110168457, "step": 4971, "token_acc": 0.2884801955015759 }, { "epoch": 2.914687774846086, "grad_norm": 0.2213007048394922, "learning_rate": 0.0004875792597641615, "loss": 3.0680899620056152, "step": 4972, "token_acc": 0.29343777062571896 }, { "epoch": 2.9152741131632953, "grad_norm": 0.2867207568041624, "learning_rate": 0.0004875717161782201, "loss": 3.1130900382995605, "step": 4973, "token_acc": 0.28708213635390595 }, { "epoch": 2.915860451480504, "grad_norm": 0.42013674859298566, "learning_rate": 0.0004875641703606126, "loss": 3.160256862640381, "step": 4974, "token_acc": 0.28140747383206494 }, { "epoch": 2.916446789797713, "grad_norm": 0.5600867800506169, "learning_rate": 0.00048755662231140986, "loss": 3.153989791870117, "step": 4975, "token_acc": 0.28072518836482474 }, { "epoch": 2.917033128114922, "grad_norm": 0.27464065531475684, "learning_rate": 0.0004875490720306827, "loss": 3.1440978050231934, "step": 4976, "token_acc": 0.2809371053296287 }, { "epoch": 2.9176194664321313, "grad_norm": 0.42182624221510395, "learning_rate": 0.00048754151951850214, "loss": 3.1297121047973633, "step": 4977, "token_acc": 0.2849239023612845 }, { "epoch": 2.9182058047493404, "grad_norm": 0.2842807842374587, "learning_rate": 0.00048753396477493904, "loss": 3.0647027492523193, "step": 4978, "token_acc": 0.29411764705882354 }, { "epoch": 2.9187921430665495, "grad_norm": 0.3331885838986031, "learning_rate": 0.0004875264078000645, "loss": 3.111910820007324, "step": 4979, "token_acc": 0.2858324721152765 }, { "epoch": 2.9193784813837587, "grad_norm": 0.27789912681534007, "learning_rate": 0.0004875188485939494, "loss": 3.104243040084839, "step": 4980, "token_acc": 0.2886180704564458 }, { "epoch": 2.9199648197009673, "grad_norm": 0.3143294974976295, "learning_rate": 0.0004875112871566648, "loss": 3.13938045501709, "step": 4981, "token_acc": 0.2845464135021097 }, { "epoch": 2.9205511580181764, "grad_norm": 0.21218678057505008, "learning_rate": 0.0004875037234882817, "loss": 3.116074800491333, "step": 4982, "token_acc": 0.2850433834843563 }, { "epoch": 2.9211374963353856, "grad_norm": 0.3236348785750339, "learning_rate": 0.0004874961575888711, "loss": 3.094538450241089, "step": 4983, "token_acc": 0.289173755851339 }, { "epoch": 2.9217238346525947, "grad_norm": 0.2620582726122614, "learning_rate": 0.0004874885894585042, "loss": 3.1313014030456543, "step": 4984, "token_acc": 0.284410233575072 }, { "epoch": 2.9223101729698033, "grad_norm": 0.28180325216626967, "learning_rate": 0.000487481019097252, "loss": 3.134223699569702, "step": 4985, "token_acc": 0.28451412923392694 }, { "epoch": 2.9228965112870124, "grad_norm": 0.24338775858186906, "learning_rate": 0.0004874734465051857, "loss": 3.1487369537353516, "step": 4986, "token_acc": 0.2829513441434034 }, { "epoch": 2.9234828496042216, "grad_norm": 0.28237787731808073, "learning_rate": 0.0004874658716823762, "loss": 3.165557861328125, "step": 4987, "token_acc": 0.2794731053374284 }, { "epoch": 2.9240691879214307, "grad_norm": 0.27270117052472936, "learning_rate": 0.00048745829462889503, "loss": 3.1274406909942627, "step": 4988, "token_acc": 0.2839410664696169 }, { "epoch": 2.92465552623864, "grad_norm": 0.22488968672447976, "learning_rate": 0.000487450715344813, "loss": 3.147987127304077, "step": 4989, "token_acc": 0.28190344362983744 }, { "epoch": 2.925241864555849, "grad_norm": 0.2649247462912229, "learning_rate": 0.00048744313383020153, "loss": 3.1359360218048096, "step": 4990, "token_acc": 0.2811820282336059 }, { "epoch": 2.925828202873058, "grad_norm": 0.22684200560699008, "learning_rate": 0.0004874355500851318, "loss": 3.1208736896514893, "step": 4991, "token_acc": 0.2868541313481241 }, { "epoch": 2.9264145411902667, "grad_norm": 0.24243535116984258, "learning_rate": 0.000487427964109675, "loss": 3.1571602821350098, "step": 4992, "token_acc": 0.2810554551253304 }, { "epoch": 2.927000879507476, "grad_norm": 0.23746714915998451, "learning_rate": 0.0004874203759039024, "loss": 3.0808215141296387, "step": 4993, "token_acc": 0.2909341063008429 }, { "epoch": 2.927587217824685, "grad_norm": 0.2573632656009003, "learning_rate": 0.0004874127854678853, "loss": 3.1138830184936523, "step": 4994, "token_acc": 0.285584410765308 }, { "epoch": 2.928173556141894, "grad_norm": 0.2776031992059007, "learning_rate": 0.000487405192801695, "loss": 3.134657382965088, "step": 4995, "token_acc": 0.2829649039104763 }, { "epoch": 2.9287598944591027, "grad_norm": 0.2897421656459499, "learning_rate": 0.00048739759790540285, "loss": 3.126394033432007, "step": 4996, "token_acc": 0.28627142103371545 }, { "epoch": 2.929346232776312, "grad_norm": 0.23327237980974946, "learning_rate": 0.0004873900007790801, "loss": 3.066833972930908, "step": 4997, "token_acc": 0.2927761752947725 }, { "epoch": 2.929932571093521, "grad_norm": 0.30007429625096604, "learning_rate": 0.0004873824014227983, "loss": 3.117905378341675, "step": 4998, "token_acc": 0.286602941443063 }, { "epoch": 2.93051890941073, "grad_norm": 0.28489495058823794, "learning_rate": 0.00048737479983662857, "loss": 3.134899377822876, "step": 4999, "token_acc": 0.2834375651180679 }, { "epoch": 2.931105247727939, "grad_norm": 0.21440386072585876, "learning_rate": 0.0004873671960206426, "loss": 3.109004497528076, "step": 5000, "token_acc": 0.2855063759009425 }, { "epoch": 2.9316915860451482, "grad_norm": 0.25605963330166026, "learning_rate": 0.00048735958997491157, "loss": 3.1286637783050537, "step": 5001, "token_acc": 0.28562270418237784 }, { "epoch": 2.9322779243623573, "grad_norm": 0.25141988071908633, "learning_rate": 0.00048735198169950713, "loss": 3.1293187141418457, "step": 5002, "token_acc": 0.28528434815723774 }, { "epoch": 2.932864262679566, "grad_norm": 0.2141529271474328, "learning_rate": 0.0004873443711945006, "loss": 3.1483154296875, "step": 5003, "token_acc": 0.2803410322841446 }, { "epoch": 2.933450600996775, "grad_norm": 0.3130246265328783, "learning_rate": 0.0004873367584599635, "loss": 3.1273369789123535, "step": 5004, "token_acc": 0.283746491804647 }, { "epoch": 2.9340369393139842, "grad_norm": 0.32513088324049844, "learning_rate": 0.0004873291434959674, "loss": 3.1280877590179443, "step": 5005, "token_acc": 0.284706452452695 }, { "epoch": 2.9346232776311933, "grad_norm": 0.20923199331385156, "learning_rate": 0.00048732152630258385, "loss": 3.11245059967041, "step": 5006, "token_acc": 0.28776597039469237 }, { "epoch": 2.935209615948402, "grad_norm": 0.24136986709090455, "learning_rate": 0.0004873139068798843, "loss": 3.174717903137207, "step": 5007, "token_acc": 0.27896171054132496 }, { "epoch": 2.935795954265611, "grad_norm": 0.2392244794975838, "learning_rate": 0.0004873062852279404, "loss": 3.1441166400909424, "step": 5008, "token_acc": 0.2831403496836519 }, { "epoch": 2.9363822925828202, "grad_norm": 0.2682497200847533, "learning_rate": 0.0004872986613468237, "loss": 3.1679811477661133, "step": 5009, "token_acc": 0.27847445358803624 }, { "epoch": 2.9369686309000294, "grad_norm": 0.26964364317596096, "learning_rate": 0.0004872910352366059, "loss": 3.1294591426849365, "step": 5010, "token_acc": 0.28302027034201555 }, { "epoch": 2.9375549692172385, "grad_norm": 0.22211537283776384, "learning_rate": 0.0004872834068973585, "loss": 3.1413230895996094, "step": 5011, "token_acc": 0.2842098806914244 }, { "epoch": 2.9381413075344476, "grad_norm": 0.24044243426936524, "learning_rate": 0.00048727577632915326, "loss": 3.126039981842041, "step": 5012, "token_acc": 0.28366730916099936 }, { "epoch": 2.9387276458516562, "grad_norm": 0.21969192877333163, "learning_rate": 0.00048726814353206184, "loss": 3.134089469909668, "step": 5013, "token_acc": 0.2849448362799129 }, { "epoch": 2.9393139841688654, "grad_norm": 0.25165462024635504, "learning_rate": 0.000487260508506156, "loss": 3.1366353034973145, "step": 5014, "token_acc": 0.28379830379998827 }, { "epoch": 2.9399003224860745, "grad_norm": 0.24104550424361132, "learning_rate": 0.0004872528712515073, "loss": 3.1400697231292725, "step": 5015, "token_acc": 0.28286487556934453 }, { "epoch": 2.9404866608032836, "grad_norm": 0.3091771984394633, "learning_rate": 0.00048724523176818757, "loss": 3.1702637672424316, "step": 5016, "token_acc": 0.27720797285316867 }, { "epoch": 2.9410729991204922, "grad_norm": 0.4061705666955857, "learning_rate": 0.00048723759005626867, "loss": 3.092770576477051, "step": 5017, "token_acc": 0.2907736293477542 }, { "epoch": 2.9416593374377014, "grad_norm": 0.31868675984414274, "learning_rate": 0.00048722994611582224, "loss": 3.1166653633117676, "step": 5018, "token_acc": 0.2852645800374422 }, { "epoch": 2.9422456757549105, "grad_norm": 0.26438728876931605, "learning_rate": 0.00048722229994692016, "loss": 3.119567394256592, "step": 5019, "token_acc": 0.2850664970829762 }, { "epoch": 2.9428320140721196, "grad_norm": 0.30612925404687125, "learning_rate": 0.0004872146515496342, "loss": 3.145846128463745, "step": 5020, "token_acc": 0.2825974932231139 }, { "epoch": 2.9434183523893287, "grad_norm": 0.21888526017309753, "learning_rate": 0.00048720700092403626, "loss": 3.135315418243408, "step": 5021, "token_acc": 0.2849608141737548 }, { "epoch": 2.944004690706538, "grad_norm": 0.26941200130587956, "learning_rate": 0.00048719934807019816, "loss": 3.1143932342529297, "step": 5022, "token_acc": 0.2864189088904438 }, { "epoch": 2.944591029023747, "grad_norm": 0.25301562560019936, "learning_rate": 0.00048719169298819183, "loss": 3.152822732925415, "step": 5023, "token_acc": 0.28264420939441876 }, { "epoch": 2.9451773673409556, "grad_norm": 0.31488721997814373, "learning_rate": 0.0004871840356780892, "loss": 3.1335601806640625, "step": 5024, "token_acc": 0.28600399733510995 }, { "epoch": 2.9457637056581647, "grad_norm": 0.24409918810160133, "learning_rate": 0.00048717637613996214, "loss": 3.12785005569458, "step": 5025, "token_acc": 0.28383287572751675 }, { "epoch": 2.946350043975374, "grad_norm": 0.2561151222652107, "learning_rate": 0.0004871687143738826, "loss": 3.1521596908569336, "step": 5026, "token_acc": 0.2818687684081323 }, { "epoch": 2.946936382292583, "grad_norm": 0.29187288183032073, "learning_rate": 0.00048716105037992257, "loss": 3.2151827812194824, "step": 5027, "token_acc": 0.2721538987546364 }, { "epoch": 2.9475227206097916, "grad_norm": 0.287352231972974, "learning_rate": 0.0004871533841581541, "loss": 3.1494736671447754, "step": 5028, "token_acc": 0.2819452216586166 }, { "epoch": 2.9481090589270007, "grad_norm": 0.21268769227228548, "learning_rate": 0.0004871457157086491, "loss": 3.103982925415039, "step": 5029, "token_acc": 0.28768441225360913 }, { "epoch": 2.94869539724421, "grad_norm": 0.2593412367636553, "learning_rate": 0.00048713804503147976, "loss": 3.124460220336914, "step": 5030, "token_acc": 0.28478371259005364 }, { "epoch": 2.949281735561419, "grad_norm": 0.2612123127343416, "learning_rate": 0.00048713037212671796, "loss": 3.107083559036255, "step": 5031, "token_acc": 0.2866644191221091 }, { "epoch": 2.949868073878628, "grad_norm": 0.24123033295897464, "learning_rate": 0.0004871226969944358, "loss": 3.1284422874450684, "step": 5032, "token_acc": 0.2848839372564862 }, { "epoch": 2.950454412195837, "grad_norm": 0.27460143525491787, "learning_rate": 0.0004871150196347055, "loss": 3.1908700466156006, "step": 5033, "token_acc": 0.27775067089534033 }, { "epoch": 2.9510407505130463, "grad_norm": 0.26320050572031906, "learning_rate": 0.0004871073400475991, "loss": 3.103938341140747, "step": 5034, "token_acc": 0.2871200750966551 }, { "epoch": 2.951627088830255, "grad_norm": 0.2645706212320269, "learning_rate": 0.0004870996582331888, "loss": 3.1424694061279297, "step": 5035, "token_acc": 0.2831940824158473 }, { "epoch": 2.952213427147464, "grad_norm": 0.21439366042139882, "learning_rate": 0.0004870919741915466, "loss": 3.127854347229004, "step": 5036, "token_acc": 0.2837863641759002 }, { "epoch": 2.952799765464673, "grad_norm": 0.2401823601680044, "learning_rate": 0.0004870842879227448, "loss": 3.144318103790283, "step": 5037, "token_acc": 0.2806005078526113 }, { "epoch": 2.9533861037818823, "grad_norm": 0.22641307004500938, "learning_rate": 0.00048707659942685567, "loss": 3.0996389389038086, "step": 5038, "token_acc": 0.2884437843696062 }, { "epoch": 2.953972442099091, "grad_norm": 0.24555454429266688, "learning_rate": 0.0004870689087039513, "loss": 3.1291019916534424, "step": 5039, "token_acc": 0.2853980622152914 }, { "epoch": 2.9545587804163, "grad_norm": 0.26631543914857037, "learning_rate": 0.000487061215754104, "loss": 3.141916513442993, "step": 5040, "token_acc": 0.2839145118568745 }, { "epoch": 2.955145118733509, "grad_norm": 0.25769035552611275, "learning_rate": 0.0004870535205773859, "loss": 3.1855549812316895, "step": 5041, "token_acc": 0.277336928133887 }, { "epoch": 2.9557314570507183, "grad_norm": 0.27460155688643045, "learning_rate": 0.0004870458231738696, "loss": 3.154581069946289, "step": 5042, "token_acc": 0.28180919789939646 }, { "epoch": 2.9563177953679274, "grad_norm": 0.4073859059057163, "learning_rate": 0.0004870381235436271, "loss": 3.130425453186035, "step": 5043, "token_acc": 0.2838434707229985 }, { "epoch": 2.9569041336851365, "grad_norm": 0.43431855198354563, "learning_rate": 0.00048703042168673095, "loss": 3.1138930320739746, "step": 5044, "token_acc": 0.28732473607851616 }, { "epoch": 2.9574904720023456, "grad_norm": 0.21475586317070977, "learning_rate": 0.0004870227176032533, "loss": 3.1383705139160156, "step": 5045, "token_acc": 0.2824630620134512 }, { "epoch": 2.9580768103195543, "grad_norm": 0.3315855311179034, "learning_rate": 0.00048701501129326665, "loss": 3.1405115127563477, "step": 5046, "token_acc": 0.28419331433558714 }, { "epoch": 2.9586631486367634, "grad_norm": 0.2330663298856108, "learning_rate": 0.00048700730275684327, "loss": 3.1151113510131836, "step": 5047, "token_acc": 0.2858880426283629 }, { "epoch": 2.9592494869539725, "grad_norm": 0.3248144425581349, "learning_rate": 0.0004869995919940557, "loss": 3.1694445610046387, "step": 5048, "token_acc": 0.2802275153005391 }, { "epoch": 2.9598358252711816, "grad_norm": 0.22135343760054257, "learning_rate": 0.0004869918790049764, "loss": 3.154057025909424, "step": 5049, "token_acc": 0.2813104458312992 }, { "epoch": 2.9604221635883903, "grad_norm": 0.26102946612720423, "learning_rate": 0.0004869841637896777, "loss": 3.1102654933929443, "step": 5050, "token_acc": 0.2883207592439551 }, { "epoch": 2.9610085019055994, "grad_norm": 0.2144279742893639, "learning_rate": 0.00048697644634823205, "loss": 3.1315462589263916, "step": 5051, "token_acc": 0.2844339686174153 }, { "epoch": 2.9615948402228085, "grad_norm": 0.2702593642371519, "learning_rate": 0.00048696872668071214, "loss": 3.147261619567871, "step": 5052, "token_acc": 0.28296109264525837 }, { "epoch": 2.9621811785400176, "grad_norm": 0.2654448283294186, "learning_rate": 0.00048696100478719023, "loss": 3.079688549041748, "step": 5053, "token_acc": 0.29133618954434815 }, { "epoch": 2.9627675168572267, "grad_norm": 0.25409080232221837, "learning_rate": 0.0004869532806677391, "loss": 3.1200003623962402, "step": 5054, "token_acc": 0.2858537435271992 }, { "epoch": 2.963353855174436, "grad_norm": 0.2931293350787964, "learning_rate": 0.00048694555432243113, "loss": 3.0992982387542725, "step": 5055, "token_acc": 0.2891093047617785 }, { "epoch": 2.963940193491645, "grad_norm": 0.2713098398333767, "learning_rate": 0.00048693782575133895, "loss": 3.13455867767334, "step": 5056, "token_acc": 0.28383693265473514 }, { "epoch": 2.9645265318088536, "grad_norm": 0.30791694391189034, "learning_rate": 0.00048693009495453523, "loss": 3.1085782051086426, "step": 5057, "token_acc": 0.2879622447353919 }, { "epoch": 2.9651128701260627, "grad_norm": 0.2132019125100214, "learning_rate": 0.0004869223619320925, "loss": 3.0838470458984375, "step": 5058, "token_acc": 0.2916803367638968 }, { "epoch": 2.965699208443272, "grad_norm": 0.2620606354028236, "learning_rate": 0.0004869146266840835, "loss": 3.095184326171875, "step": 5059, "token_acc": 0.28787502146274707 }, { "epoch": 2.966285546760481, "grad_norm": 0.24769001751269004, "learning_rate": 0.00048690688921058077, "loss": 3.188934326171875, "step": 5060, "token_acc": 0.27673149580610196 }, { "epoch": 2.9668718850776896, "grad_norm": 0.28063787414840935, "learning_rate": 0.000486899149511657, "loss": 3.13330340385437, "step": 5061, "token_acc": 0.2837298022763059 }, { "epoch": 2.9674582233948987, "grad_norm": 0.2200590009047734, "learning_rate": 0.00048689140758738505, "loss": 3.1424121856689453, "step": 5062, "token_acc": 0.2822681385626847 }, { "epoch": 2.968044561712108, "grad_norm": 0.26087387528970885, "learning_rate": 0.0004868836634378375, "loss": 3.0964503288269043, "step": 5063, "token_acc": 0.28938027813741113 }, { "epoch": 2.968630900029317, "grad_norm": 0.2550698872130819, "learning_rate": 0.00048687591706308715, "loss": 3.0813496112823486, "step": 5064, "token_acc": 0.29097788352771387 }, { "epoch": 2.969217238346526, "grad_norm": 0.2762376574762002, "learning_rate": 0.0004868681684632067, "loss": 3.1141421794891357, "step": 5065, "token_acc": 0.2867829564258136 }, { "epoch": 2.969803576663735, "grad_norm": 0.2240529240358315, "learning_rate": 0.00048686041763826906, "loss": 3.127117395401001, "step": 5066, "token_acc": 0.285890549380418 }, { "epoch": 2.970389914980944, "grad_norm": 0.2396271754345822, "learning_rate": 0.00048685266458834694, "loss": 3.1150436401367188, "step": 5067, "token_acc": 0.28616807705013847 }, { "epoch": 2.970976253298153, "grad_norm": 0.2606228808606463, "learning_rate": 0.0004868449093135132, "loss": 3.068798303604126, "step": 5068, "token_acc": 0.2923513169469049 }, { "epoch": 2.971562591615362, "grad_norm": 0.26481347756583673, "learning_rate": 0.0004868371518138407, "loss": 3.1042301654815674, "step": 5069, "token_acc": 0.28946109416886945 }, { "epoch": 2.972148929932571, "grad_norm": 0.21640718191958788, "learning_rate": 0.00048682939208940227, "loss": 3.159252166748047, "step": 5070, "token_acc": 0.27981310111914537 }, { "epoch": 2.97273526824978, "grad_norm": 0.3364816324936961, "learning_rate": 0.0004868216301402709, "loss": 3.1341726779937744, "step": 5071, "token_acc": 0.2827651031945321 }, { "epoch": 2.973321606566989, "grad_norm": 0.24019311813317218, "learning_rate": 0.0004868138659665193, "loss": 3.1295175552368164, "step": 5072, "token_acc": 0.28491618674119024 }, { "epoch": 2.973907944884198, "grad_norm": 0.32060329280615685, "learning_rate": 0.00048680609956822064, "loss": 3.14194917678833, "step": 5073, "token_acc": 0.2822914625383761 }, { "epoch": 2.974494283201407, "grad_norm": 0.26863143263727157, "learning_rate": 0.0004867983309454478, "loss": 3.1023459434509277, "step": 5074, "token_acc": 0.2881482200426529 }, { "epoch": 2.9750806215186163, "grad_norm": 0.3159415399831908, "learning_rate": 0.0004867905600982737, "loss": 3.1521291732788086, "step": 5075, "token_acc": 0.2812723697310284 }, { "epoch": 2.9756669598358254, "grad_norm": 0.23098873248335225, "learning_rate": 0.0004867827870267714, "loss": 3.1205129623413086, "step": 5076, "token_acc": 0.28746366585222005 }, { "epoch": 2.9762532981530345, "grad_norm": 0.2916610383535501, "learning_rate": 0.0004867750117310138, "loss": 3.1205546855926514, "step": 5077, "token_acc": 0.28396449184376826 }, { "epoch": 2.976839636470243, "grad_norm": 0.2681245324707136, "learning_rate": 0.0004867672342110741, "loss": 3.0903921127319336, "step": 5078, "token_acc": 0.29124601679709766 }, { "epoch": 2.9774259747874523, "grad_norm": 0.2775514769308955, "learning_rate": 0.0004867594544670252, "loss": 3.0816352367401123, "step": 5079, "token_acc": 0.2923162904927083 }, { "epoch": 2.9780123131046614, "grad_norm": 0.26866945673466197, "learning_rate": 0.0004867516724989404, "loss": 3.1228179931640625, "step": 5080, "token_acc": 0.28582954226048574 }, { "epoch": 2.9785986514218705, "grad_norm": 0.2696039043914441, "learning_rate": 0.00048674388830689255, "loss": 3.1396074295043945, "step": 5081, "token_acc": 0.28074636601314124 }, { "epoch": 2.979184989739079, "grad_norm": 0.2853419119533892, "learning_rate": 0.00048673610189095486, "loss": 3.131565570831299, "step": 5082, "token_acc": 0.2836169449220403 }, { "epoch": 2.9797713280562883, "grad_norm": 0.26128091138816056, "learning_rate": 0.0004867283132512006, "loss": 3.1794867515563965, "step": 5083, "token_acc": 0.27843310933016513 }, { "epoch": 2.9803576663734974, "grad_norm": 0.2644633157548551, "learning_rate": 0.00048672052238770276, "loss": 3.1267125606536865, "step": 5084, "token_acc": 0.2858645002649133 }, { "epoch": 2.9809440046907065, "grad_norm": 0.23538525143763747, "learning_rate": 0.0004867127293005346, "loss": 3.1287636756896973, "step": 5085, "token_acc": 0.28599776560089374 }, { "epoch": 2.9815303430079156, "grad_norm": 0.21746074606868301, "learning_rate": 0.00048670493398976934, "loss": 3.1294968128204346, "step": 5086, "token_acc": 0.2837701127276231 }, { "epoch": 2.9821166813251248, "grad_norm": 0.25141010510472817, "learning_rate": 0.0004866971364554802, "loss": 3.16300892829895, "step": 5087, "token_acc": 0.27777038889184447 }, { "epoch": 2.982703019642334, "grad_norm": 0.1987521988846626, "learning_rate": 0.0004866893366977404, "loss": 3.1690592765808105, "step": 5088, "token_acc": 0.2784115580468164 }, { "epoch": 2.9832893579595425, "grad_norm": 0.25688432513692644, "learning_rate": 0.00048668153471662323, "loss": 3.1373634338378906, "step": 5089, "token_acc": 0.2845778214138074 }, { "epoch": 2.9838756962767516, "grad_norm": 0.23918403315538103, "learning_rate": 0.00048667373051220197, "loss": 3.136134147644043, "step": 5090, "token_acc": 0.28470975779715774 }, { "epoch": 2.9844620345939608, "grad_norm": 0.25951237046769476, "learning_rate": 0.00048666592408455004, "loss": 3.0959150791168213, "step": 5091, "token_acc": 0.28886679818342476 }, { "epoch": 2.98504837291117, "grad_norm": 0.28256128964825933, "learning_rate": 0.0004866581154337405, "loss": 3.1380186080932617, "step": 5092, "token_acc": 0.28295188992633113 }, { "epoch": 2.9856347112283785, "grad_norm": 0.2862454050947879, "learning_rate": 0.00048665030455984694, "loss": 3.1073827743530273, "step": 5093, "token_acc": 0.28751925291929553 }, { "epoch": 2.9862210495455876, "grad_norm": 0.32948477080210786, "learning_rate": 0.00048664249146294263, "loss": 3.1256392002105713, "step": 5094, "token_acc": 0.28464736377472566 }, { "epoch": 2.9868073878627968, "grad_norm": 0.27460470425521083, "learning_rate": 0.00048663467614310104, "loss": 3.0692873001098633, "step": 5095, "token_acc": 0.2929485682692282 }, { "epoch": 2.987393726180006, "grad_norm": 0.28533287860485534, "learning_rate": 0.00048662685860039547, "loss": 3.103707790374756, "step": 5096, "token_acc": 0.28593239680478494 }, { "epoch": 2.987980064497215, "grad_norm": 0.3126168187488595, "learning_rate": 0.00048661903883489947, "loss": 3.157106399536133, "step": 5097, "token_acc": 0.28054050151858134 }, { "epoch": 2.988566402814424, "grad_norm": 0.23471742300715495, "learning_rate": 0.00048661121684668646, "loss": 3.1111836433410645, "step": 5098, "token_acc": 0.2865949010261743 }, { "epoch": 2.989152741131633, "grad_norm": 0.29379702455819884, "learning_rate": 0.0004866033926358299, "loss": 3.1283020973205566, "step": 5099, "token_acc": 0.2854787687865065 }, { "epoch": 2.989739079448842, "grad_norm": 0.2651475341495928, "learning_rate": 0.0004865955662024033, "loss": 3.0943541526794434, "step": 5100, "token_acc": 0.2889979394277619 }, { "epoch": 2.990325417766051, "grad_norm": 0.2862928524118598, "learning_rate": 0.00048658773754648013, "loss": 3.130763530731201, "step": 5101, "token_acc": 0.2849230705104492 }, { "epoch": 2.99091175608326, "grad_norm": 0.24627434560696157, "learning_rate": 0.000486579906668134, "loss": 3.1406311988830566, "step": 5102, "token_acc": 0.2826849241225138 }, { "epoch": 2.991498094400469, "grad_norm": 0.255040317350064, "learning_rate": 0.00048657207356743844, "loss": 3.1247973442077637, "step": 5103, "token_acc": 0.2870278398414728 }, { "epoch": 2.992084432717678, "grad_norm": 0.24753943025976863, "learning_rate": 0.00048656423824446705, "loss": 3.1451849937438965, "step": 5104, "token_acc": 0.28084009792552506 }, { "epoch": 2.992670771034887, "grad_norm": 0.28821650128826953, "learning_rate": 0.0004865564006992934, "loss": 3.151597499847412, "step": 5105, "token_acc": 0.2823802758932009 }, { "epoch": 2.993257109352096, "grad_norm": 0.2614114724031269, "learning_rate": 0.0004865485609319911, "loss": 3.161609172821045, "step": 5106, "token_acc": 0.27931655436091335 }, { "epoch": 2.993843447669305, "grad_norm": 0.23257223724618717, "learning_rate": 0.0004865407189426339, "loss": 3.119357109069824, "step": 5107, "token_acc": 0.28619423598977567 }, { "epoch": 2.9944297859865143, "grad_norm": 0.2860632164956374, "learning_rate": 0.0004865328747312953, "loss": 3.130225658416748, "step": 5108, "token_acc": 0.2853936817178839 }, { "epoch": 2.9950161243037234, "grad_norm": 0.22942938460325435, "learning_rate": 0.0004865250282980491, "loss": 3.128994941711426, "step": 5109, "token_acc": 0.28547814341579253 }, { "epoch": 2.9956024626209325, "grad_norm": 0.252124695179662, "learning_rate": 0.0004865171796429689, "loss": 3.1368064880371094, "step": 5110, "token_acc": 0.28463896745324835 }, { "epoch": 2.996188800938141, "grad_norm": 0.2823795971154206, "learning_rate": 0.0004865093287661286, "loss": 3.153078079223633, "step": 5111, "token_acc": 0.280427370705167 }, { "epoch": 2.9967751392553503, "grad_norm": 0.2480082593861109, "learning_rate": 0.00048650147566760196, "loss": 3.1281285285949707, "step": 5112, "token_acc": 0.28476552122888443 }, { "epoch": 2.9973614775725594, "grad_norm": 0.3097303307069442, "learning_rate": 0.0004864936203474625, "loss": 3.1273326873779297, "step": 5113, "token_acc": 0.28605087000306967 }, { "epoch": 2.9979478158897686, "grad_norm": 0.2413854207360571, "learning_rate": 0.0004864857628057842, "loss": 3.089822769165039, "step": 5114, "token_acc": 0.2903577966901602 }, { "epoch": 2.998534154206977, "grad_norm": 0.24080071615322693, "learning_rate": 0.00048647790304264085, "loss": 3.1740894317626953, "step": 5115, "token_acc": 0.27804753793004633 }, { "epoch": 2.9991204925241863, "grad_norm": 0.30992952844119925, "learning_rate": 0.0004864700410581062, "loss": 3.0656604766845703, "step": 5116, "token_acc": 0.2934852579756528 }, { "epoch": 2.9997068308413954, "grad_norm": 0.277401516394314, "learning_rate": 0.0004864621768522542, "loss": 3.1326990127563477, "step": 5117, "token_acc": 0.28198034437297753 }, { "epoch": 3.0, "grad_norm": 0.3087410535085222, "learning_rate": 0.00048645431042515866, "loss": 3.124927520751953, "step": 5118, "token_acc": 0.2873822204014748 }, { "epoch": 3.0, "eval_loss": 3.1179864406585693, "eval_runtime": 6.6091, "eval_samples_per_second": 38.734, "eval_steps_per_second": 4.842, "eval_token_acc": 0.2856722886310509, "step": 5118 }, { "epoch": 3.000586338317209, "grad_norm": 0.2882160028749108, "learning_rate": 0.0004864464417768936, "loss": 3.117948055267334, "step": 5119, "token_acc": 0.28441056772106704 }, { "epoch": 3.0011726766344182, "grad_norm": 0.31834905557074633, "learning_rate": 0.0004864385709075327, "loss": 3.046848773956299, "step": 5120, "token_acc": 0.29483086298091427 }, { "epoch": 3.001759014951627, "grad_norm": 0.2644584368104796, "learning_rate": 0.0004864306978171501, "loss": 3.049337387084961, "step": 5121, "token_acc": 0.2947937634425712 }, { "epoch": 3.002345353268836, "grad_norm": 0.26222329268513556, "learning_rate": 0.00048642282250581966, "loss": 3.0931153297424316, "step": 5122, "token_acc": 0.28815215060453075 }, { "epoch": 3.002931691586045, "grad_norm": 0.3319241627417302, "learning_rate": 0.00048641494497361537, "loss": 3.059563398361206, "step": 5123, "token_acc": 0.2925396804762957 }, { "epoch": 3.0035180299032542, "grad_norm": 0.2714821117033217, "learning_rate": 0.0004864070652206113, "loss": 3.0870728492736816, "step": 5124, "token_acc": 0.28991021234646847 }, { "epoch": 3.0041043682204633, "grad_norm": 0.3172501950692396, "learning_rate": 0.00048639918324688136, "loss": 3.075063705444336, "step": 5125, "token_acc": 0.2919139446548876 }, { "epoch": 3.0046907065376725, "grad_norm": 0.27970886764511665, "learning_rate": 0.0004863912990524997, "loss": 3.0813117027282715, "step": 5126, "token_acc": 0.2898245537764277 }, { "epoch": 3.005277044854881, "grad_norm": 0.29214820361802823, "learning_rate": 0.0004863834126375403, "loss": 3.08788800239563, "step": 5127, "token_acc": 0.2879080821162637 }, { "epoch": 3.0058633831720902, "grad_norm": 0.31198781015029076, "learning_rate": 0.0004863755240020773, "loss": 3.0655016899108887, "step": 5128, "token_acc": 0.29140818165814025 }, { "epoch": 3.0064497214892993, "grad_norm": 0.27062728481234727, "learning_rate": 0.0004863676331461847, "loss": 3.065094232559204, "step": 5129, "token_acc": 0.2924741821462717 }, { "epoch": 3.0070360598065085, "grad_norm": 0.305095067684652, "learning_rate": 0.00048635974006993677, "loss": 3.0489299297332764, "step": 5130, "token_acc": 0.29337991908789995 }, { "epoch": 3.0076223981237176, "grad_norm": 0.23105103192595688, "learning_rate": 0.0004863518447734075, "loss": 3.062300682067871, "step": 5131, "token_acc": 0.293084426522514 }, { "epoch": 3.0082087364409262, "grad_norm": 0.29156645183418645, "learning_rate": 0.0004863439472566712, "loss": 3.086378335952759, "step": 5132, "token_acc": 0.28889833116796454 }, { "epoch": 3.0087950747581353, "grad_norm": 0.2643919033989638, "learning_rate": 0.000486336047519802, "loss": 3.059706687927246, "step": 5133, "token_acc": 0.2935018408235208 }, { "epoch": 3.0093814130753445, "grad_norm": 0.21485544492412725, "learning_rate": 0.0004863281455628741, "loss": 3.010554313659668, "step": 5134, "token_acc": 0.3008202619846144 }, { "epoch": 3.0099677513925536, "grad_norm": 0.272067651041854, "learning_rate": 0.0004863202413859617, "loss": 3.094364643096924, "step": 5135, "token_acc": 0.28657185385446843 }, { "epoch": 3.0105540897097627, "grad_norm": 0.2200032287618293, "learning_rate": 0.00048631233498913905, "loss": 3.029228925704956, "step": 5136, "token_acc": 0.29739917264702626 }, { "epoch": 3.0111404280269713, "grad_norm": 0.24699938399702395, "learning_rate": 0.0004863044263724805, "loss": 3.0791375637054443, "step": 5137, "token_acc": 0.29090153917426137 }, { "epoch": 3.0117267663441805, "grad_norm": 0.22653257312763628, "learning_rate": 0.0004862965155360603, "loss": 3.0522570610046387, "step": 5138, "token_acc": 0.29530881296809275 }, { "epoch": 3.0123131046613896, "grad_norm": 0.26947683095703245, "learning_rate": 0.00048628860247995273, "loss": 3.0387346744537354, "step": 5139, "token_acc": 0.2943674080794122 }, { "epoch": 3.0128994429785987, "grad_norm": 0.27191928943213844, "learning_rate": 0.0004862806872042321, "loss": 3.0484466552734375, "step": 5140, "token_acc": 0.29416462090514967 }, { "epoch": 3.013485781295808, "grad_norm": 0.24223586743640826, "learning_rate": 0.0004862727697089728, "loss": 3.0574183464050293, "step": 5141, "token_acc": 0.29256974052230755 }, { "epoch": 3.014072119613017, "grad_norm": 0.28356402252226, "learning_rate": 0.0004862648499942493, "loss": 3.065389633178711, "step": 5142, "token_acc": 0.29116122692033247 }, { "epoch": 3.0146584579302256, "grad_norm": 0.22051485975272037, "learning_rate": 0.00048625692806013586, "loss": 3.061725616455078, "step": 5143, "token_acc": 0.29221882005439553 }, { "epoch": 3.0152447962474347, "grad_norm": 0.2836496213321387, "learning_rate": 0.00048624900390670695, "loss": 3.062709331512451, "step": 5144, "token_acc": 0.2916811046359007 }, { "epoch": 3.015831134564644, "grad_norm": 0.2313336321943706, "learning_rate": 0.000486241077534037, "loss": 3.11256742477417, "step": 5145, "token_acc": 0.2832080326958979 }, { "epoch": 3.016417472881853, "grad_norm": 0.2623036074637004, "learning_rate": 0.00048623314894220046, "loss": 3.0813961029052734, "step": 5146, "token_acc": 0.2885755088436222 }, { "epoch": 3.017003811199062, "grad_norm": 0.2366062036052034, "learning_rate": 0.00048622521813127174, "loss": 3.0432567596435547, "step": 5147, "token_acc": 0.29490412547833694 }, { "epoch": 3.0175901495162707, "grad_norm": 0.21381505772151607, "learning_rate": 0.0004862172851013255, "loss": 3.033277750015259, "step": 5148, "token_acc": 0.297480382540461 }, { "epoch": 3.01817648783348, "grad_norm": 0.24359287788688994, "learning_rate": 0.00048620934985243617, "loss": 3.110724687576294, "step": 5149, "token_acc": 0.28736121002823195 }, { "epoch": 3.018762826150689, "grad_norm": 0.20885551861776722, "learning_rate": 0.0004862014123846783, "loss": 3.1032533645629883, "step": 5150, "token_acc": 0.2878907569642181 }, { "epoch": 3.019349164467898, "grad_norm": 0.27182600149614233, "learning_rate": 0.0004861934726981264, "loss": 3.0839195251464844, "step": 5151, "token_acc": 0.289646867736425 }, { "epoch": 3.019935502785107, "grad_norm": 0.23746196887024953, "learning_rate": 0.0004861855307928551, "loss": 3.073615550994873, "step": 5152, "token_acc": 0.2894042917843784 }, { "epoch": 3.0205218411023163, "grad_norm": 0.21783195713987594, "learning_rate": 0.00048617758666893903, "loss": 3.025961399078369, "step": 5153, "token_acc": 0.29741202210489553 }, { "epoch": 3.021108179419525, "grad_norm": 0.22052411329225424, "learning_rate": 0.0004861696403264528, "loss": 3.10245418548584, "step": 5154, "token_acc": 0.2879073228788583 }, { "epoch": 3.021694517736734, "grad_norm": 0.260452859086525, "learning_rate": 0.000486161691765471, "loss": 3.053095817565918, "step": 5155, "token_acc": 0.29357544968100985 }, { "epoch": 3.022280856053943, "grad_norm": 0.27201297577491657, "learning_rate": 0.00048615374098606837, "loss": 3.0812697410583496, "step": 5156, "token_acc": 0.29122008573306485 }, { "epoch": 3.0228671943711523, "grad_norm": 0.23566806425581, "learning_rate": 0.00048614578798831956, "loss": 3.092101573944092, "step": 5157, "token_acc": 0.2885811918039659 }, { "epoch": 3.0234535326883614, "grad_norm": 0.20298902788148207, "learning_rate": 0.0004861378327722993, "loss": 3.0941519737243652, "step": 5158, "token_acc": 0.2876100148695897 }, { "epoch": 3.02403987100557, "grad_norm": 0.226501295528511, "learning_rate": 0.0004861298753380822, "loss": 3.0533154010772705, "step": 5159, "token_acc": 0.2940406834489757 }, { "epoch": 3.024626209322779, "grad_norm": 0.2826830258379027, "learning_rate": 0.0004861219156857432, "loss": 3.0858664512634277, "step": 5160, "token_acc": 0.28827576512483305 }, { "epoch": 3.0252125476399883, "grad_norm": 0.2657235648218342, "learning_rate": 0.0004861139538153569, "loss": 3.0944931507110596, "step": 5161, "token_acc": 0.2868873820242518 }, { "epoch": 3.0257988859571974, "grad_norm": 0.21443986378105956, "learning_rate": 0.0004861059897269983, "loss": 3.0560765266418457, "step": 5162, "token_acc": 0.29272467675845526 }, { "epoch": 3.0263852242744065, "grad_norm": 0.22073308017767562, "learning_rate": 0.00048609802342074204, "loss": 3.0360772609710693, "step": 5163, "token_acc": 0.2964008951087199 }, { "epoch": 3.026971562591615, "grad_norm": 0.2530275578061761, "learning_rate": 0.00048609005489666296, "loss": 3.103590965270996, "step": 5164, "token_acc": 0.28694595907790316 }, { "epoch": 3.0275579009088243, "grad_norm": 0.22060595795542087, "learning_rate": 0.000486082084154836, "loss": 3.0465476512908936, "step": 5165, "token_acc": 0.2947382096920878 }, { "epoch": 3.0281442392260334, "grad_norm": 0.21208614628522823, "learning_rate": 0.00048607411119533595, "loss": 3.059967279434204, "step": 5166, "token_acc": 0.29185977514461897 }, { "epoch": 3.0287305775432425, "grad_norm": 0.23787190969563315, "learning_rate": 0.0004860661360182377, "loss": 3.055058479309082, "step": 5167, "token_acc": 0.29266713335666783 }, { "epoch": 3.0293169158604516, "grad_norm": 0.3774276247385556, "learning_rate": 0.00048605815862361624, "loss": 3.095343589782715, "step": 5168, "token_acc": 0.2881098570737051 }, { "epoch": 3.0299032541776607, "grad_norm": 0.5157414160119536, "learning_rate": 0.00048605017901154644, "loss": 3.095182418823242, "step": 5169, "token_acc": 0.2881034609823101 }, { "epoch": 3.0304895924948694, "grad_norm": 0.24417588156403958, "learning_rate": 0.0004860421971821034, "loss": 3.039158344268799, "step": 5170, "token_acc": 0.2954391283149259 }, { "epoch": 3.0310759308120785, "grad_norm": 0.33770151795217096, "learning_rate": 0.0004860342131353619, "loss": 3.0692014694213867, "step": 5171, "token_acc": 0.2903390493763492 }, { "epoch": 3.0316622691292876, "grad_norm": 0.21440975365850581, "learning_rate": 0.0004860262268713971, "loss": 3.048516273498535, "step": 5172, "token_acc": 0.29338846250379785 }, { "epoch": 3.0322486074464967, "grad_norm": 0.27213364220646075, "learning_rate": 0.0004860182383902838, "loss": 3.0958354473114014, "step": 5173, "token_acc": 0.28788101899572965 }, { "epoch": 3.032834945763706, "grad_norm": 0.23370546792198157, "learning_rate": 0.00048601024769209735, "loss": 3.089506149291992, "step": 5174, "token_acc": 0.28906534029537756 }, { "epoch": 3.0334212840809145, "grad_norm": 0.2523841166717806, "learning_rate": 0.0004860022547769125, "loss": 3.0423972606658936, "step": 5175, "token_acc": 0.2937526016020793 }, { "epoch": 3.0340076223981236, "grad_norm": 0.26197981283164257, "learning_rate": 0.0004859942596448046, "loss": 3.0522491931915283, "step": 5176, "token_acc": 0.2950880639775114 }, { "epoch": 3.0345939607153327, "grad_norm": 0.21591051042242448, "learning_rate": 0.00048598626229584866, "loss": 3.079040050506592, "step": 5177, "token_acc": 0.2906592160716008 }, { "epoch": 3.035180299032542, "grad_norm": 0.2861028648431269, "learning_rate": 0.0004859782627301197, "loss": 3.1199498176574707, "step": 5178, "token_acc": 0.2834390323635175 }, { "epoch": 3.035766637349751, "grad_norm": 0.22137764469169652, "learning_rate": 0.00048597026094769294, "loss": 3.053640842437744, "step": 5179, "token_acc": 0.29326542515518894 }, { "epoch": 3.03635297566696, "grad_norm": 0.29187842657416035, "learning_rate": 0.0004859622569486436, "loss": 3.0686140060424805, "step": 5180, "token_acc": 0.29211527891025324 }, { "epoch": 3.0369393139841687, "grad_norm": 0.22940034649296087, "learning_rate": 0.00048595425073304677, "loss": 3.1228342056274414, "step": 5181, "token_acc": 0.28545669508133 }, { "epoch": 3.037525652301378, "grad_norm": 0.2958295550146608, "learning_rate": 0.00048594624230097774, "loss": 3.081361770629883, "step": 5182, "token_acc": 0.28991781951610335 }, { "epoch": 3.038111990618587, "grad_norm": 0.2812588592185084, "learning_rate": 0.00048593823165251173, "loss": 3.116501808166504, "step": 5183, "token_acc": 0.2847373854452421 }, { "epoch": 3.038698328935796, "grad_norm": 0.3165643224125176, "learning_rate": 0.0004859302187877239, "loss": 3.0625205039978027, "step": 5184, "token_acc": 0.2930380914722552 }, { "epoch": 3.039284667253005, "grad_norm": 0.24699632189106552, "learning_rate": 0.0004859222037066896, "loss": 3.0394372940063477, "step": 5185, "token_acc": 0.2956459074073093 }, { "epoch": 3.039871005570214, "grad_norm": 0.21267587083791145, "learning_rate": 0.00048591418640948415, "loss": 3.062208652496338, "step": 5186, "token_acc": 0.2922067610218949 }, { "epoch": 3.040457343887423, "grad_norm": 0.23055548758078567, "learning_rate": 0.00048590616689618283, "loss": 3.111326217651367, "step": 5187, "token_acc": 0.28780197086665466 }, { "epoch": 3.041043682204632, "grad_norm": 0.23274274612381035, "learning_rate": 0.0004858981451668609, "loss": 3.1234617233276367, "step": 5188, "token_acc": 0.2853598141969318 }, { "epoch": 3.041630020521841, "grad_norm": 0.23070190152065695, "learning_rate": 0.0004858901212215938, "loss": 3.0723695755004883, "step": 5189, "token_acc": 0.2929783340957292 }, { "epoch": 3.0422163588390503, "grad_norm": 0.23384417080529668, "learning_rate": 0.0004858820950604569, "loss": 3.0617289543151855, "step": 5190, "token_acc": 0.2930191550328103 }, { "epoch": 3.042802697156259, "grad_norm": 0.23651873002484405, "learning_rate": 0.0004858740666835255, "loss": 3.078335762023926, "step": 5191, "token_acc": 0.2887696685680794 }, { "epoch": 3.043389035473468, "grad_norm": 0.27029208020735157, "learning_rate": 0.00048586603609087513, "loss": 3.0879404544830322, "step": 5192, "token_acc": 0.2889154342547749 }, { "epoch": 3.043975373790677, "grad_norm": 0.20096834971336394, "learning_rate": 0.0004858580032825812, "loss": 3.052816390991211, "step": 5193, "token_acc": 0.2940693770980977 }, { "epoch": 3.0445617121078863, "grad_norm": 0.2984639972852076, "learning_rate": 0.00048584996825871914, "loss": 3.0756049156188965, "step": 5194, "token_acc": 0.2898000604234288 }, { "epoch": 3.0451480504250954, "grad_norm": 0.21713853605936775, "learning_rate": 0.00048584193101936445, "loss": 3.0888099670410156, "step": 5195, "token_acc": 0.2885610343490418 }, { "epoch": 3.0457343887423045, "grad_norm": 0.27219213045597174, "learning_rate": 0.0004858338915645926, "loss": 3.0222482681274414, "step": 5196, "token_acc": 0.2998593935156505 }, { "epoch": 3.046320727059513, "grad_norm": 0.32521878313062497, "learning_rate": 0.00048582584989447907, "loss": 3.0886552333831787, "step": 5197, "token_acc": 0.28827402683309855 }, { "epoch": 3.0469070653767223, "grad_norm": 0.24705784865921382, "learning_rate": 0.00048581780600909957, "loss": 3.0708184242248535, "step": 5198, "token_acc": 0.2902632296518273 }, { "epoch": 3.0474934036939314, "grad_norm": 0.36636533294246215, "learning_rate": 0.0004858097599085295, "loss": 3.062175989151001, "step": 5199, "token_acc": 0.29109758104327793 }, { "epoch": 3.0480797420111405, "grad_norm": 0.2326016431056734, "learning_rate": 0.0004858017115928445, "loss": 3.0578935146331787, "step": 5200, "token_acc": 0.2926591789013685 }, { "epoch": 3.0486660803283496, "grad_norm": 0.30422837007606635, "learning_rate": 0.0004857936610621202, "loss": 3.0532772541046143, "step": 5201, "token_acc": 0.2936129039117907 }, { "epoch": 3.0492524186455583, "grad_norm": 0.24142606601121344, "learning_rate": 0.00048578560831643214, "loss": 3.0546822547912598, "step": 5202, "token_acc": 0.29267986495437937 }, { "epoch": 3.0498387569627674, "grad_norm": 0.29826665916570516, "learning_rate": 0.00048577755335585604, "loss": 3.0806407928466797, "step": 5203, "token_acc": 0.2905272463627494 }, { "epoch": 3.0504250952799765, "grad_norm": 0.26946616616855495, "learning_rate": 0.0004857694961804675, "loss": 3.0651307106018066, "step": 5204, "token_acc": 0.29360836930101253 }, { "epoch": 3.0510114335971856, "grad_norm": 0.2611873108703457, "learning_rate": 0.0004857614367903423, "loss": 3.080674171447754, "step": 5205, "token_acc": 0.2897982445542838 }, { "epoch": 3.0515977719143947, "grad_norm": 0.2839189679525068, "learning_rate": 0.0004857533751855561, "loss": 3.095982551574707, "step": 5206, "token_acc": 0.2876162698635665 }, { "epoch": 3.052184110231604, "grad_norm": 0.21530893964637418, "learning_rate": 0.0004857453113661846, "loss": 3.101893424987793, "step": 5207, "token_acc": 0.286405318897111 }, { "epoch": 3.0527704485488125, "grad_norm": 0.2689238599398576, "learning_rate": 0.00048573724533230355, "loss": 3.0780866146087646, "step": 5208, "token_acc": 0.2902573864644148 }, { "epoch": 3.0533567868660216, "grad_norm": 0.21577878617149543, "learning_rate": 0.0004857291770839887, "loss": 3.124995470046997, "step": 5209, "token_acc": 0.2857989763025148 }, { "epoch": 3.0539431251832307, "grad_norm": 0.26754186806587926, "learning_rate": 0.000485721106621316, "loss": 3.073413133621216, "step": 5210, "token_acc": 0.29073124499398634 }, { "epoch": 3.05452946350044, "grad_norm": 0.2979932904404805, "learning_rate": 0.0004857130339443611, "loss": 3.106566905975342, "step": 5211, "token_acc": 0.2863657422371625 }, { "epoch": 3.055115801817649, "grad_norm": 0.2614817130852734, "learning_rate": 0.00048570495905319975, "loss": 3.042097568511963, "step": 5212, "token_acc": 0.2936354651457069 }, { "epoch": 3.0557021401348576, "grad_norm": 0.24640113427084576, "learning_rate": 0.0004856968819479081, "loss": 3.061169385910034, "step": 5213, "token_acc": 0.29207875945859535 }, { "epoch": 3.0562884784520667, "grad_norm": 0.23367505889205795, "learning_rate": 0.0004856888026285617, "loss": 3.0874786376953125, "step": 5214, "token_acc": 0.2894607013043119 }, { "epoch": 3.056874816769276, "grad_norm": 0.2098683440937314, "learning_rate": 0.00048568072109523674, "loss": 3.053040027618408, "step": 5215, "token_acc": 0.29335893410827574 }, { "epoch": 3.057461155086485, "grad_norm": 0.23098426715655707, "learning_rate": 0.00048567263734800893, "loss": 3.0737555027008057, "step": 5216, "token_acc": 0.290288427011385 }, { "epoch": 3.058047493403694, "grad_norm": 0.21248676131412744, "learning_rate": 0.0004856645513869542, "loss": 3.0830187797546387, "step": 5217, "token_acc": 0.2882682661470595 }, { "epoch": 3.0586338317209028, "grad_norm": 0.22576576993133143, "learning_rate": 0.00048565646321214865, "loss": 3.065706491470337, "step": 5218, "token_acc": 0.29212273369767666 }, { "epoch": 3.059220170038112, "grad_norm": 0.22977289001017456, "learning_rate": 0.00048564837282366813, "loss": 3.0613574981689453, "step": 5219, "token_acc": 0.29184455539063553 }, { "epoch": 3.059806508355321, "grad_norm": 0.26160483651925204, "learning_rate": 0.00048564028022158874, "loss": 3.0220696926116943, "step": 5220, "token_acc": 0.29893590429320555 }, { "epoch": 3.06039284667253, "grad_norm": 0.2170517412378351, "learning_rate": 0.0004856321854059864, "loss": 3.104980945587158, "step": 5221, "token_acc": 0.2884311842555493 }, { "epoch": 3.060979184989739, "grad_norm": 0.24205719991771996, "learning_rate": 0.0004856240883769372, "loss": 3.063933849334717, "step": 5222, "token_acc": 0.29482963707738924 }, { "epoch": 3.0615655233069483, "grad_norm": 0.2745472350216802, "learning_rate": 0.0004856159891345172, "loss": 3.073984146118164, "step": 5223, "token_acc": 0.2903301874294556 }, { "epoch": 3.062151861624157, "grad_norm": 0.2151189529096028, "learning_rate": 0.0004856078876788025, "loss": 3.098543643951416, "step": 5224, "token_acc": 0.2876095963264318 }, { "epoch": 3.062738199941366, "grad_norm": 0.25987197538088136, "learning_rate": 0.0004855997840098692, "loss": 3.043489933013916, "step": 5225, "token_acc": 0.2942860968135432 }, { "epoch": 3.063324538258575, "grad_norm": 0.20600653983720674, "learning_rate": 0.00048559167812779335, "loss": 3.07480525970459, "step": 5226, "token_acc": 0.29156108713038187 }, { "epoch": 3.0639108765757843, "grad_norm": 0.22839056602449656, "learning_rate": 0.00048558357003265117, "loss": 3.076939582824707, "step": 5227, "token_acc": 0.29157132111326656 }, { "epoch": 3.0644972148929934, "grad_norm": 0.2736531116092107, "learning_rate": 0.00048557545972451884, "loss": 3.0803170204162598, "step": 5228, "token_acc": 0.290480162349389 }, { "epoch": 3.065083553210202, "grad_norm": 0.3629298048101826, "learning_rate": 0.0004855673472034725, "loss": 3.0543994903564453, "step": 5229, "token_acc": 0.29361668405689906 }, { "epoch": 3.065669891527411, "grad_norm": 0.31196110360187296, "learning_rate": 0.00048555923246958833, "loss": 3.1105122566223145, "step": 5230, "token_acc": 0.28595971331966946 }, { "epoch": 3.0662562298446203, "grad_norm": 0.26968010099753403, "learning_rate": 0.0004855511155229426, "loss": 3.1003952026367188, "step": 5231, "token_acc": 0.2861464330798362 }, { "epoch": 3.0668425681618294, "grad_norm": 0.31679044683858004, "learning_rate": 0.00048554299636361156, "loss": 3.0746235847473145, "step": 5232, "token_acc": 0.29170201179218114 }, { "epoch": 3.0674289064790385, "grad_norm": 0.2380455912842498, "learning_rate": 0.00048553487499167143, "loss": 3.03865385055542, "step": 5233, "token_acc": 0.29671215711911963 }, { "epoch": 3.068015244796247, "grad_norm": 0.29084205010485037, "learning_rate": 0.0004855267514071985, "loss": 3.0861971378326416, "step": 5234, "token_acc": 0.290347847097026 }, { "epoch": 3.0686015831134563, "grad_norm": 0.270147042940698, "learning_rate": 0.0004855186256102692, "loss": 3.050185441970825, "step": 5235, "token_acc": 0.29399004286081515 }, { "epoch": 3.0691879214306654, "grad_norm": 0.21989156926929357, "learning_rate": 0.00048551049760095976, "loss": 3.0831711292266846, "step": 5236, "token_acc": 0.2899712454418403 }, { "epoch": 3.0697742597478745, "grad_norm": 0.24042272942448464, "learning_rate": 0.0004855023673793466, "loss": 3.074821710586548, "step": 5237, "token_acc": 0.29094616508517573 }, { "epoch": 3.0703605980650837, "grad_norm": 0.22862377657428393, "learning_rate": 0.0004854942349455059, "loss": 3.1099209785461426, "step": 5238, "token_acc": 0.28494912587008003 }, { "epoch": 3.0709469363822928, "grad_norm": 0.23513111823054786, "learning_rate": 0.00048548610029951433, "loss": 3.0856919288635254, "step": 5239, "token_acc": 0.28967279911775895 }, { "epoch": 3.0715332746995014, "grad_norm": 0.2491982801687533, "learning_rate": 0.00048547796344144815, "loss": 3.0947065353393555, "step": 5240, "token_acc": 0.2885461567867547 }, { "epoch": 3.0721196130167105, "grad_norm": 0.2475177679273553, "learning_rate": 0.00048546982437138375, "loss": 3.092134714126587, "step": 5241, "token_acc": 0.2902554831685151 }, { "epoch": 3.0727059513339197, "grad_norm": 0.22291407647683034, "learning_rate": 0.0004854616830893977, "loss": 3.0829644203186035, "step": 5242, "token_acc": 0.28947767816949416 }, { "epoch": 3.0732922896511288, "grad_norm": 0.23249588807185387, "learning_rate": 0.00048545353959556636, "loss": 3.072159767150879, "step": 5243, "token_acc": 0.2922139708857852 }, { "epoch": 3.073878627968338, "grad_norm": 0.23048170405392537, "learning_rate": 0.0004854453938899664, "loss": 3.1043477058410645, "step": 5244, "token_acc": 0.28690024980618484 }, { "epoch": 3.0744649662855466, "grad_norm": 0.2105993301897914, "learning_rate": 0.00048543724597267416, "loss": 3.052957057952881, "step": 5245, "token_acc": 0.29426410991141005 }, { "epoch": 3.0750513046027557, "grad_norm": 0.2305878945979558, "learning_rate": 0.00048542909584376625, "loss": 3.0958142280578613, "step": 5246, "token_acc": 0.28880178318731603 }, { "epoch": 3.0756376429199648, "grad_norm": 0.2459117518914884, "learning_rate": 0.0004854209435033193, "loss": 3.0506558418273926, "step": 5247, "token_acc": 0.29318781053495746 }, { "epoch": 3.076223981237174, "grad_norm": 0.23206553952241118, "learning_rate": 0.00048541278895140974, "loss": 3.075748920440674, "step": 5248, "token_acc": 0.289481464575996 }, { "epoch": 3.076810319554383, "grad_norm": 0.19216039227917162, "learning_rate": 0.00048540463218811424, "loss": 3.029987335205078, "step": 5249, "token_acc": 0.2964475063732489 }, { "epoch": 3.077396657871592, "grad_norm": 0.22159583984247275, "learning_rate": 0.0004853964732135095, "loss": 3.1173555850982666, "step": 5250, "token_acc": 0.28369109198044434 }, { "epoch": 3.077982996188801, "grad_norm": 0.2565723282741172, "learning_rate": 0.0004853883120276721, "loss": 3.091160774230957, "step": 5251, "token_acc": 0.2897236199694454 }, { "epoch": 3.07856933450601, "grad_norm": 0.24045751366869014, "learning_rate": 0.0004853801486306786, "loss": 3.0558314323425293, "step": 5252, "token_acc": 0.2932082863536012 }, { "epoch": 3.079155672823219, "grad_norm": 0.2666021705203025, "learning_rate": 0.0004853719830226059, "loss": 3.0953288078308105, "step": 5253, "token_acc": 0.28738323697364476 }, { "epoch": 3.079742011140428, "grad_norm": 0.2685631373940192, "learning_rate": 0.00048536381520353043, "loss": 3.0666189193725586, "step": 5254, "token_acc": 0.29048242505986516 }, { "epoch": 3.0803283494576372, "grad_norm": 0.24492814729613077, "learning_rate": 0.00048535564517352927, "loss": 3.120121955871582, "step": 5255, "token_acc": 0.283545353471065 }, { "epoch": 3.080914687774846, "grad_norm": 0.23527681622175808, "learning_rate": 0.0004853474729326788, "loss": 3.072406053543091, "step": 5256, "token_acc": 0.28882249034150775 }, { "epoch": 3.081501026092055, "grad_norm": 0.2908396629971575, "learning_rate": 0.00048533929848105606, "loss": 3.065654754638672, "step": 5257, "token_acc": 0.29263950577613773 }, { "epoch": 3.082087364409264, "grad_norm": 0.3105124866618194, "learning_rate": 0.00048533112181873775, "loss": 3.0710458755493164, "step": 5258, "token_acc": 0.2911501322540553 }, { "epoch": 3.0826737027264732, "grad_norm": 0.2927566062707977, "learning_rate": 0.0004853229429458006, "loss": 3.0859200954437256, "step": 5259, "token_acc": 0.29062255632542183 }, { "epoch": 3.0832600410436823, "grad_norm": 0.26433959481425817, "learning_rate": 0.0004853147618623215, "loss": 3.082026720046997, "step": 5260, "token_acc": 0.28819525688822334 }, { "epoch": 3.0838463793608915, "grad_norm": 0.2348281596231774, "learning_rate": 0.00048530657856837736, "loss": 3.1209797859191895, "step": 5261, "token_acc": 0.28373229531429056 }, { "epoch": 3.0844327176781, "grad_norm": 0.28384059459679134, "learning_rate": 0.0004852983930640449, "loss": 3.087246894836426, "step": 5262, "token_acc": 0.28995229115789256 }, { "epoch": 3.0850190559953092, "grad_norm": 0.2880180579183701, "learning_rate": 0.00048529020534940115, "loss": 3.147364616394043, "step": 5263, "token_acc": 0.2826135982210288 }, { "epoch": 3.0856053943125183, "grad_norm": 0.2690478163397538, "learning_rate": 0.000485282015424523, "loss": 3.0888078212738037, "step": 5264, "token_acc": 0.2916706397060886 }, { "epoch": 3.0861917326297275, "grad_norm": 0.2961167020785109, "learning_rate": 0.00048527382328948735, "loss": 3.0707712173461914, "step": 5265, "token_acc": 0.2924201375217404 }, { "epoch": 3.0867780709469366, "grad_norm": 0.24505964873609412, "learning_rate": 0.00048526562894437116, "loss": 3.0708301067352295, "step": 5266, "token_acc": 0.2925084559235798 }, { "epoch": 3.0873644092641452, "grad_norm": 0.2523145534302392, "learning_rate": 0.0004852574323892514, "loss": 3.1526927947998047, "step": 5267, "token_acc": 0.28015322961489453 }, { "epoch": 3.0879507475813543, "grad_norm": 0.26446104497246503, "learning_rate": 0.00048524923362420515, "loss": 3.105459213256836, "step": 5268, "token_acc": 0.2872541031311838 }, { "epoch": 3.0885370858985635, "grad_norm": 0.2644804666396965, "learning_rate": 0.0004852410326493093, "loss": 3.165639877319336, "step": 5269, "token_acc": 0.2787009935060166 }, { "epoch": 3.0891234242157726, "grad_norm": 0.3533793292216388, "learning_rate": 0.00048523282946464084, "loss": 3.098417282104492, "step": 5270, "token_acc": 0.2868668026199999 }, { "epoch": 3.0897097625329817, "grad_norm": 0.24459757383992764, "learning_rate": 0.0004852246240702771, "loss": 3.0434343814849854, "step": 5271, "token_acc": 0.29385356383568 }, { "epoch": 3.0902961008501904, "grad_norm": 0.25331396911565857, "learning_rate": 0.0004852164164662949, "loss": 3.05297589302063, "step": 5272, "token_acc": 0.2935096913819826 }, { "epoch": 3.0908824391673995, "grad_norm": 0.22470611493413228, "learning_rate": 0.00048520820665277144, "loss": 3.0502848625183105, "step": 5273, "token_acc": 0.29399595946553037 }, { "epoch": 3.0914687774846086, "grad_norm": 0.27873143917945054, "learning_rate": 0.0004851999946297838, "loss": 3.0773239135742188, "step": 5274, "token_acc": 0.2898796087283672 }, { "epoch": 3.0920551158018177, "grad_norm": 0.2553183012027645, "learning_rate": 0.0004851917803974092, "loss": 3.0916311740875244, "step": 5275, "token_acc": 0.28800445959785836 }, { "epoch": 3.092641454119027, "grad_norm": 0.22817695916192723, "learning_rate": 0.0004851835639557247, "loss": 3.068256139755249, "step": 5276, "token_acc": 0.2929437654580307 }, { "epoch": 3.093227792436236, "grad_norm": 0.2324256666105188, "learning_rate": 0.00048517534530480755, "loss": 3.1146092414855957, "step": 5277, "token_acc": 0.28521486136267915 }, { "epoch": 3.0938141307534446, "grad_norm": 0.23712258852232615, "learning_rate": 0.0004851671244447349, "loss": 3.0510077476501465, "step": 5278, "token_acc": 0.29443173538897355 }, { "epoch": 3.0944004690706537, "grad_norm": 0.23027849843841336, "learning_rate": 0.00048515890137558406, "loss": 3.0696325302124023, "step": 5279, "token_acc": 0.2922022506453511 }, { "epoch": 3.094986807387863, "grad_norm": 0.20010596700351374, "learning_rate": 0.0004851506760974321, "loss": 3.1069397926330566, "step": 5280, "token_acc": 0.2861826599767459 }, { "epoch": 3.095573145705072, "grad_norm": 0.22656025040393174, "learning_rate": 0.00048514244861035664, "loss": 3.0675530433654785, "step": 5281, "token_acc": 0.29302827990994157 }, { "epoch": 3.096159484022281, "grad_norm": 0.2220925769437115, "learning_rate": 0.00048513421891443456, "loss": 3.091341495513916, "step": 5282, "token_acc": 0.2889386990471916 }, { "epoch": 3.0967458223394897, "grad_norm": 0.23174505951330446, "learning_rate": 0.00048512598700974335, "loss": 3.0739829540252686, "step": 5283, "token_acc": 0.290051042320096 }, { "epoch": 3.097332160656699, "grad_norm": 0.21713762533140324, "learning_rate": 0.0004851177528963604, "loss": 3.07684063911438, "step": 5284, "token_acc": 0.29102620811677543 }, { "epoch": 3.097918498973908, "grad_norm": 0.19880068309360657, "learning_rate": 0.0004851095165743629, "loss": 3.110692024230957, "step": 5285, "token_acc": 0.2865157537617961 }, { "epoch": 3.098504837291117, "grad_norm": 0.2632894283567366, "learning_rate": 0.00048510127804382835, "loss": 3.094048500061035, "step": 5286, "token_acc": 0.2889997847214477 }, { "epoch": 3.099091175608326, "grad_norm": 0.26756388082820515, "learning_rate": 0.0004850930373048341, "loss": 3.0018835067749023, "step": 5287, "token_acc": 0.3015662201778397 }, { "epoch": 3.099677513925535, "grad_norm": 0.22573785091953907, "learning_rate": 0.00048508479435745757, "loss": 3.0963966846466064, "step": 5288, "token_acc": 0.28909743977530517 }, { "epoch": 3.100263852242744, "grad_norm": 0.23687499423800748, "learning_rate": 0.00048507654920177615, "loss": 3.099174976348877, "step": 5289, "token_acc": 0.2869317106152806 }, { "epoch": 3.100850190559953, "grad_norm": 0.27328660563388435, "learning_rate": 0.0004850683018378673, "loss": 3.0650577545166016, "step": 5290, "token_acc": 0.29184046633073063 }, { "epoch": 3.101436528877162, "grad_norm": 0.26886454781954544, "learning_rate": 0.0004850600522658086, "loss": 3.075317621231079, "step": 5291, "token_acc": 0.29188111663512406 }, { "epoch": 3.1020228671943713, "grad_norm": 0.21866429340224428, "learning_rate": 0.0004850518004856773, "loss": 3.1019554138183594, "step": 5292, "token_acc": 0.2887729384789927 }, { "epoch": 3.1026092055115804, "grad_norm": 0.2502165669135519, "learning_rate": 0.0004850435464975512, "loss": 3.0623326301574707, "step": 5293, "token_acc": 0.29168286349571626 }, { "epoch": 3.103195543828789, "grad_norm": 0.23376412934713223, "learning_rate": 0.00048503529030150775, "loss": 3.111783504486084, "step": 5294, "token_acc": 0.28539289854188277 }, { "epoch": 3.103781882145998, "grad_norm": 0.21710513803628576, "learning_rate": 0.0004850270318976243, "loss": 3.0831615924835205, "step": 5295, "token_acc": 0.2898277330189297 }, { "epoch": 3.1043682204632073, "grad_norm": 0.20717101484088, "learning_rate": 0.0004850187712859787, "loss": 3.055173873901367, "step": 5296, "token_acc": 0.29347306781397464 }, { "epoch": 3.1049545587804164, "grad_norm": 0.2029586861226379, "learning_rate": 0.0004850105084666484, "loss": 3.0654706954956055, "step": 5297, "token_acc": 0.2934862383007271 }, { "epoch": 3.1055408970976255, "grad_norm": 0.2004458865589157, "learning_rate": 0.0004850022434397111, "loss": 3.065861701965332, "step": 5298, "token_acc": 0.29267319606984166 }, { "epoch": 3.106127235414834, "grad_norm": 0.22544908963570545, "learning_rate": 0.0004849939762052443, "loss": 3.1037955284118652, "step": 5299, "token_acc": 0.2848294344537394 }, { "epoch": 3.1067135737320433, "grad_norm": 0.23831678521993327, "learning_rate": 0.0004849857067633259, "loss": 3.0608458518981934, "step": 5300, "token_acc": 0.2922557701254564 }, { "epoch": 3.1072999120492524, "grad_norm": 0.23374211858360971, "learning_rate": 0.0004849774351140333, "loss": 3.141669511795044, "step": 5301, "token_acc": 0.28283381499432375 }, { "epoch": 3.1078862503664615, "grad_norm": 0.22371598794648104, "learning_rate": 0.0004849691612574444, "loss": 3.094804525375366, "step": 5302, "token_acc": 0.28672789584716457 }, { "epoch": 3.1084725886836706, "grad_norm": 0.2404846105501374, "learning_rate": 0.0004849608851936368, "loss": 3.0776517391204834, "step": 5303, "token_acc": 0.289813220974745 }, { "epoch": 3.1090589270008797, "grad_norm": 0.3060840663069173, "learning_rate": 0.00048495260692268835, "loss": 3.079890251159668, "step": 5304, "token_acc": 0.28916527712757284 }, { "epoch": 3.1096452653180884, "grad_norm": 0.2999643536338987, "learning_rate": 0.0004849443264446767, "loss": 3.0992326736450195, "step": 5305, "token_acc": 0.2880369744818466 }, { "epoch": 3.1102316036352975, "grad_norm": 0.22509195588169298, "learning_rate": 0.0004849360437596797, "loss": 3.0654430389404297, "step": 5306, "token_acc": 0.2930437166641743 }, { "epoch": 3.1108179419525066, "grad_norm": 0.3221086788026361, "learning_rate": 0.00048492775886777517, "loss": 3.084301710128784, "step": 5307, "token_acc": 0.2898172591961048 }, { "epoch": 3.1114042802697157, "grad_norm": 0.32032370696140006, "learning_rate": 0.00048491947176904093, "loss": 3.06071400642395, "step": 5308, "token_acc": 0.29273710592270147 }, { "epoch": 3.111990618586925, "grad_norm": 0.2331465000433474, "learning_rate": 0.0004849111824635548, "loss": 3.114689350128174, "step": 5309, "token_acc": 0.2861944432739522 }, { "epoch": 3.1125769569041335, "grad_norm": 0.2726687377570615, "learning_rate": 0.00048490289095139475, "loss": 3.0953264236450195, "step": 5310, "token_acc": 0.2878053843658774 }, { "epoch": 3.1131632952213426, "grad_norm": 0.21608582124177647, "learning_rate": 0.00048489459723263844, "loss": 3.0749454498291016, "step": 5311, "token_acc": 0.2920474555204092 }, { "epoch": 3.1137496335385517, "grad_norm": 0.2582227387398244, "learning_rate": 0.000484886301307364, "loss": 3.0242645740509033, "step": 5312, "token_acc": 0.29808135913068184 }, { "epoch": 3.114335971855761, "grad_norm": 0.21601353435900633, "learning_rate": 0.00048487800317564925, "loss": 3.077521800994873, "step": 5313, "token_acc": 0.28984336674776645 }, { "epoch": 3.11492231017297, "grad_norm": 0.2593558941101283, "learning_rate": 0.00048486970283757213, "loss": 3.058673620223999, "step": 5314, "token_acc": 0.29319421716656013 }, { "epoch": 3.115508648490179, "grad_norm": 0.2017696676825862, "learning_rate": 0.00048486140029321064, "loss": 3.076042890548706, "step": 5315, "token_acc": 0.2910030581995697 }, { "epoch": 3.1160949868073877, "grad_norm": 0.257433688741311, "learning_rate": 0.0004848530955426428, "loss": 3.0691733360290527, "step": 5316, "token_acc": 0.2915739652422131 }, { "epoch": 3.116681325124597, "grad_norm": 0.2546950586009729, "learning_rate": 0.0004848447885859466, "loss": 3.045759439468384, "step": 5317, "token_acc": 0.29363566341766745 }, { "epoch": 3.117267663441806, "grad_norm": 0.20880778957489565, "learning_rate": 0.0004848364794232, "loss": 3.087078809738159, "step": 5318, "token_acc": 0.2907360652553894 }, { "epoch": 3.117854001759015, "grad_norm": 0.2547893683665738, "learning_rate": 0.0004848281680544812, "loss": 3.058403491973877, "step": 5319, "token_acc": 0.29223249764450454 }, { "epoch": 3.118440340076224, "grad_norm": 0.19858354383454282, "learning_rate": 0.0004848198544798682, "loss": 3.01145339012146, "step": 5320, "token_acc": 0.3007392970830897 }, { "epoch": 3.119026678393433, "grad_norm": 0.2794088818526359, "learning_rate": 0.00048481153869943904, "loss": 3.074542999267578, "step": 5321, "token_acc": 0.2902742136228415 }, { "epoch": 3.119613016710642, "grad_norm": 0.29449820160115914, "learning_rate": 0.00048480322071327195, "loss": 3.047043800354004, "step": 5322, "token_acc": 0.2938045572600843 }, { "epoch": 3.120199355027851, "grad_norm": 0.24142235610320656, "learning_rate": 0.00048479490052144494, "loss": 3.064574718475342, "step": 5323, "token_acc": 0.29273269628861937 }, { "epoch": 3.12078569334506, "grad_norm": 0.24364992340981872, "learning_rate": 0.00048478657812403624, "loss": 3.032588005065918, "step": 5324, "token_acc": 0.29596231878783785 }, { "epoch": 3.1213720316622693, "grad_norm": 0.20400466666161168, "learning_rate": 0.000484778253521124, "loss": 3.031632900238037, "step": 5325, "token_acc": 0.2961214068206314 }, { "epoch": 3.121958369979478, "grad_norm": 0.27296219356350443, "learning_rate": 0.0004847699267127865, "loss": 3.0798611640930176, "step": 5326, "token_acc": 0.28916984671523466 }, { "epoch": 3.122544708296687, "grad_norm": 0.27509862934428064, "learning_rate": 0.0004847615976991019, "loss": 3.0601420402526855, "step": 5327, "token_acc": 0.29229890897282124 }, { "epoch": 3.123131046613896, "grad_norm": 0.2376644248015055, "learning_rate": 0.00048475326648014837, "loss": 3.0424389839172363, "step": 5328, "token_acc": 0.2949330659781925 }, { "epoch": 3.1237173849311053, "grad_norm": 0.20726554994579008, "learning_rate": 0.0004847449330560043, "loss": 3.0863256454467773, "step": 5329, "token_acc": 0.289624333362048 }, { "epoch": 3.1243037232483144, "grad_norm": 0.25046257941014555, "learning_rate": 0.0004847365974267478, "loss": 3.115145683288574, "step": 5330, "token_acc": 0.2848473445288629 }, { "epoch": 3.1248900615655235, "grad_norm": 0.24475307927658718, "learning_rate": 0.00048472825959245736, "loss": 3.0597195625305176, "step": 5331, "token_acc": 0.29335720981415064 }, { "epoch": 3.125476399882732, "grad_norm": 0.22238941553654298, "learning_rate": 0.00048471991955321124, "loss": 3.0325560569763184, "step": 5332, "token_acc": 0.296774110697316 }, { "epoch": 3.1260627381999413, "grad_norm": 0.23982642420961964, "learning_rate": 0.00048471157730908777, "loss": 3.0690784454345703, "step": 5333, "token_acc": 0.29068035526842767 }, { "epoch": 3.1266490765171504, "grad_norm": 0.2767881152244385, "learning_rate": 0.00048470323286016524, "loss": 3.070901870727539, "step": 5334, "token_acc": 0.2906237532964784 }, { "epoch": 3.1272354148343595, "grad_norm": 0.2805077892705656, "learning_rate": 0.00048469488620652215, "loss": 3.114424228668213, "step": 5335, "token_acc": 0.28730042086979757 }, { "epoch": 3.1278217531515686, "grad_norm": 0.24718539348355087, "learning_rate": 0.0004846865373482369, "loss": 3.070014715194702, "step": 5336, "token_acc": 0.2904370926773189 }, { "epoch": 3.1284080914687773, "grad_norm": 0.2365259547729129, "learning_rate": 0.0004846781862853877, "loss": 3.0651416778564453, "step": 5337, "token_acc": 0.29182760132939517 }, { "epoch": 3.1289944297859864, "grad_norm": 0.28748719063069794, "learning_rate": 0.0004846698330180533, "loss": 3.0983052253723145, "step": 5338, "token_acc": 0.288061741462763 }, { "epoch": 3.1295807681031955, "grad_norm": 0.28886662796127593, "learning_rate": 0.00048466147754631206, "loss": 3.0901296138763428, "step": 5339, "token_acc": 0.2880048198918238 }, { "epoch": 3.1301671064204046, "grad_norm": 0.2811265587975524, "learning_rate": 0.00048465311987024246, "loss": 3.0517702102661133, "step": 5340, "token_acc": 0.2936519311208782 }, { "epoch": 3.1307534447376137, "grad_norm": 0.2950524170626383, "learning_rate": 0.000484644759989923, "loss": 3.064511299133301, "step": 5341, "token_acc": 0.2917712758467698 }, { "epoch": 3.1313397830548224, "grad_norm": 0.2914655794971766, "learning_rate": 0.0004846363979054321, "loss": 3.1119766235351562, "step": 5342, "token_acc": 0.2865607367133622 }, { "epoch": 3.1319261213720315, "grad_norm": 0.2286959528273482, "learning_rate": 0.0004846280336168485, "loss": 3.049220561981201, "step": 5343, "token_acc": 0.2931493248088425 }, { "epoch": 3.1325124596892406, "grad_norm": 0.23448041818124438, "learning_rate": 0.0004846196671242507, "loss": 3.0791497230529785, "step": 5344, "token_acc": 0.2911683270798292 }, { "epoch": 3.1330987980064497, "grad_norm": 0.2542394106302144, "learning_rate": 0.00048461129842771724, "loss": 3.0905866622924805, "step": 5345, "token_acc": 0.2885547874702556 }, { "epoch": 3.133685136323659, "grad_norm": 0.2638202204267411, "learning_rate": 0.0004846029275273268, "loss": 3.102177143096924, "step": 5346, "token_acc": 0.28609541785163173 }, { "epoch": 3.134271474640868, "grad_norm": 0.3488620140066714, "learning_rate": 0.000484594554423158, "loss": 3.099637031555176, "step": 5347, "token_acc": 0.28690434627282546 }, { "epoch": 3.1348578129580766, "grad_norm": 0.2794835559970808, "learning_rate": 0.00048458617911528945, "loss": 3.1367764472961426, "step": 5348, "token_acc": 0.28238201920128075 }, { "epoch": 3.1354441512752858, "grad_norm": 0.2540916046055134, "learning_rate": 0.00048457780160379986, "loss": 3.0785274505615234, "step": 5349, "token_acc": 0.29003170879802376 }, { "epoch": 3.136030489592495, "grad_norm": 0.32584807891255, "learning_rate": 0.00048456942188876797, "loss": 3.0652501583099365, "step": 5350, "token_acc": 0.2923613947788629 }, { "epoch": 3.136616827909704, "grad_norm": 0.27031670283157927, "learning_rate": 0.00048456103997027237, "loss": 3.0286927223205566, "step": 5351, "token_acc": 0.2966758739430557 }, { "epoch": 3.137203166226913, "grad_norm": 0.3007439063100857, "learning_rate": 0.00048455265584839194, "loss": 3.056358575820923, "step": 5352, "token_acc": 0.2926556047443394 }, { "epoch": 3.1377895045441218, "grad_norm": 0.2327031701974762, "learning_rate": 0.0004845442695232053, "loss": 3.098090410232544, "step": 5353, "token_acc": 0.28827324965490675 }, { "epoch": 3.138375842861331, "grad_norm": 0.3038421216598257, "learning_rate": 0.0004845358809947914, "loss": 3.095351457595825, "step": 5354, "token_acc": 0.2870464634354313 }, { "epoch": 3.13896218117854, "grad_norm": 0.22523652142551745, "learning_rate": 0.00048452749026322884, "loss": 3.0919747352600098, "step": 5355, "token_acc": 0.2876081881151083 }, { "epoch": 3.139548519495749, "grad_norm": 0.28415555062441206, "learning_rate": 0.00048451909732859656, "loss": 3.109529972076416, "step": 5356, "token_acc": 0.28773102529960054 }, { "epoch": 3.140134857812958, "grad_norm": 0.2080174683453784, "learning_rate": 0.00048451070219097345, "loss": 3.1018543243408203, "step": 5357, "token_acc": 0.28869290294750466 }, { "epoch": 3.1407211961301673, "grad_norm": 0.29649041923947733, "learning_rate": 0.00048450230485043823, "loss": 3.0854685306549072, "step": 5358, "token_acc": 0.2884209081132205 }, { "epoch": 3.141307534447376, "grad_norm": 0.2353750921014541, "learning_rate": 0.0004844939053070699, "loss": 3.074301242828369, "step": 5359, "token_acc": 0.28948242254335116 }, { "epoch": 3.141893872764585, "grad_norm": 0.2774620941204048, "learning_rate": 0.0004844855035609472, "loss": 3.0552220344543457, "step": 5360, "token_acc": 0.2931546678465331 }, { "epoch": 3.142480211081794, "grad_norm": 0.25784343387254877, "learning_rate": 0.0004844770996121493, "loss": 3.0955235958099365, "step": 5361, "token_acc": 0.2881070251666591 }, { "epoch": 3.1430665493990033, "grad_norm": 0.26692897311862945, "learning_rate": 0.00048446869346075496, "loss": 3.0753328800201416, "step": 5362, "token_acc": 0.2915813659865615 }, { "epoch": 3.1436528877162124, "grad_norm": 0.2692112962259802, "learning_rate": 0.0004844602851068433, "loss": 3.096564531326294, "step": 5363, "token_acc": 0.29042160320370647 }, { "epoch": 3.144239226033421, "grad_norm": 0.24231124846070337, "learning_rate": 0.000484451874550493, "loss": 3.0609450340270996, "step": 5364, "token_acc": 0.2953631037473378 }, { "epoch": 3.14482556435063, "grad_norm": 0.2889304038224138, "learning_rate": 0.0004844434617917834, "loss": 3.111720085144043, "step": 5365, "token_acc": 0.2852307248760017 }, { "epoch": 3.1454119026678393, "grad_norm": 0.21652773527573413, "learning_rate": 0.00048443504683079333, "loss": 3.0766842365264893, "step": 5366, "token_acc": 0.29031821743113884 }, { "epoch": 3.1459982409850484, "grad_norm": 0.30683036986750867, "learning_rate": 0.000484426629667602, "loss": 3.088304042816162, "step": 5367, "token_acc": 0.28739718772204437 }, { "epoch": 3.1465845793022575, "grad_norm": 0.20812457454841551, "learning_rate": 0.0004844182103022883, "loss": 3.091416597366333, "step": 5368, "token_acc": 0.28777015539175593 }, { "epoch": 3.1471709176194667, "grad_norm": 0.262296668432293, "learning_rate": 0.00048440978873493136, "loss": 3.0604071617126465, "step": 5369, "token_acc": 0.2923104206756539 }, { "epoch": 3.1477572559366753, "grad_norm": 0.2379811405894767, "learning_rate": 0.0004844013649656104, "loss": 3.0911827087402344, "step": 5370, "token_acc": 0.2876997154738455 }, { "epoch": 3.1483435942538844, "grad_norm": 0.262893051684386, "learning_rate": 0.0004843929389944044, "loss": 3.022144079208374, "step": 5371, "token_acc": 0.2977118186724231 }, { "epoch": 3.1489299325710935, "grad_norm": 0.2850739504790262, "learning_rate": 0.0004843845108213927, "loss": 3.1021642684936523, "step": 5372, "token_acc": 0.2871994023836978 }, { "epoch": 3.1495162708883027, "grad_norm": 0.23454127255866442, "learning_rate": 0.0004843760804466543, "loss": 3.072443723678589, "step": 5373, "token_acc": 0.29314002260589145 }, { "epoch": 3.1501026092055118, "grad_norm": 0.2457032619019582, "learning_rate": 0.00048436764787026837, "loss": 3.06713604927063, "step": 5374, "token_acc": 0.2919216305541637 }, { "epoch": 3.1506889475227204, "grad_norm": 0.22819694606477195, "learning_rate": 0.00048435921309231426, "loss": 3.124382495880127, "step": 5375, "token_acc": 0.28441843189570304 }, { "epoch": 3.1512752858399296, "grad_norm": 0.24297134445500634, "learning_rate": 0.0004843507761128712, "loss": 3.067120313644409, "step": 5376, "token_acc": 0.290918088001724 }, { "epoch": 3.1518616241571387, "grad_norm": 0.25449735185912636, "learning_rate": 0.00048434233693201833, "loss": 3.109018087387085, "step": 5377, "token_acc": 0.2864034596766928 }, { "epoch": 3.1524479624743478, "grad_norm": 0.2550315975994766, "learning_rate": 0.000484333895549835, "loss": 3.070399761199951, "step": 5378, "token_acc": 0.29107993320000825 }, { "epoch": 3.153034300791557, "grad_norm": 0.19232181794031733, "learning_rate": 0.0004843254519664005, "loss": 3.1145553588867188, "step": 5379, "token_acc": 0.28402401190506216 }, { "epoch": 3.1536206391087656, "grad_norm": 0.24998199395252224, "learning_rate": 0.0004843170061817941, "loss": 3.1127700805664062, "step": 5380, "token_acc": 0.2851928244869932 }, { "epoch": 3.1542069774259747, "grad_norm": 0.21220249260568033, "learning_rate": 0.0004843085581960953, "loss": 3.0855536460876465, "step": 5381, "token_acc": 0.2870381086654602 }, { "epoch": 3.154793315743184, "grad_norm": 0.2156426542553022, "learning_rate": 0.0004843001080093832, "loss": 3.073103427886963, "step": 5382, "token_acc": 0.29155722806748074 }, { "epoch": 3.155379654060393, "grad_norm": 0.2025900706281843, "learning_rate": 0.0004842916556217373, "loss": 3.064711332321167, "step": 5383, "token_acc": 0.2913845153986457 }, { "epoch": 3.155965992377602, "grad_norm": 0.2762018758559658, "learning_rate": 0.0004842832010332371, "loss": 3.1205625534057617, "step": 5384, "token_acc": 0.2847077975274972 }, { "epoch": 3.1565523306948107, "grad_norm": 0.25236008576031277, "learning_rate": 0.0004842747442439619, "loss": 3.085803508758545, "step": 5385, "token_acc": 0.2875594523952352 }, { "epoch": 3.15713866901202, "grad_norm": 0.1925612316388654, "learning_rate": 0.00048426628525399107, "loss": 3.127965211868286, "step": 5386, "token_acc": 0.2835134336731781 }, { "epoch": 3.157725007329229, "grad_norm": 0.2678794724195621, "learning_rate": 0.00048425782406340425, "loss": 3.0816855430603027, "step": 5387, "token_acc": 0.28798827682406675 }, { "epoch": 3.158311345646438, "grad_norm": 0.23982066458650814, "learning_rate": 0.00048424936067228085, "loss": 3.0889694690704346, "step": 5388, "token_acc": 0.29158860619246185 }, { "epoch": 3.158897683963647, "grad_norm": 0.22162238777793375, "learning_rate": 0.00048424089508070035, "loss": 3.1221237182617188, "step": 5389, "token_acc": 0.2845974605760358 }, { "epoch": 3.1594840222808562, "grad_norm": 0.2987548212310297, "learning_rate": 0.0004842324272887423, "loss": 3.088703155517578, "step": 5390, "token_acc": 0.289970086498133 }, { "epoch": 3.160070360598065, "grad_norm": 0.231410939295816, "learning_rate": 0.00048422395729648616, "loss": 3.1149497032165527, "step": 5391, "token_acc": 0.2861084443712342 }, { "epoch": 3.160656698915274, "grad_norm": 0.26257703828829787, "learning_rate": 0.0004842154851040116, "loss": 3.1110424995422363, "step": 5392, "token_acc": 0.2877152423054293 }, { "epoch": 3.161243037232483, "grad_norm": 0.3049586605964518, "learning_rate": 0.00048420701071139825, "loss": 3.0953636169433594, "step": 5393, "token_acc": 0.287868920438385 }, { "epoch": 3.1618293755496922, "grad_norm": 0.2174744563175756, "learning_rate": 0.0004841985341187255, "loss": 3.0917856693267822, "step": 5394, "token_acc": 0.2874669114120304 }, { "epoch": 3.1624157138669013, "grad_norm": 0.23318761866910048, "learning_rate": 0.00048419005532607316, "loss": 3.0464816093444824, "step": 5395, "token_acc": 0.2949274554702223 }, { "epoch": 3.16300205218411, "grad_norm": 0.24117122129786508, "learning_rate": 0.0004841815743335208, "loss": 3.084419012069702, "step": 5396, "token_acc": 0.28896534593289047 }, { "epoch": 3.163588390501319, "grad_norm": 0.21914907510306686, "learning_rate": 0.00048417309114114814, "loss": 3.102555751800537, "step": 5397, "token_acc": 0.28638021296770955 }, { "epoch": 3.1641747288185282, "grad_norm": 0.2352310785952553, "learning_rate": 0.00048416460574903484, "loss": 3.090198278427124, "step": 5398, "token_acc": 0.2871389858334842 }, { "epoch": 3.1647610671357373, "grad_norm": 0.20086174451892444, "learning_rate": 0.0004841561181572607, "loss": 3.0813889503479004, "step": 5399, "token_acc": 0.290448620900146 }, { "epoch": 3.1653474054529465, "grad_norm": 0.19788378806047002, "learning_rate": 0.00048414762836590525, "loss": 3.088487386703491, "step": 5400, "token_acc": 0.2894971250573388 }, { "epoch": 3.1659337437701556, "grad_norm": 0.21623090661415612, "learning_rate": 0.0004841391363750484, "loss": 3.1092023849487305, "step": 5401, "token_acc": 0.2847902758374134 }, { "epoch": 3.1665200820873642, "grad_norm": 0.23181314100769357, "learning_rate": 0.0004841306421847698, "loss": 3.1072206497192383, "step": 5402, "token_acc": 0.28503549829082303 }, { "epoch": 3.1671064204045734, "grad_norm": 0.2574239391829674, "learning_rate": 0.00048412214579514936, "loss": 3.077296257019043, "step": 5403, "token_acc": 0.29125323378196943 }, { "epoch": 3.1676927587217825, "grad_norm": 0.22701215493923937, "learning_rate": 0.0004841136472062668, "loss": 3.088351249694824, "step": 5404, "token_acc": 0.28836280811757653 }, { "epoch": 3.1682790970389916, "grad_norm": 0.20295419192559722, "learning_rate": 0.0004841051464182021, "loss": 3.0589370727539062, "step": 5405, "token_acc": 0.29253242170960814 }, { "epoch": 3.1688654353562007, "grad_norm": 0.2607549727110417, "learning_rate": 0.00048409664343103496, "loss": 3.079810619354248, "step": 5406, "token_acc": 0.2921655108914506 }, { "epoch": 3.1694517736734094, "grad_norm": 0.2556801178922923, "learning_rate": 0.0004840881382448453, "loss": 3.0851964950561523, "step": 5407, "token_acc": 0.28816055732383383 }, { "epoch": 3.1700381119906185, "grad_norm": 0.23139362571177743, "learning_rate": 0.00048407963085971294, "loss": 3.097228527069092, "step": 5408, "token_acc": 0.28813098979552243 }, { "epoch": 3.1706244503078276, "grad_norm": 0.27629954609462326, "learning_rate": 0.00048407112127571796, "loss": 3.0813326835632324, "step": 5409, "token_acc": 0.28972388242999836 }, { "epoch": 3.1712107886250367, "grad_norm": 0.3506953779670314, "learning_rate": 0.0004840626094929402, "loss": 3.105868339538574, "step": 5410, "token_acc": 0.28555266917454264 }, { "epoch": 3.171797126942246, "grad_norm": 0.25849498782941216, "learning_rate": 0.0004840540955114596, "loss": 3.1221442222595215, "step": 5411, "token_acc": 0.28317703586662857 }, { "epoch": 3.172383465259455, "grad_norm": 0.23552736836129423, "learning_rate": 0.00048404557933135617, "loss": 3.0864925384521484, "step": 5412, "token_acc": 0.28969401578891896 }, { "epoch": 3.1729698035766636, "grad_norm": 0.286349093098653, "learning_rate": 0.00048403706095270993, "loss": 3.0716917514801025, "step": 5413, "token_acc": 0.29013902048638435 }, { "epoch": 3.1735561418938727, "grad_norm": 0.2913096333447423, "learning_rate": 0.00048402854037560083, "loss": 3.0626206398010254, "step": 5414, "token_acc": 0.2925302135531545 }, { "epoch": 3.174142480211082, "grad_norm": 0.2868411487503322, "learning_rate": 0.0004840200176001091, "loss": 3.104795455932617, "step": 5415, "token_acc": 0.28755177509130025 }, { "epoch": 3.174728818528291, "grad_norm": 0.22780602563737737, "learning_rate": 0.00048401149262631443, "loss": 3.041975975036621, "step": 5416, "token_acc": 0.2953545206954757 }, { "epoch": 3.1753151568455, "grad_norm": 0.30273646416810757, "learning_rate": 0.0004840029654542972, "loss": 3.057687997817993, "step": 5417, "token_acc": 0.293371367332787 }, { "epoch": 3.1759014951627087, "grad_norm": 0.2503287543371628, "learning_rate": 0.0004839944360841375, "loss": 3.1214709281921387, "step": 5418, "token_acc": 0.28591176161997695 }, { "epoch": 3.176487833479918, "grad_norm": 0.2723316840276955, "learning_rate": 0.0004839859045159153, "loss": 3.032430648803711, "step": 5419, "token_acc": 0.2966773144182298 }, { "epoch": 3.177074171797127, "grad_norm": 0.2591239684175086, "learning_rate": 0.0004839773707497109, "loss": 3.0948195457458496, "step": 5420, "token_acc": 0.2878129631371433 }, { "epoch": 3.177660510114336, "grad_norm": 0.242404775241652, "learning_rate": 0.0004839688347856044, "loss": 3.1006669998168945, "step": 5421, "token_acc": 0.2850978411489698 }, { "epoch": 3.178246848431545, "grad_norm": 0.28534832713108255, "learning_rate": 0.0004839602966236759, "loss": 3.067765951156616, "step": 5422, "token_acc": 0.2915206294920092 }, { "epoch": 3.1788331867487543, "grad_norm": 0.2461833356481782, "learning_rate": 0.00048395175626400567, "loss": 3.0894522666931152, "step": 5423, "token_acc": 0.28878316414429217 }, { "epoch": 3.179419525065963, "grad_norm": 0.22623942131667513, "learning_rate": 0.00048394321370667396, "loss": 3.080869674682617, "step": 5424, "token_acc": 0.28945344188012423 }, { "epoch": 3.180005863383172, "grad_norm": 0.20643540575504152, "learning_rate": 0.00048393466895176106, "loss": 3.1093862056732178, "step": 5425, "token_acc": 0.2849288688954065 }, { "epoch": 3.180592201700381, "grad_norm": 0.25200526921651256, "learning_rate": 0.0004839261219993472, "loss": 3.062257766723633, "step": 5426, "token_acc": 0.2926565931606196 }, { "epoch": 3.1811785400175903, "grad_norm": 0.22787933108896155, "learning_rate": 0.00048391757284951256, "loss": 3.0733144283294678, "step": 5427, "token_acc": 0.2902532743874726 }, { "epoch": 3.1817648783347994, "grad_norm": 0.20010755065674193, "learning_rate": 0.0004839090215023375, "loss": 3.106715202331543, "step": 5428, "token_acc": 0.2864649795318354 }, { "epoch": 3.182351216652008, "grad_norm": 0.2665846649274655, "learning_rate": 0.00048390046795790246, "loss": 3.067208766937256, "step": 5429, "token_acc": 0.2916715598876872 }, { "epoch": 3.182937554969217, "grad_norm": 0.22573517009977176, "learning_rate": 0.00048389191221628766, "loss": 3.110297203063965, "step": 5430, "token_acc": 0.28531988496218286 }, { "epoch": 3.1835238932864263, "grad_norm": 0.23795589348656535, "learning_rate": 0.00048388335427757353, "loss": 3.0575461387634277, "step": 5431, "token_acc": 0.29496478422333205 }, { "epoch": 3.1841102316036354, "grad_norm": 0.23059708429853826, "learning_rate": 0.0004838747941418404, "loss": 3.097414970397949, "step": 5432, "token_acc": 0.2870912766241318 }, { "epoch": 3.1846965699208445, "grad_norm": 0.25725779749496724, "learning_rate": 0.0004838662318091688, "loss": 3.0693695545196533, "step": 5433, "token_acc": 0.2908157320303579 }, { "epoch": 3.185282908238053, "grad_norm": 0.2273306144777121, "learning_rate": 0.00048385766727963907, "loss": 3.095987558364868, "step": 5434, "token_acc": 0.2883468723678248 }, { "epoch": 3.1858692465552623, "grad_norm": 0.21600413921559986, "learning_rate": 0.00048384910055333173, "loss": 3.0629045963287354, "step": 5435, "token_acc": 0.2915187156387894 }, { "epoch": 3.1864555848724714, "grad_norm": 0.23535460203106645, "learning_rate": 0.00048384053163032714, "loss": 3.0876994132995605, "step": 5436, "token_acc": 0.28865827709559916 }, { "epoch": 3.1870419231896805, "grad_norm": 0.20340423157478893, "learning_rate": 0.0004838319605107059, "loss": 3.0654821395874023, "step": 5437, "token_acc": 0.2919456945371547 }, { "epoch": 3.1876282615068896, "grad_norm": 0.26068732820882895, "learning_rate": 0.0004838233871945485, "loss": 3.0552265644073486, "step": 5438, "token_acc": 0.2934917603348049 }, { "epoch": 3.1882145998240983, "grad_norm": 0.24787226519879124, "learning_rate": 0.0004838148116819354, "loss": 3.0732932090759277, "step": 5439, "token_acc": 0.29001191844379176 }, { "epoch": 3.1888009381413074, "grad_norm": 0.21675574358974603, "learning_rate": 0.00048380623397294723, "loss": 3.1133170127868652, "step": 5440, "token_acc": 0.2867183601914908 }, { "epoch": 3.1893872764585165, "grad_norm": 0.1986820327999222, "learning_rate": 0.00048379765406766456, "loss": 3.098450183868408, "step": 5441, "token_acc": 0.2868654858549923 }, { "epoch": 3.1899736147757256, "grad_norm": 0.26916121527649106, "learning_rate": 0.00048378907196616793, "loss": 3.086653470993042, "step": 5442, "token_acc": 0.28983141557398984 }, { "epoch": 3.1905599530929347, "grad_norm": 0.3488644789597987, "learning_rate": 0.0004837804876685381, "loss": 3.110848903656006, "step": 5443, "token_acc": 0.28541291643670985 }, { "epoch": 3.191146291410144, "grad_norm": 0.25415888971842154, "learning_rate": 0.0004837719011748556, "loss": 3.1153311729431152, "step": 5444, "token_acc": 0.2872074209616215 }, { "epoch": 3.1917326297273525, "grad_norm": 0.24029906246948998, "learning_rate": 0.00048376331248520103, "loss": 3.07814884185791, "step": 5445, "token_acc": 0.28974220556869845 }, { "epoch": 3.1923189680445616, "grad_norm": 0.3444781676707779, "learning_rate": 0.00048375472159965517, "loss": 3.107276439666748, "step": 5446, "token_acc": 0.2859525055364401 }, { "epoch": 3.1929053063617707, "grad_norm": 0.2574197223305299, "learning_rate": 0.00048374612851829866, "loss": 3.1433699131011963, "step": 5447, "token_acc": 0.2814090757087578 }, { "epoch": 3.19349164467898, "grad_norm": 0.2349115132285172, "learning_rate": 0.0004837375332412123, "loss": 3.1310391426086426, "step": 5448, "token_acc": 0.2841380946731522 }, { "epoch": 3.194077982996189, "grad_norm": 0.22250479862476977, "learning_rate": 0.00048372893576847676, "loss": 3.065086841583252, "step": 5449, "token_acc": 0.29228825241231426 }, { "epoch": 3.1946643213133976, "grad_norm": 0.21986526737823042, "learning_rate": 0.00048372033610017285, "loss": 3.068359851837158, "step": 5450, "token_acc": 0.29152397150457776 }, { "epoch": 3.1952506596306067, "grad_norm": 0.2010440904458156, "learning_rate": 0.0004837117342363813, "loss": 3.138734817504883, "step": 5451, "token_acc": 0.28323608799374983 }, { "epoch": 3.195836997947816, "grad_norm": 0.1967668288092555, "learning_rate": 0.00048370313017718293, "loss": 3.0715856552124023, "step": 5452, "token_acc": 0.2914232569706924 }, { "epoch": 3.196423336265025, "grad_norm": 0.2220683877189509, "learning_rate": 0.0004836945239226586, "loss": 3.029512405395508, "step": 5453, "token_acc": 0.2957632385515545 }, { "epoch": 3.197009674582234, "grad_norm": 0.22230900602186066, "learning_rate": 0.00048368591547288904, "loss": 3.04811692237854, "step": 5454, "token_acc": 0.2942054333819323 }, { "epoch": 3.197596012899443, "grad_norm": 0.23886735602667203, "learning_rate": 0.00048367730482795526, "loss": 3.0767033100128174, "step": 5455, "token_acc": 0.2910709311117891 }, { "epoch": 3.198182351216652, "grad_norm": 0.22109707624850036, "learning_rate": 0.00048366869198793807, "loss": 3.068223476409912, "step": 5456, "token_acc": 0.2934374149517449 }, { "epoch": 3.198768689533861, "grad_norm": 0.2263894580239652, "learning_rate": 0.0004836600769529184, "loss": 3.06014347076416, "step": 5457, "token_acc": 0.29183797192226335 }, { "epoch": 3.19935502785107, "grad_norm": 0.2243096443099745, "learning_rate": 0.00048365145972297717, "loss": 3.0624053478240967, "step": 5458, "token_acc": 0.2908863618682784 }, { "epoch": 3.199941366168279, "grad_norm": 0.18750881468522201, "learning_rate": 0.0004836428402981954, "loss": 3.012996196746826, "step": 5459, "token_acc": 0.3013225334659937 }, { "epoch": 3.2005277044854883, "grad_norm": 0.273552323644599, "learning_rate": 0.0004836342186786539, "loss": 3.115236759185791, "step": 5460, "token_acc": 0.2854183246141603 }, { "epoch": 3.201114042802697, "grad_norm": 0.38368304604424686, "learning_rate": 0.0004836255948644337, "loss": 3.0715832710266113, "step": 5461, "token_acc": 0.29018726202388534 }, { "epoch": 3.201700381119906, "grad_norm": 0.3036228903578986, "learning_rate": 0.0004836169688556159, "loss": 3.0731489658355713, "step": 5462, "token_acc": 0.2897559117801857 }, { "epoch": 3.202286719437115, "grad_norm": 0.23620765641881641, "learning_rate": 0.0004836083406522815, "loss": 3.1141715049743652, "step": 5463, "token_acc": 0.28582330207025575 }, { "epoch": 3.2028730577543243, "grad_norm": 0.3048368845601399, "learning_rate": 0.0004835997102545115, "loss": 3.043590545654297, "step": 5464, "token_acc": 0.29727681965559855 }, { "epoch": 3.2034593960715334, "grad_norm": 0.22769527062763317, "learning_rate": 0.000483591077662387, "loss": 3.0908002853393555, "step": 5465, "token_acc": 0.2890580712300256 }, { "epoch": 3.2040457343887425, "grad_norm": 0.2632695950206581, "learning_rate": 0.0004835824428759891, "loss": 3.1086792945861816, "step": 5466, "token_acc": 0.2859434135539465 }, { "epoch": 3.204632072705951, "grad_norm": 0.2310896011015095, "learning_rate": 0.00048357380589539897, "loss": 3.1089518070220947, "step": 5467, "token_acc": 0.2862588332387683 }, { "epoch": 3.2052184110231603, "grad_norm": 0.2630610456656749, "learning_rate": 0.0004835651667206976, "loss": 3.103029251098633, "step": 5468, "token_acc": 0.2864836428323499 }, { "epoch": 3.2058047493403694, "grad_norm": 0.23435979078593147, "learning_rate": 0.00048355652535196625, "loss": 3.0748791694641113, "step": 5469, "token_acc": 0.28868769591663035 }, { "epoch": 3.2063910876575785, "grad_norm": 0.3784516082466502, "learning_rate": 0.00048354788178928604, "loss": 3.1102170944213867, "step": 5470, "token_acc": 0.2860081144290229 }, { "epoch": 3.2069774259747876, "grad_norm": 0.22306993861077215, "learning_rate": 0.0004835392360327382, "loss": 3.1052050590515137, "step": 5471, "token_acc": 0.2875476759072006 }, { "epoch": 3.2075637642919963, "grad_norm": 0.2693641644113196, "learning_rate": 0.000483530588082404, "loss": 3.078205108642578, "step": 5472, "token_acc": 0.2908847396617234 }, { "epoch": 3.2081501026092054, "grad_norm": 0.21399341728918342, "learning_rate": 0.0004835219379383645, "loss": 3.083387851715088, "step": 5473, "token_acc": 0.2884688494367745 }, { "epoch": 3.2087364409264145, "grad_norm": 0.2729535591672113, "learning_rate": 0.00048351328560070116, "loss": 3.073638439178467, "step": 5474, "token_acc": 0.28968790797737787 }, { "epoch": 3.2093227792436236, "grad_norm": 0.22863452677591722, "learning_rate": 0.00048350463106949516, "loss": 3.1276912689208984, "step": 5475, "token_acc": 0.28609252756045517 }, { "epoch": 3.2099091175608327, "grad_norm": 0.22223867307764247, "learning_rate": 0.0004834959743448277, "loss": 3.113156318664551, "step": 5476, "token_acc": 0.2850723363998699 }, { "epoch": 3.210495455878042, "grad_norm": 0.2583761047262608, "learning_rate": 0.0004834873154267803, "loss": 3.0841078758239746, "step": 5477, "token_acc": 0.28956811228604595 }, { "epoch": 3.2110817941952505, "grad_norm": 0.2312522671351118, "learning_rate": 0.00048347865431543416, "loss": 3.1102442741394043, "step": 5478, "token_acc": 0.2879688941473254 }, { "epoch": 3.2116681325124596, "grad_norm": 0.24471345013846232, "learning_rate": 0.00048346999101087074, "loss": 3.107431411743164, "step": 5479, "token_acc": 0.2860838447646474 }, { "epoch": 3.2122544708296688, "grad_norm": 0.22558153654015145, "learning_rate": 0.0004834613255131713, "loss": 3.136211633682251, "step": 5480, "token_acc": 0.2802895361141678 }, { "epoch": 3.212840809146878, "grad_norm": 0.23939409213433005, "learning_rate": 0.0004834526578224173, "loss": 3.027445077896118, "step": 5481, "token_acc": 0.2982794953330769 }, { "epoch": 3.213427147464087, "grad_norm": 0.21857384711031744, "learning_rate": 0.0004834439879386902, "loss": 3.0736289024353027, "step": 5482, "token_acc": 0.29021089077746304 }, { "epoch": 3.2140134857812956, "grad_norm": 0.25467179521175926, "learning_rate": 0.00048343531586207136, "loss": 3.046515941619873, "step": 5483, "token_acc": 0.29532265211657865 }, { "epoch": 3.2145998240985048, "grad_norm": 0.26024118648733696, "learning_rate": 0.00048342664159264226, "loss": 3.06687068939209, "step": 5484, "token_acc": 0.2907771385959305 }, { "epoch": 3.215186162415714, "grad_norm": 0.20227729939136405, "learning_rate": 0.00048341796513048447, "loss": 3.0750865936279297, "step": 5485, "token_acc": 0.29024268010778786 }, { "epoch": 3.215772500732923, "grad_norm": 0.2412945672599017, "learning_rate": 0.0004834092864756794, "loss": 3.087524890899658, "step": 5486, "token_acc": 0.2875397199306342 }, { "epoch": 3.216358839050132, "grad_norm": 0.19130565217316675, "learning_rate": 0.00048340060562830867, "loss": 3.06927752494812, "step": 5487, "token_acc": 0.2926132787916929 }, { "epoch": 3.2169451773673408, "grad_norm": 0.2064681364454697, "learning_rate": 0.00048339192258845367, "loss": 3.065518856048584, "step": 5488, "token_acc": 0.2935095434347155 }, { "epoch": 3.21753151568455, "grad_norm": 0.1883689287245335, "learning_rate": 0.0004833832373561961, "loss": 3.098879098892212, "step": 5489, "token_acc": 0.2875645031204241 }, { "epoch": 3.218117854001759, "grad_norm": 0.23256275387425984, "learning_rate": 0.0004833745499316175, "loss": 3.0732219219207764, "step": 5490, "token_acc": 0.2896942603975413 }, { "epoch": 3.218704192318968, "grad_norm": 0.21086061395240516, "learning_rate": 0.00048336586031479947, "loss": 3.0566999912261963, "step": 5491, "token_acc": 0.29370183221863316 }, { "epoch": 3.219290530636177, "grad_norm": 0.21528969885137095, "learning_rate": 0.0004833571685058237, "loss": 3.073592185974121, "step": 5492, "token_acc": 0.2895099269299108 }, { "epoch": 3.219876868953386, "grad_norm": 0.27959681847456236, "learning_rate": 0.0004833484745047717, "loss": 3.0748610496520996, "step": 5493, "token_acc": 0.2908596434459699 }, { "epoch": 3.220463207270595, "grad_norm": 0.2115260374862627, "learning_rate": 0.00048333977831172524, "loss": 3.111239433288574, "step": 5494, "token_acc": 0.2857252376571604 }, { "epoch": 3.221049545587804, "grad_norm": 0.27346730532481467, "learning_rate": 0.00048333107992676604, "loss": 3.136847972869873, "step": 5495, "token_acc": 0.2816148693939693 }, { "epoch": 3.221635883905013, "grad_norm": 0.26843294646153165, "learning_rate": 0.00048332237934997575, "loss": 3.0761208534240723, "step": 5496, "token_acc": 0.29268709298601475 }, { "epoch": 3.2222222222222223, "grad_norm": 0.21955877344501762, "learning_rate": 0.0004833136765814361, "loss": 3.082918643951416, "step": 5497, "token_acc": 0.28897021439134485 }, { "epoch": 3.2228085605394314, "grad_norm": 0.24079336407276455, "learning_rate": 0.0004833049716212289, "loss": 3.0783653259277344, "step": 5498, "token_acc": 0.2902049102328237 }, { "epoch": 3.22339489885664, "grad_norm": 0.24800655161202645, "learning_rate": 0.00048329626446943575, "loss": 3.0728607177734375, "step": 5499, "token_acc": 0.2918452761782896 }, { "epoch": 3.223981237173849, "grad_norm": 0.2836032110449169, "learning_rate": 0.00048328755512613863, "loss": 3.0943117141723633, "step": 5500, "token_acc": 0.28712881515265776 }, { "epoch": 3.2245675754910583, "grad_norm": 0.24180727749636519, "learning_rate": 0.00048327884359141934, "loss": 3.086979866027832, "step": 5501, "token_acc": 0.29173869946044056 }, { "epoch": 3.2251539138082674, "grad_norm": 0.24400606363084748, "learning_rate": 0.0004832701298653596, "loss": 3.037991523742676, "step": 5502, "token_acc": 0.29601076383049135 }, { "epoch": 3.2257402521254765, "grad_norm": 0.280852611947615, "learning_rate": 0.00048326141394804134, "loss": 3.0824034214019775, "step": 5503, "token_acc": 0.28942529377002113 }, { "epoch": 3.226326590442685, "grad_norm": 0.23238860234146932, "learning_rate": 0.00048325269583954645, "loss": 3.0493392944335938, "step": 5504, "token_acc": 0.294387283037776 }, { "epoch": 3.2269129287598943, "grad_norm": 0.29869128465079686, "learning_rate": 0.0004832439755399568, "loss": 3.10185170173645, "step": 5505, "token_acc": 0.28625831479744046 }, { "epoch": 3.2274992670771034, "grad_norm": 0.24330989069721473, "learning_rate": 0.00048323525304935425, "loss": 3.099276304244995, "step": 5506, "token_acc": 0.28714688690197354 }, { "epoch": 3.2280856053943126, "grad_norm": 0.23793232832588967, "learning_rate": 0.00048322652836782075, "loss": 3.041694402694702, "step": 5507, "token_acc": 0.29627967119710064 }, { "epoch": 3.2286719437115217, "grad_norm": 0.22253746961225015, "learning_rate": 0.00048321780149543836, "loss": 3.112489700317383, "step": 5508, "token_acc": 0.28457195154109854 }, { "epoch": 3.2292582820287308, "grad_norm": 0.21079173998597106, "learning_rate": 0.000483209072432289, "loss": 3.066570520401001, "step": 5509, "token_acc": 0.2917556487120673 }, { "epoch": 3.2298446203459394, "grad_norm": 0.20509739825352644, "learning_rate": 0.00048320034117845466, "loss": 3.0440754890441895, "step": 5510, "token_acc": 0.2956563280713616 }, { "epoch": 3.2304309586631486, "grad_norm": 0.2397538912892448, "learning_rate": 0.0004831916077340173, "loss": 3.081899642944336, "step": 5511, "token_acc": 0.2880393230500597 }, { "epoch": 3.2310172969803577, "grad_norm": 0.2847713314617964, "learning_rate": 0.0004831828720990591, "loss": 3.136575222015381, "step": 5512, "token_acc": 0.28119859587240753 }, { "epoch": 3.231603635297567, "grad_norm": 0.20153762497357788, "learning_rate": 0.00048317413427366196, "loss": 3.121065616607666, "step": 5513, "token_acc": 0.2853670651179091 }, { "epoch": 3.232189973614776, "grad_norm": 0.26840765200651917, "learning_rate": 0.0004831653942579081, "loss": 3.084972381591797, "step": 5514, "token_acc": 0.2891232847414038 }, { "epoch": 3.2327763119319846, "grad_norm": 0.2897725008131054, "learning_rate": 0.0004831566520518795, "loss": 3.0773301124572754, "step": 5515, "token_acc": 0.28984804613526116 }, { "epoch": 3.2333626502491937, "grad_norm": 0.2360949334406414, "learning_rate": 0.00048314790765565833, "loss": 3.0869359970092773, "step": 5516, "token_acc": 0.28876775362988016 }, { "epoch": 3.233948988566403, "grad_norm": 0.2618863808779009, "learning_rate": 0.00048313916106932676, "loss": 3.0679574012756348, "step": 5517, "token_acc": 0.29228825540175557 }, { "epoch": 3.234535326883612, "grad_norm": 0.26822296565669146, "learning_rate": 0.00048313041229296693, "loss": 3.0789883136749268, "step": 5518, "token_acc": 0.2907333108254883 }, { "epoch": 3.235121665200821, "grad_norm": 0.22257082193300218, "learning_rate": 0.000483121661326661, "loss": 3.1184747219085693, "step": 5519, "token_acc": 0.2865320785661291 }, { "epoch": 3.23570800351803, "grad_norm": 0.22930275527166308, "learning_rate": 0.00048311290817049123, "loss": 3.0690481662750244, "step": 5520, "token_acc": 0.290849865082546 }, { "epoch": 3.236294341835239, "grad_norm": 0.26003098408709757, "learning_rate": 0.0004831041528245398, "loss": 3.083949565887451, "step": 5521, "token_acc": 0.28990047176010236 }, { "epoch": 3.236880680152448, "grad_norm": 0.24657393657409338, "learning_rate": 0.000483095395288889, "loss": 3.0486793518066406, "step": 5522, "token_acc": 0.29499796207310797 }, { "epoch": 3.237467018469657, "grad_norm": 0.2117358387276992, "learning_rate": 0.00048308663556362097, "loss": 3.094142198562622, "step": 5523, "token_acc": 0.2884252975053044 }, { "epoch": 3.238053356786866, "grad_norm": 0.2231204335676452, "learning_rate": 0.00048307787364881816, "loss": 3.1309266090393066, "step": 5524, "token_acc": 0.2814122065528463 }, { "epoch": 3.2386396951040752, "grad_norm": 0.2530448252927042, "learning_rate": 0.0004830691095445628, "loss": 3.1188931465148926, "step": 5525, "token_acc": 0.2842186854337186 }, { "epoch": 3.239226033421284, "grad_norm": 0.2305938803963083, "learning_rate": 0.00048306034325093717, "loss": 3.074246406555176, "step": 5526, "token_acc": 0.2922767810114516 }, { "epoch": 3.239812371738493, "grad_norm": 0.2281008706649152, "learning_rate": 0.0004830515747680237, "loss": 3.1134886741638184, "step": 5527, "token_acc": 0.2856305657497532 }, { "epoch": 3.240398710055702, "grad_norm": 0.2092998691822021, "learning_rate": 0.0004830428040959048, "loss": 3.090656042098999, "step": 5528, "token_acc": 0.28787272468789327 }, { "epoch": 3.2409850483729112, "grad_norm": 0.2027139671287903, "learning_rate": 0.0004830340312346627, "loss": 3.074843168258667, "step": 5529, "token_acc": 0.2911344813426269 }, { "epoch": 3.2415713866901203, "grad_norm": 0.21946936744481052, "learning_rate": 0.00048302525618437985, "loss": 3.0870721340179443, "step": 5530, "token_acc": 0.2876531907033521 }, { "epoch": 3.2421577250073295, "grad_norm": 0.20031433622959838, "learning_rate": 0.0004830164789451388, "loss": 3.02978515625, "step": 5531, "token_acc": 0.29657000954281254 }, { "epoch": 3.242744063324538, "grad_norm": 0.19882359922338685, "learning_rate": 0.0004830076995170219, "loss": 3.0596399307250977, "step": 5532, "token_acc": 0.29195369774919616 }, { "epoch": 3.2433304016417472, "grad_norm": 0.18831758626723094, "learning_rate": 0.00048299891790011177, "loss": 3.0716328620910645, "step": 5533, "token_acc": 0.29120888718100996 }, { "epoch": 3.2439167399589564, "grad_norm": 0.21150153206431097, "learning_rate": 0.0004829901340944906, "loss": 3.065882682800293, "step": 5534, "token_acc": 0.2932831929889967 }, { "epoch": 3.2445030782761655, "grad_norm": 0.23250115931994414, "learning_rate": 0.0004829813481002411, "loss": 3.0413084030151367, "step": 5535, "token_acc": 0.2960883786488202 }, { "epoch": 3.2450894165933746, "grad_norm": 0.2833858117252575, "learning_rate": 0.0004829725599174458, "loss": 3.1076743602752686, "step": 5536, "token_acc": 0.28634254610246823 }, { "epoch": 3.2456757549105832, "grad_norm": 0.310441014199654, "learning_rate": 0.0004829637695461873, "loss": 3.0486221313476562, "step": 5537, "token_acc": 0.2931980620106007 }, { "epoch": 3.2462620932277924, "grad_norm": 0.2765746236856749, "learning_rate": 0.00048295497698654804, "loss": 3.062201976776123, "step": 5538, "token_acc": 0.2915463123435394 }, { "epoch": 3.2468484315450015, "grad_norm": 0.19542594289498158, "learning_rate": 0.00048294618223861075, "loss": 3.082529067993164, "step": 5539, "token_acc": 0.2894271930420602 }, { "epoch": 3.2474347698622106, "grad_norm": 0.27849102169961654, "learning_rate": 0.0004829373853024579, "loss": 3.0803189277648926, "step": 5540, "token_acc": 0.29021554546331974 }, { "epoch": 3.2480211081794197, "grad_norm": 0.2227586326521921, "learning_rate": 0.0004829285861781723, "loss": 3.0661215782165527, "step": 5541, "token_acc": 0.29176579622500837 }, { "epoch": 3.2486074464966284, "grad_norm": 0.20364548104494964, "learning_rate": 0.0004829197848658364, "loss": 3.0735926628112793, "step": 5542, "token_acc": 0.2895822745143477 }, { "epoch": 3.2491937848138375, "grad_norm": 0.2621733612398241, "learning_rate": 0.000482910981365533, "loss": 3.0894033908843994, "step": 5543, "token_acc": 0.2894053730904972 }, { "epoch": 3.2497801231310466, "grad_norm": 0.19978639876452248, "learning_rate": 0.00048290217567734486, "loss": 3.0747861862182617, "step": 5544, "token_acc": 0.2884870831510017 }, { "epoch": 3.2503664614482557, "grad_norm": 0.22115138332441409, "learning_rate": 0.0004828933678013545, "loss": 3.066645860671997, "step": 5545, "token_acc": 0.2908671172069693 }, { "epoch": 3.250952799765465, "grad_norm": 0.21760238275526372, "learning_rate": 0.00048288455773764485, "loss": 3.0593342781066895, "step": 5546, "token_acc": 0.2922667418780518 }, { "epoch": 3.2515391380826735, "grad_norm": 0.1937639325976976, "learning_rate": 0.0004828757454862986, "loss": 3.125030040740967, "step": 5547, "token_acc": 0.28257318983790203 }, { "epoch": 3.2521254763998826, "grad_norm": 0.2355524685825062, "learning_rate": 0.00048286693104739856, "loss": 3.1203601360321045, "step": 5548, "token_acc": 0.28508052200831363 }, { "epoch": 3.2527118147170917, "grad_norm": 0.2242657730880174, "learning_rate": 0.0004828581144210274, "loss": 3.0945048332214355, "step": 5549, "token_acc": 0.2886382170470616 }, { "epoch": 3.253298153034301, "grad_norm": 0.24069046921192702, "learning_rate": 0.0004828492956072681, "loss": 3.099910259246826, "step": 5550, "token_acc": 0.2874247720165042 }, { "epoch": 3.25388449135151, "grad_norm": 0.23100186480487003, "learning_rate": 0.0004828404746062034, "loss": 3.0883474349975586, "step": 5551, "token_acc": 0.28865603399322487 }, { "epoch": 3.254470829668719, "grad_norm": 0.2168281781062133, "learning_rate": 0.00048283165141791616, "loss": 3.1352527141571045, "step": 5552, "token_acc": 0.2830030026912626 }, { "epoch": 3.2550571679859277, "grad_norm": 0.24755457077599033, "learning_rate": 0.0004828228260424894, "loss": 3.0980682373046875, "step": 5553, "token_acc": 0.28801312217880737 }, { "epoch": 3.255643506303137, "grad_norm": 0.2691490735128906, "learning_rate": 0.0004828139984800059, "loss": 3.074782371520996, "step": 5554, "token_acc": 0.291357906900317 }, { "epoch": 3.256229844620346, "grad_norm": 0.24624511990986464, "learning_rate": 0.00048280516873054857, "loss": 3.067749500274658, "step": 5555, "token_acc": 0.29118769008711787 }, { "epoch": 3.256816182937555, "grad_norm": 0.2320774780419121, "learning_rate": 0.00048279633679420046, "loss": 3.0465569496154785, "step": 5556, "token_acc": 0.2949327301182319 }, { "epoch": 3.257402521254764, "grad_norm": 0.27411897719442974, "learning_rate": 0.0004827875026710443, "loss": 3.104708671569824, "step": 5557, "token_acc": 0.2856473071956362 }, { "epoch": 3.257988859571973, "grad_norm": 0.22278925125316826, "learning_rate": 0.0004827786663611634, "loss": 3.073840379714966, "step": 5558, "token_acc": 0.2905030038518652 }, { "epoch": 3.258575197889182, "grad_norm": 0.23041377743166, "learning_rate": 0.0004827698278646405, "loss": 3.129525661468506, "step": 5559, "token_acc": 0.2827432203866181 }, { "epoch": 3.259161536206391, "grad_norm": 0.26890400252201524, "learning_rate": 0.0004827609871815588, "loss": 3.090733289718628, "step": 5560, "token_acc": 0.2878155352583507 }, { "epoch": 3.2597478745236, "grad_norm": 0.22484175163073575, "learning_rate": 0.0004827521443120013, "loss": 3.1161885261535645, "step": 5561, "token_acc": 0.28375744478451487 }, { "epoch": 3.2603342128408093, "grad_norm": 0.2550605487313567, "learning_rate": 0.000482743299256051, "loss": 3.0764551162719727, "step": 5562, "token_acc": 0.29176013782550086 }, { "epoch": 3.2609205511580184, "grad_norm": 0.2206675182139155, "learning_rate": 0.00048273445201379094, "loss": 3.0539767742156982, "step": 5563, "token_acc": 0.29212919629135947 }, { "epoch": 3.261506889475227, "grad_norm": 0.22240673153250273, "learning_rate": 0.0004827256025853044, "loss": 3.0845656394958496, "step": 5564, "token_acc": 0.28825744085474436 }, { "epoch": 3.262093227792436, "grad_norm": 0.2848616921402516, "learning_rate": 0.0004827167509706745, "loss": 3.0736348628997803, "step": 5565, "token_acc": 0.29237470192700166 }, { "epoch": 3.2626795661096453, "grad_norm": 0.23879443714578585, "learning_rate": 0.0004827078971699842, "loss": 3.0961108207702637, "step": 5566, "token_acc": 0.2863585590251891 }, { "epoch": 3.2632659044268544, "grad_norm": 0.2098864645776518, "learning_rate": 0.0004826990411833168, "loss": 3.0913915634155273, "step": 5567, "token_acc": 0.28922415066370555 }, { "epoch": 3.2638522427440635, "grad_norm": 0.2155371152336698, "learning_rate": 0.0004826901830107555, "loss": 3.0926570892333984, "step": 5568, "token_acc": 0.2879919394624394 }, { "epoch": 3.264438581061272, "grad_norm": 0.19687301641213242, "learning_rate": 0.00048268132265238354, "loss": 3.02547550201416, "step": 5569, "token_acc": 0.2988211242514649 }, { "epoch": 3.2650249193784813, "grad_norm": 0.2191573767298865, "learning_rate": 0.00048267246010828395, "loss": 3.086819648742676, "step": 5570, "token_acc": 0.2878316719462931 }, { "epoch": 3.2656112576956904, "grad_norm": 0.2544937132395049, "learning_rate": 0.00048266359537854023, "loss": 3.0988962650299072, "step": 5571, "token_acc": 0.2880391627597376 }, { "epoch": 3.2661975960128995, "grad_norm": 0.2076748724334846, "learning_rate": 0.00048265472846323554, "loss": 3.0667712688446045, "step": 5572, "token_acc": 0.2920748502143851 }, { "epoch": 3.2667839343301086, "grad_norm": 0.21024077915996622, "learning_rate": 0.0004826458593624532, "loss": 3.066645383834839, "step": 5573, "token_acc": 0.29202946574225125 }, { "epoch": 3.2673702726473177, "grad_norm": 0.20863706108736713, "learning_rate": 0.00048263698807627644, "loss": 3.085844039916992, "step": 5574, "token_acc": 0.2908935856959932 }, { "epoch": 3.2679566109645264, "grad_norm": 0.22750015638112903, "learning_rate": 0.00048262811460478874, "loss": 3.07515811920166, "step": 5575, "token_acc": 0.2902277157144097 }, { "epoch": 3.2685429492817355, "grad_norm": 0.21866372778163526, "learning_rate": 0.0004826192389480733, "loss": 3.0436739921569824, "step": 5576, "token_acc": 0.29545466005758575 }, { "epoch": 3.2691292875989446, "grad_norm": 0.2085153771693781, "learning_rate": 0.0004826103611062136, "loss": 3.081249237060547, "step": 5577, "token_acc": 0.29052503525118195 }, { "epoch": 3.2697156259161537, "grad_norm": 0.24075439402996177, "learning_rate": 0.00048260148107929303, "loss": 3.1441917419433594, "step": 5578, "token_acc": 0.2804097412368662 }, { "epoch": 3.270301964233363, "grad_norm": 0.23658815838790412, "learning_rate": 0.0004825925988673949, "loss": 3.0593252182006836, "step": 5579, "token_acc": 0.2931509483981973 }, { "epoch": 3.2708883025505715, "grad_norm": 0.24704226927261264, "learning_rate": 0.00048258371447060277, "loss": 3.0668158531188965, "step": 5580, "token_acc": 0.2924334429540275 }, { "epoch": 3.2714746408677806, "grad_norm": 0.2584884201416459, "learning_rate": 0.0004825748278890001, "loss": 3.089484214782715, "step": 5581, "token_acc": 0.28829440173719767 }, { "epoch": 3.2720609791849897, "grad_norm": 0.21770561941906955, "learning_rate": 0.0004825659391226703, "loss": 3.0831127166748047, "step": 5582, "token_acc": 0.2893206768130568 }, { "epoch": 3.272647317502199, "grad_norm": 0.24612857126906626, "learning_rate": 0.0004825570481716969, "loss": 3.0791373252868652, "step": 5583, "token_acc": 0.288189868679462 }, { "epoch": 3.273233655819408, "grad_norm": 0.20020996065935764, "learning_rate": 0.00048254815503616334, "loss": 3.0971226692199707, "step": 5584, "token_acc": 0.2882106166363097 }, { "epoch": 3.273819994136617, "grad_norm": 0.24961252140913656, "learning_rate": 0.00048253925971615324, "loss": 3.104736328125, "step": 5585, "token_acc": 0.28686475134706535 }, { "epoch": 3.2744063324538257, "grad_norm": 0.2985282583479439, "learning_rate": 0.0004825303622117502, "loss": 3.1484391689300537, "step": 5586, "token_acc": 0.2801261498028909 }, { "epoch": 3.274992670771035, "grad_norm": 0.3642982579503769, "learning_rate": 0.00048252146252303774, "loss": 3.1142873764038086, "step": 5587, "token_acc": 0.28479375690912384 }, { "epoch": 3.275579009088244, "grad_norm": 0.2755944521451712, "learning_rate": 0.0004825125606500994, "loss": 3.0979433059692383, "step": 5588, "token_acc": 0.2867411862175474 }, { "epoch": 3.276165347405453, "grad_norm": 0.2338510714318697, "learning_rate": 0.0004825036565930189, "loss": 3.057699203491211, "step": 5589, "token_acc": 0.2940097157221724 }, { "epoch": 3.2767516857226617, "grad_norm": 0.23144373217834288, "learning_rate": 0.00048249475035187984, "loss": 3.042672634124756, "step": 5590, "token_acc": 0.2935944510299185 }, { "epoch": 3.277338024039871, "grad_norm": 0.23461611146451863, "learning_rate": 0.00048248584192676593, "loss": 3.072084426879883, "step": 5591, "token_acc": 0.2910954812295389 }, { "epoch": 3.27792436235708, "grad_norm": 0.2404245539068119, "learning_rate": 0.00048247693131776083, "loss": 3.1187362670898438, "step": 5592, "token_acc": 0.2848057470353643 }, { "epoch": 3.278510700674289, "grad_norm": 0.19420473264809676, "learning_rate": 0.0004824680185249481, "loss": 3.06527042388916, "step": 5593, "token_acc": 0.29126760998517054 }, { "epoch": 3.279097038991498, "grad_norm": 0.20838846372068304, "learning_rate": 0.00048245910354841173, "loss": 3.0861334800720215, "step": 5594, "token_acc": 0.28757289909366196 }, { "epoch": 3.2796833773087073, "grad_norm": 0.2782528592092092, "learning_rate": 0.0004824501863882353, "loss": 3.114996910095215, "step": 5595, "token_acc": 0.28384358912389845 }, { "epoch": 3.280269715625916, "grad_norm": 0.2500523487965146, "learning_rate": 0.0004824412670445025, "loss": 3.111743688583374, "step": 5596, "token_acc": 0.2858417996903885 }, { "epoch": 3.280856053943125, "grad_norm": 0.2154042382800553, "learning_rate": 0.00048243234551729737, "loss": 3.105464458465576, "step": 5597, "token_acc": 0.2866963277283427 }, { "epoch": 3.281442392260334, "grad_norm": 0.2974301311415171, "learning_rate": 0.0004824234218067035, "loss": 3.1336405277252197, "step": 5598, "token_acc": 0.28211901570653375 }, { "epoch": 3.2820287305775433, "grad_norm": 0.24572189656894172, "learning_rate": 0.0004824144959128047, "loss": 3.0633602142333984, "step": 5599, "token_acc": 0.29346181518081976 }, { "epoch": 3.2826150688947524, "grad_norm": 0.2070264741476238, "learning_rate": 0.00048240556783568503, "loss": 3.0652341842651367, "step": 5600, "token_acc": 0.29193398462927295 }, { "epoch": 3.283201407211961, "grad_norm": 0.23818577454244913, "learning_rate": 0.00048239663757542806, "loss": 3.109423875808716, "step": 5601, "token_acc": 0.2848501034247317 }, { "epoch": 3.28378774552917, "grad_norm": 0.19073247199752943, "learning_rate": 0.0004823877051321179, "loss": 3.0666239261627197, "step": 5602, "token_acc": 0.292464746175054 }, { "epoch": 3.2843740838463793, "grad_norm": 0.29587031607351383, "learning_rate": 0.00048237877050583844, "loss": 3.0637729167938232, "step": 5603, "token_acc": 0.2924888502121179 }, { "epoch": 3.2849604221635884, "grad_norm": 0.21882348129684032, "learning_rate": 0.0004823698336966735, "loss": 3.089359760284424, "step": 5604, "token_acc": 0.28817105634654944 }, { "epoch": 3.2855467604807975, "grad_norm": 0.22778499390588072, "learning_rate": 0.0004823608947047072, "loss": 3.1079695224761963, "step": 5605, "token_acc": 0.28588037926970405 }, { "epoch": 3.2861330987980066, "grad_norm": 0.24175090605085456, "learning_rate": 0.0004823519535300234, "loss": 3.0653162002563477, "step": 5606, "token_acc": 0.29173449038607074 }, { "epoch": 3.2867194371152153, "grad_norm": 0.20790429547459105, "learning_rate": 0.000482343010172706, "loss": 3.0496041774749756, "step": 5607, "token_acc": 0.29385477753181355 }, { "epoch": 3.2873057754324244, "grad_norm": 0.2483078665022293, "learning_rate": 0.0004823340646328391, "loss": 3.102588415145874, "step": 5608, "token_acc": 0.2881679389312977 }, { "epoch": 3.2878921137496335, "grad_norm": 0.23515268495632705, "learning_rate": 0.0004823251169105068, "loss": 3.1007556915283203, "step": 5609, "token_acc": 0.28820658425648343 }, { "epoch": 3.2884784520668426, "grad_norm": 0.21233421145808948, "learning_rate": 0.0004823161670057931, "loss": 3.0945563316345215, "step": 5610, "token_acc": 0.28868392142161686 }, { "epoch": 3.2890647903840518, "grad_norm": 0.2589703532550193, "learning_rate": 0.000482307214918782, "loss": 3.1085004806518555, "step": 5611, "token_acc": 0.2854568938655302 }, { "epoch": 3.2896511287012604, "grad_norm": 0.22556743987744785, "learning_rate": 0.00048229826064955764, "loss": 3.1141834259033203, "step": 5612, "token_acc": 0.2856895444602239 }, { "epoch": 3.2902374670184695, "grad_norm": 0.21502366766656042, "learning_rate": 0.00048228930419820423, "loss": 3.1022229194641113, "step": 5613, "token_acc": 0.28490966550032026 }, { "epoch": 3.2908238053356786, "grad_norm": 0.2796042443947748, "learning_rate": 0.00048228034556480574, "loss": 3.0935940742492676, "step": 5614, "token_acc": 0.28808274433277703 }, { "epoch": 3.2914101436528878, "grad_norm": 0.2645599672131271, "learning_rate": 0.00048227138474944643, "loss": 3.081292152404785, "step": 5615, "token_acc": 0.29022148040790485 }, { "epoch": 3.291996481970097, "grad_norm": 0.22451169555887265, "learning_rate": 0.0004822624217522105, "loss": 3.0939862728118896, "step": 5616, "token_acc": 0.2878765789501088 }, { "epoch": 3.292582820287306, "grad_norm": 0.2750437428934105, "learning_rate": 0.000482253456573182, "loss": 3.0854268074035645, "step": 5617, "token_acc": 0.2911442896465007 }, { "epoch": 3.2931691586045146, "grad_norm": 0.24178880153119978, "learning_rate": 0.00048224448921244535, "loss": 3.1188011169433594, "step": 5618, "token_acc": 0.28323370338741816 }, { "epoch": 3.2937554969217238, "grad_norm": 0.3037231556032077, "learning_rate": 0.0004822355196700846, "loss": 3.0870141983032227, "step": 5619, "token_acc": 0.2881109310649209 }, { "epoch": 3.294341835238933, "grad_norm": 0.3075103611537917, "learning_rate": 0.00048222654794618413, "loss": 3.0998740196228027, "step": 5620, "token_acc": 0.2870975249802961 }, { "epoch": 3.294928173556142, "grad_norm": 0.23649248914714227, "learning_rate": 0.00048221757404082817, "loss": 3.0571320056915283, "step": 5621, "token_acc": 0.2924980581575387 }, { "epoch": 3.295514511873351, "grad_norm": 0.25067769450792743, "learning_rate": 0.000482208597954101, "loss": 3.081094264984131, "step": 5622, "token_acc": 0.2892858526756306 }, { "epoch": 3.2961008501905598, "grad_norm": 0.2744705993734885, "learning_rate": 0.00048219961968608695, "loss": 3.1241700649261475, "step": 5623, "token_acc": 0.2846619810464552 }, { "epoch": 3.296687188507769, "grad_norm": 0.2329372228460261, "learning_rate": 0.0004821906392368703, "loss": 3.0474495887756348, "step": 5624, "token_acc": 0.29451462323543565 }, { "epoch": 3.297273526824978, "grad_norm": 0.2272202919647519, "learning_rate": 0.0004821816566065356, "loss": 3.070211172103882, "step": 5625, "token_acc": 0.2924878278078837 }, { "epoch": 3.297859865142187, "grad_norm": 0.2316317267267144, "learning_rate": 0.0004821726717951671, "loss": 3.0903289318084717, "step": 5626, "token_acc": 0.28900211917227625 }, { "epoch": 3.298446203459396, "grad_norm": 0.224151862099049, "learning_rate": 0.0004821636848028491, "loss": 3.0758917331695557, "step": 5627, "token_acc": 0.29086551110830466 }, { "epoch": 3.2990325417766053, "grad_norm": 0.23649733478971557, "learning_rate": 0.00048215469562966617, "loss": 3.0742828845977783, "step": 5628, "token_acc": 0.2920394705879277 }, { "epoch": 3.299618880093814, "grad_norm": 0.22470763233267527, "learning_rate": 0.00048214570427570276, "loss": 3.0722882747650146, "step": 5629, "token_acc": 0.2915392483901897 }, { "epoch": 3.300205218411023, "grad_norm": 0.23471556058404805, "learning_rate": 0.00048213671074104326, "loss": 3.054325580596924, "step": 5630, "token_acc": 0.29333741566521965 }, { "epoch": 3.300791556728232, "grad_norm": 0.2068036009826202, "learning_rate": 0.00048212771502577215, "loss": 3.116121292114258, "step": 5631, "token_acc": 0.28464698423825086 }, { "epoch": 3.3013778950454413, "grad_norm": 0.2342874029765438, "learning_rate": 0.00048211871712997397, "loss": 3.0906126499176025, "step": 5632, "token_acc": 0.2870752644616511 }, { "epoch": 3.3019642333626504, "grad_norm": 0.2285501870819182, "learning_rate": 0.00048210971705373316, "loss": 3.099940776824951, "step": 5633, "token_acc": 0.2885108744594184 }, { "epoch": 3.302550571679859, "grad_norm": 0.2183531498227338, "learning_rate": 0.0004821007147971344, "loss": 3.045015573501587, "step": 5634, "token_acc": 0.2939536059463559 }, { "epoch": 3.303136909997068, "grad_norm": 0.21284600974492784, "learning_rate": 0.0004820917103602622, "loss": 3.0755672454833984, "step": 5635, "token_acc": 0.2911873138788831 }, { "epoch": 3.3037232483142773, "grad_norm": 0.1976349060974995, "learning_rate": 0.0004820827037432011, "loss": 3.1121678352355957, "step": 5636, "token_acc": 0.28570651272697123 }, { "epoch": 3.3043095866314864, "grad_norm": 0.19477786409651648, "learning_rate": 0.0004820736949460357, "loss": 3.115225315093994, "step": 5637, "token_acc": 0.28513020901628594 }, { "epoch": 3.3048959249486956, "grad_norm": 0.19311121610095494, "learning_rate": 0.0004820646839688507, "loss": 3.1131432056427, "step": 5638, "token_acc": 0.2856725433511065 }, { "epoch": 3.3054822632659047, "grad_norm": 0.2091723176479704, "learning_rate": 0.00048205567081173066, "loss": 3.1030778884887695, "step": 5639, "token_acc": 0.28610033641561117 }, { "epoch": 3.3060686015831133, "grad_norm": 0.19866899599185506, "learning_rate": 0.0004820466554747603, "loss": 3.104271173477173, "step": 5640, "token_acc": 0.28609438635397993 }, { "epoch": 3.3066549399003224, "grad_norm": 0.22217659679854995, "learning_rate": 0.00048203763795802435, "loss": 3.0823962688446045, "step": 5641, "token_acc": 0.28848467982364595 }, { "epoch": 3.3072412782175316, "grad_norm": 0.28319946674486257, "learning_rate": 0.0004820286182616075, "loss": 3.0545108318328857, "step": 5642, "token_acc": 0.29472654400858395 }, { "epoch": 3.3078276165347407, "grad_norm": 0.3758620258504028, "learning_rate": 0.0004820195963855943, "loss": 3.1115856170654297, "step": 5643, "token_acc": 0.285112367003635 }, { "epoch": 3.3084139548519493, "grad_norm": 0.32268690756611645, "learning_rate": 0.00048201057233006973, "loss": 3.1096878051757812, "step": 5644, "token_acc": 0.2852580979839109 }, { "epoch": 3.3090002931691584, "grad_norm": 0.2090951402460857, "learning_rate": 0.0004820015460951185, "loss": 3.1138486862182617, "step": 5645, "token_acc": 0.2858559794189894 }, { "epoch": 3.3095866314863676, "grad_norm": 0.2347910485965516, "learning_rate": 0.0004819925176808253, "loss": 3.075697898864746, "step": 5646, "token_acc": 0.29090870559189175 }, { "epoch": 3.3101729698035767, "grad_norm": 0.2106849560929445, "learning_rate": 0.0004819834870872751, "loss": 3.067077159881592, "step": 5647, "token_acc": 0.29152926355460745 }, { "epoch": 3.310759308120786, "grad_norm": 0.2870569893745065, "learning_rate": 0.00048197445431455253, "loss": 3.0558276176452637, "step": 5648, "token_acc": 0.29482275285918513 }, { "epoch": 3.311345646437995, "grad_norm": 0.23609561201216378, "learning_rate": 0.0004819654193627426, "loss": 3.1206564903259277, "step": 5649, "token_acc": 0.2844688913461218 }, { "epoch": 3.3119319847552036, "grad_norm": 0.24739831880535437, "learning_rate": 0.00048195638223193015, "loss": 3.099743604660034, "step": 5650, "token_acc": 0.28642793403624933 }, { "epoch": 3.3125183230724127, "grad_norm": 0.1952489481118913, "learning_rate": 0.0004819473429222001, "loss": 3.1334993839263916, "step": 5651, "token_acc": 0.28193109792177523 }, { "epoch": 3.313104661389622, "grad_norm": 0.2225534494965365, "learning_rate": 0.0004819383014336373, "loss": 3.081575870513916, "step": 5652, "token_acc": 0.29049981898332516 }, { "epoch": 3.313690999706831, "grad_norm": 0.20554899874877397, "learning_rate": 0.0004819292577663266, "loss": 3.081087350845337, "step": 5653, "token_acc": 0.29000757102819874 }, { "epoch": 3.31427733802404, "grad_norm": 0.1945376043378655, "learning_rate": 0.00048192021192035306, "loss": 3.077475070953369, "step": 5654, "token_acc": 0.2890689721917543 }, { "epoch": 3.3148636763412487, "grad_norm": 0.19961744607953738, "learning_rate": 0.0004819111638958017, "loss": 3.1203060150146484, "step": 5655, "token_acc": 0.28431469897064177 }, { "epoch": 3.315450014658458, "grad_norm": 0.24641374969103133, "learning_rate": 0.0004819021136927575, "loss": 3.051339864730835, "step": 5656, "token_acc": 0.2939230961018872 }, { "epoch": 3.316036352975667, "grad_norm": 0.23000644068891238, "learning_rate": 0.0004818930613113054, "loss": 3.132145881652832, "step": 5657, "token_acc": 0.282413129003687 }, { "epoch": 3.316622691292876, "grad_norm": 0.21131128608647196, "learning_rate": 0.00048188400675153046, "loss": 3.1004552841186523, "step": 5658, "token_acc": 0.286698705588549 }, { "epoch": 3.317209029610085, "grad_norm": 0.23313579881447813, "learning_rate": 0.0004818749500135177, "loss": 3.089571475982666, "step": 5659, "token_acc": 0.2892515582882949 }, { "epoch": 3.3177953679272942, "grad_norm": 0.22549084311286544, "learning_rate": 0.00048186589109735237, "loss": 3.10986328125, "step": 5660, "token_acc": 0.28766621835614653 }, { "epoch": 3.318381706244503, "grad_norm": 0.2272767737107594, "learning_rate": 0.0004818568300031193, "loss": 3.0698978900909424, "step": 5661, "token_acc": 0.2915396424449622 }, { "epoch": 3.318968044561712, "grad_norm": 0.2507221312722534, "learning_rate": 0.0004818477667309038, "loss": 3.1122870445251465, "step": 5662, "token_acc": 0.28360814690864944 }, { "epoch": 3.319554382878921, "grad_norm": 0.21717127683381995, "learning_rate": 0.00048183870128079093, "loss": 3.0622549057006836, "step": 5663, "token_acc": 0.2921744118092682 }, { "epoch": 3.3201407211961302, "grad_norm": 0.25397811370792395, "learning_rate": 0.00048182963365286593, "loss": 3.0772716999053955, "step": 5664, "token_acc": 0.29129152742284253 }, { "epoch": 3.3207270595133394, "grad_norm": 0.2547583787020968, "learning_rate": 0.00048182056384721386, "loss": 3.091721773147583, "step": 5665, "token_acc": 0.2868746749869995 }, { "epoch": 3.321313397830548, "grad_norm": 0.21469580942889255, "learning_rate": 0.00048181149186391994, "loss": 3.082245349884033, "step": 5666, "token_acc": 0.29057398603706264 }, { "epoch": 3.321899736147757, "grad_norm": 0.27147762287583027, "learning_rate": 0.00048180241770306943, "loss": 3.1196463108062744, "step": 5667, "token_acc": 0.2842632771895183 }, { "epoch": 3.3224860744649662, "grad_norm": 0.2872118185862629, "learning_rate": 0.0004817933413647476, "loss": 3.0859527587890625, "step": 5668, "token_acc": 0.289538641500456 }, { "epoch": 3.3230724127821754, "grad_norm": 0.2327667862099696, "learning_rate": 0.0004817842628490397, "loss": 3.0475997924804688, "step": 5669, "token_acc": 0.2940962746673668 }, { "epoch": 3.3236587510993845, "grad_norm": 0.2194225984805931, "learning_rate": 0.000481775182156031, "loss": 3.0961875915527344, "step": 5670, "token_acc": 0.28780341577749124 }, { "epoch": 3.3242450894165936, "grad_norm": 0.2508191311648795, "learning_rate": 0.00048176609928580674, "loss": 3.0760416984558105, "step": 5671, "token_acc": 0.29160896475708675 }, { "epoch": 3.3248314277338022, "grad_norm": 0.2907824952063711, "learning_rate": 0.00048175701423845224, "loss": 3.118382453918457, "step": 5672, "token_acc": 0.2853155221449338 }, { "epoch": 3.3254177660510114, "grad_norm": 0.24762266025330715, "learning_rate": 0.0004817479270140529, "loss": 3.093312978744507, "step": 5673, "token_acc": 0.2885064192195246 }, { "epoch": 3.3260041043682205, "grad_norm": 0.2688422834901757, "learning_rate": 0.0004817388376126941, "loss": 3.1101064682006836, "step": 5674, "token_acc": 0.2861246791072938 }, { "epoch": 3.3265904426854296, "grad_norm": 0.26183862098939253, "learning_rate": 0.0004817297460344612, "loss": 3.1113715171813965, "step": 5675, "token_acc": 0.28483649938813127 }, { "epoch": 3.3271767810026387, "grad_norm": 0.21981799167965343, "learning_rate": 0.0004817206522794396, "loss": 3.0412604808807373, "step": 5676, "token_acc": 0.29373301063168333 }, { "epoch": 3.3277631193198474, "grad_norm": 0.21396434011977744, "learning_rate": 0.00048171155634771476, "loss": 3.053217649459839, "step": 5677, "token_acc": 0.2933580241174207 }, { "epoch": 3.3283494576370565, "grad_norm": 0.20084703208755356, "learning_rate": 0.000481702458239372, "loss": 3.081444263458252, "step": 5678, "token_acc": 0.2896609590915132 }, { "epoch": 3.3289357959542656, "grad_norm": 0.21160760827504185, "learning_rate": 0.00048169335795449693, "loss": 3.0933430194854736, "step": 5679, "token_acc": 0.2876954413597378 }, { "epoch": 3.3295221342714747, "grad_norm": 0.21125912927362353, "learning_rate": 0.000481684255493175, "loss": 3.0747501850128174, "step": 5680, "token_acc": 0.2896137975139706 }, { "epoch": 3.330108472588684, "grad_norm": 0.20380786808522253, "learning_rate": 0.00048167515085549155, "loss": 3.0939369201660156, "step": 5681, "token_acc": 0.28700149784684514 }, { "epoch": 3.330694810905893, "grad_norm": 0.1992534224994094, "learning_rate": 0.00048166604404153236, "loss": 3.1031932830810547, "step": 5682, "token_acc": 0.28615151791887905 }, { "epoch": 3.3312811492231016, "grad_norm": 0.23637323677063057, "learning_rate": 0.0004816569350513828, "loss": 3.1141958236694336, "step": 5683, "token_acc": 0.2862725131427074 }, { "epoch": 3.3318674875403107, "grad_norm": 0.2296107817641607, "learning_rate": 0.0004816478238851285, "loss": 3.076993465423584, "step": 5684, "token_acc": 0.2899644372892592 }, { "epoch": 3.33245382585752, "grad_norm": 0.19651672648090426, "learning_rate": 0.00048163871054285513, "loss": 3.11259126663208, "step": 5685, "token_acc": 0.28543921040357906 }, { "epoch": 3.333040164174729, "grad_norm": 0.21849696750702524, "learning_rate": 0.0004816295950246481, "loss": 3.0663373470306396, "step": 5686, "token_acc": 0.293057210578648 }, { "epoch": 3.333626502491938, "grad_norm": 0.23396732387562622, "learning_rate": 0.0004816204773305932, "loss": 3.0868396759033203, "step": 5687, "token_acc": 0.28901468828836474 }, { "epoch": 3.3342128408091467, "grad_norm": 0.28670765774363827, "learning_rate": 0.00048161135746077605, "loss": 3.0919270515441895, "step": 5688, "token_acc": 0.288924335230772 }, { "epoch": 3.334799179126356, "grad_norm": 0.3717367531623226, "learning_rate": 0.00048160223541528224, "loss": 3.0542221069335938, "step": 5689, "token_acc": 0.2934421626175189 }, { "epoch": 3.335385517443565, "grad_norm": 0.3182113824960578, "learning_rate": 0.00048159311119419756, "loss": 3.0562243461608887, "step": 5690, "token_acc": 0.29468335818638747 }, { "epoch": 3.335971855760774, "grad_norm": 0.2748012763173354, "learning_rate": 0.00048158398479760767, "loss": 3.0409555435180664, "step": 5691, "token_acc": 0.2961833158727224 }, { "epoch": 3.336558194077983, "grad_norm": 0.3758953589577359, "learning_rate": 0.0004815748562255983, "loss": 3.049755096435547, "step": 5692, "token_acc": 0.2937759717773353 }, { "epoch": 3.3371445323951923, "grad_norm": 0.2330906597893559, "learning_rate": 0.00048156572547825526, "loss": 3.0975193977355957, "step": 5693, "token_acc": 0.2876893853375474 }, { "epoch": 3.337730870712401, "grad_norm": 0.29105387668326754, "learning_rate": 0.0004815565925556642, "loss": 3.1019654273986816, "step": 5694, "token_acc": 0.28834418554592905 }, { "epoch": 3.33831720902961, "grad_norm": 0.2142572911854091, "learning_rate": 0.00048154745745791094, "loss": 3.1076979637145996, "step": 5695, "token_acc": 0.28560871878054467 }, { "epoch": 3.338903547346819, "grad_norm": 0.3179448090273838, "learning_rate": 0.00048153832018508146, "loss": 3.1429243087768555, "step": 5696, "token_acc": 0.2810085105390181 }, { "epoch": 3.3394898856640283, "grad_norm": 0.2229124169650394, "learning_rate": 0.0004815291807372614, "loss": 3.088953971862793, "step": 5697, "token_acc": 0.2862387295189512 }, { "epoch": 3.340076223981237, "grad_norm": 0.24755959911567715, "learning_rate": 0.0004815200391145367, "loss": 3.0774893760681152, "step": 5698, "token_acc": 0.28960004099515746 }, { "epoch": 3.340662562298446, "grad_norm": 0.2498493871197153, "learning_rate": 0.0004815108953169931, "loss": 3.0650951862335205, "step": 5699, "token_acc": 0.2922341936797923 }, { "epoch": 3.341248900615655, "grad_norm": 0.2324897019439465, "learning_rate": 0.0004815017493447167, "loss": 3.0454745292663574, "step": 5700, "token_acc": 0.29462783484650573 }, { "epoch": 3.3418352389328643, "grad_norm": 0.2523601971066471, "learning_rate": 0.0004814926011977933, "loss": 3.1145246028900146, "step": 5701, "token_acc": 0.284871020820647 }, { "epoch": 3.3424215772500734, "grad_norm": 0.22452375315257608, "learning_rate": 0.00048148345087630883, "loss": 3.1056442260742188, "step": 5702, "token_acc": 0.28765953549513246 }, { "epoch": 3.3430079155672825, "grad_norm": 0.23563186322006818, "learning_rate": 0.0004814742983803493, "loss": 3.090827226638794, "step": 5703, "token_acc": 0.28820892694460953 }, { "epoch": 3.343594253884491, "grad_norm": 0.23346052564565173, "learning_rate": 0.0004814651437100006, "loss": 3.1089439392089844, "step": 5704, "token_acc": 0.2856773647854894 }, { "epoch": 3.3441805922017003, "grad_norm": 0.23736577474290993, "learning_rate": 0.00048145598686534887, "loss": 3.0792243480682373, "step": 5705, "token_acc": 0.2886090980250161 }, { "epoch": 3.3447669305189094, "grad_norm": 0.26103306842365875, "learning_rate": 0.00048144682784647996, "loss": 3.1224348545074463, "step": 5706, "token_acc": 0.28405102794325243 }, { "epoch": 3.3453532688361185, "grad_norm": 0.22484679590688617, "learning_rate": 0.00048143766665348, "loss": 3.061706304550171, "step": 5707, "token_acc": 0.2923737208697957 }, { "epoch": 3.3459396071533276, "grad_norm": 0.29717426051649715, "learning_rate": 0.00048142850328643504, "loss": 3.1045303344726562, "step": 5708, "token_acc": 0.28454244235340626 }, { "epoch": 3.3465259454705363, "grad_norm": 0.2889302846735189, "learning_rate": 0.00048141933774543114, "loss": 3.0770037174224854, "step": 5709, "token_acc": 0.291457906177255 }, { "epoch": 3.3471122837877454, "grad_norm": 0.2336356880595551, "learning_rate": 0.0004814101700305544, "loss": 3.126430034637451, "step": 5710, "token_acc": 0.2836267268353945 }, { "epoch": 3.3476986221049545, "grad_norm": 0.3138092962762753, "learning_rate": 0.0004814010001418909, "loss": 3.0566539764404297, "step": 5711, "token_acc": 0.29299494723510927 }, { "epoch": 3.3482849604221636, "grad_norm": 0.19939034817968354, "learning_rate": 0.0004813918280795269, "loss": 3.050727367401123, "step": 5712, "token_acc": 0.2942774056870666 }, { "epoch": 3.3488712987393727, "grad_norm": 0.3343314240904337, "learning_rate": 0.00048138265384354846, "loss": 3.15769100189209, "step": 5713, "token_acc": 0.28218551395188646 }, { "epoch": 3.349457637056582, "grad_norm": 0.22637676983025098, "learning_rate": 0.00048137347743404174, "loss": 3.060959577560425, "step": 5714, "token_acc": 0.29280808334997305 }, { "epoch": 3.3500439753737905, "grad_norm": 0.2738101562229442, "learning_rate": 0.000481364298851093, "loss": 3.127572536468506, "step": 5715, "token_acc": 0.28259453670642326 }, { "epoch": 3.3506303136909996, "grad_norm": 0.22417010382553762, "learning_rate": 0.0004813551180947885, "loss": 3.0949349403381348, "step": 5716, "token_acc": 0.287831161802384 }, { "epoch": 3.3512166520082087, "grad_norm": 0.25937109542935305, "learning_rate": 0.0004813459351652143, "loss": 3.102130889892578, "step": 5717, "token_acc": 0.28714053680182994 }, { "epoch": 3.351802990325418, "grad_norm": 0.22838379922335145, "learning_rate": 0.0004813367500624569, "loss": 3.09628963470459, "step": 5718, "token_acc": 0.28596513557952813 }, { "epoch": 3.352389328642627, "grad_norm": 0.23758687896684152, "learning_rate": 0.0004813275627866024, "loss": 3.0538878440856934, "step": 5719, "token_acc": 0.2957549964144296 }, { "epoch": 3.3529756669598356, "grad_norm": 0.21841973015618057, "learning_rate": 0.0004813183733377371, "loss": 3.0779478549957275, "step": 5720, "token_acc": 0.2904533920746633 }, { "epoch": 3.3535620052770447, "grad_norm": 0.2381607354302755, "learning_rate": 0.0004813091817159475, "loss": 3.0999414920806885, "step": 5721, "token_acc": 0.2870356019869655 }, { "epoch": 3.354148343594254, "grad_norm": 0.2610910647140743, "learning_rate": 0.0004812999879213198, "loss": 3.128830909729004, "step": 5722, "token_acc": 0.28514769798216194 }, { "epoch": 3.354734681911463, "grad_norm": 0.208631119058508, "learning_rate": 0.0004812907919539403, "loss": 3.042705535888672, "step": 5723, "token_acc": 0.2960087986170692 }, { "epoch": 3.355321020228672, "grad_norm": 0.24570551292326842, "learning_rate": 0.0004812815938138956, "loss": 3.1195530891418457, "step": 5724, "token_acc": 0.28511801435344714 }, { "epoch": 3.355907358545881, "grad_norm": 0.19530431013684368, "learning_rate": 0.00048127239350127197, "loss": 3.0791687965393066, "step": 5725, "token_acc": 0.28819945126032853 }, { "epoch": 3.35649369686309, "grad_norm": 0.20354008214004723, "learning_rate": 0.0004812631910161558, "loss": 3.0918564796447754, "step": 5726, "token_acc": 0.2875131477108099 }, { "epoch": 3.357080035180299, "grad_norm": 0.19945359696300652, "learning_rate": 0.0004812539863586336, "loss": 3.0526177883148193, "step": 5727, "token_acc": 0.2940605957854215 }, { "epoch": 3.357666373497508, "grad_norm": 0.21035645124820615, "learning_rate": 0.00048124477952879186, "loss": 3.1275181770324707, "step": 5728, "token_acc": 0.2841350258140575 }, { "epoch": 3.358252711814717, "grad_norm": 0.21589699669616275, "learning_rate": 0.00048123557052671696, "loss": 3.0966527462005615, "step": 5729, "token_acc": 0.28661376757328066 }, { "epoch": 3.3588390501319263, "grad_norm": 0.2321156662126395, "learning_rate": 0.0004812263593524955, "loss": 3.084888219833374, "step": 5730, "token_acc": 0.28849721234112385 }, { "epoch": 3.359425388449135, "grad_norm": 0.20656533903242963, "learning_rate": 0.00048121714600621394, "loss": 3.085391044616699, "step": 5731, "token_acc": 0.28785268736387626 }, { "epoch": 3.360011726766344, "grad_norm": 0.22435083180988985, "learning_rate": 0.00048120793048795886, "loss": 3.0957674980163574, "step": 5732, "token_acc": 0.2882599312031251 }, { "epoch": 3.360598065083553, "grad_norm": 0.25329957182049606, "learning_rate": 0.00048119871279781693, "loss": 3.115109443664551, "step": 5733, "token_acc": 0.28687792207792207 }, { "epoch": 3.3611844034007623, "grad_norm": 0.2027388215615873, "learning_rate": 0.00048118949293587455, "loss": 3.0481271743774414, "step": 5734, "token_acc": 0.294430839396021 }, { "epoch": 3.3617707417179714, "grad_norm": 0.239959939584249, "learning_rate": 0.0004811802709022184, "loss": 3.0477395057678223, "step": 5735, "token_acc": 0.29501432133363553 }, { "epoch": 3.3623570800351805, "grad_norm": 0.20143083654415844, "learning_rate": 0.00048117104669693513, "loss": 3.094008445739746, "step": 5736, "token_acc": 0.2874228565920654 }, { "epoch": 3.362943418352389, "grad_norm": 0.22177533077110737, "learning_rate": 0.00048116182032011145, "loss": 3.0817437171936035, "step": 5737, "token_acc": 0.28947974337774335 }, { "epoch": 3.3635297566695983, "grad_norm": 0.24851500372107896, "learning_rate": 0.0004811525917718339, "loss": 3.084895610809326, "step": 5738, "token_acc": 0.2884598733768704 }, { "epoch": 3.3641160949868074, "grad_norm": 0.21698641374797847, "learning_rate": 0.00048114336105218924, "loss": 3.0854220390319824, "step": 5739, "token_acc": 0.2892021030006743 }, { "epoch": 3.3647024333040165, "grad_norm": 0.23752104052936376, "learning_rate": 0.00048113412816126424, "loss": 3.0922746658325195, "step": 5740, "token_acc": 0.29005128152240894 }, { "epoch": 3.3652887716212256, "grad_norm": 0.2607651742569317, "learning_rate": 0.0004811248930991454, "loss": 3.064988374710083, "step": 5741, "token_acc": 0.29194212615132503 }, { "epoch": 3.3658751099384343, "grad_norm": 0.2972758987967038, "learning_rate": 0.0004811156558659198, "loss": 3.08284330368042, "step": 5742, "token_acc": 0.2888356701662883 }, { "epoch": 3.3664614482556434, "grad_norm": 0.26154677273649124, "learning_rate": 0.000481106416461674, "loss": 3.073464870452881, "step": 5743, "token_acc": 0.29190949422297857 }, { "epoch": 3.3670477865728525, "grad_norm": 0.21310976982862373, "learning_rate": 0.00048109717488649487, "loss": 3.041125774383545, "step": 5744, "token_acc": 0.2967950930670986 }, { "epoch": 3.3676341248900616, "grad_norm": 0.21450687766374527, "learning_rate": 0.0004810879311404691, "loss": 3.067251205444336, "step": 5745, "token_acc": 0.28984774261882695 }, { "epoch": 3.3682204632072708, "grad_norm": 0.23038102415734193, "learning_rate": 0.00048107868522368364, "loss": 3.0841753482818604, "step": 5746, "token_acc": 0.2891329646046627 }, { "epoch": 3.36880680152448, "grad_norm": 0.2143517825919709, "learning_rate": 0.0004810694371362253, "loss": 3.0788352489471436, "step": 5747, "token_acc": 0.2905380898025368 }, { "epoch": 3.3693931398416885, "grad_norm": 0.26354217614374253, "learning_rate": 0.00048106018687818096, "loss": 3.0930116176605225, "step": 5748, "token_acc": 0.28829707765883067 }, { "epoch": 3.3699794781588976, "grad_norm": 0.24387825181323383, "learning_rate": 0.00048105093444963763, "loss": 3.063042163848877, "step": 5749, "token_acc": 0.2924492609407716 }, { "epoch": 3.3705658164761068, "grad_norm": 0.19043904609869006, "learning_rate": 0.000481041679850682, "loss": 3.050421953201294, "step": 5750, "token_acc": 0.29360950874459285 }, { "epoch": 3.371152154793316, "grad_norm": 0.25782541601503517, "learning_rate": 0.00048103242308140124, "loss": 3.057704210281372, "step": 5751, "token_acc": 0.2941018667063311 }, { "epoch": 3.3717384931105245, "grad_norm": 0.2594283998728703, "learning_rate": 0.00048102316414188207, "loss": 3.0824294090270996, "step": 5752, "token_acc": 0.28943257100958886 }, { "epoch": 3.3723248314277336, "grad_norm": 0.22699682094921436, "learning_rate": 0.0004810139030322116, "loss": 3.118851900100708, "step": 5753, "token_acc": 0.2850955349705949 }, { "epoch": 3.3729111697449428, "grad_norm": 0.22707254573136912, "learning_rate": 0.0004810046397524769, "loss": 3.061995267868042, "step": 5754, "token_acc": 0.29152775586529034 }, { "epoch": 3.373497508062152, "grad_norm": 0.22714920060732402, "learning_rate": 0.00048099537430276474, "loss": 3.0638372898101807, "step": 5755, "token_acc": 0.2920150311773046 }, { "epoch": 3.374083846379361, "grad_norm": 0.18280795259911853, "learning_rate": 0.00048098610668316245, "loss": 3.0893235206604004, "step": 5756, "token_acc": 0.2859946966087299 }, { "epoch": 3.37467018469657, "grad_norm": 0.23415046979884144, "learning_rate": 0.0004809768368937568, "loss": 3.07240891456604, "step": 5757, "token_acc": 0.29082751996764733 }, { "epoch": 3.3752565230137788, "grad_norm": 0.2828043184444646, "learning_rate": 0.0004809675649346351, "loss": 3.1092591285705566, "step": 5758, "token_acc": 0.28759957694451826 }, { "epoch": 3.375842861330988, "grad_norm": 0.2552394450832197, "learning_rate": 0.0004809582908058844, "loss": 3.111790657043457, "step": 5759, "token_acc": 0.28387232600412843 }, { "epoch": 3.376429199648197, "grad_norm": 0.20007828106619335, "learning_rate": 0.0004809490145075918, "loss": 3.0867719650268555, "step": 5760, "token_acc": 0.28935293367888854 }, { "epoch": 3.377015537965406, "grad_norm": 0.34366265486089664, "learning_rate": 0.0004809397360398443, "loss": 3.0969371795654297, "step": 5761, "token_acc": 0.2878714859437751 }, { "epoch": 3.377601876282615, "grad_norm": 0.3099476734542247, "learning_rate": 0.0004809304554027292, "loss": 3.082019329071045, "step": 5762, "token_acc": 0.28911747183563496 }, { "epoch": 3.378188214599824, "grad_norm": 0.20427307692509192, "learning_rate": 0.00048092117259633375, "loss": 3.1058435440063477, "step": 5763, "token_acc": 0.2867441331563275 }, { "epoch": 3.378774552917033, "grad_norm": 0.2734124014165882, "learning_rate": 0.0004809118876207449, "loss": 3.0917820930480957, "step": 5764, "token_acc": 0.28890269370060007 }, { "epoch": 3.379360891234242, "grad_norm": 0.20355862732373922, "learning_rate": 0.0004809026004760502, "loss": 3.0744423866271973, "step": 5765, "token_acc": 0.2906720181558376 }, { "epoch": 3.379947229551451, "grad_norm": 0.25056662029779336, "learning_rate": 0.0004808933111623366, "loss": 3.055604934692383, "step": 5766, "token_acc": 0.29425527394456336 }, { "epoch": 3.3805335678686603, "grad_norm": 0.1990618498957767, "learning_rate": 0.0004808840196796914, "loss": 3.0865888595581055, "step": 5767, "token_acc": 0.2899646771125804 }, { "epoch": 3.3811199061858694, "grad_norm": 0.26088177363157555, "learning_rate": 0.0004808747260282021, "loss": 3.0731751918792725, "step": 5768, "token_acc": 0.2912242744063325 }, { "epoch": 3.381706244503078, "grad_norm": 0.21322159532527055, "learning_rate": 0.0004808654302079558, "loss": 3.0727858543395996, "step": 5769, "token_acc": 0.2907417068315308 }, { "epoch": 3.382292582820287, "grad_norm": 0.24936095740595454, "learning_rate": 0.0004808561322190399, "loss": 3.077361583709717, "step": 5770, "token_acc": 0.29025648326338027 }, { "epoch": 3.3828789211374963, "grad_norm": 0.1961713953261956, "learning_rate": 0.0004808468320615417, "loss": 3.0827064514160156, "step": 5771, "token_acc": 0.28903025702637625 }, { "epoch": 3.3834652594547054, "grad_norm": 0.2066706019402469, "learning_rate": 0.00048083752973554863, "loss": 3.1105713844299316, "step": 5772, "token_acc": 0.2863337526028179 }, { "epoch": 3.3840515977719146, "grad_norm": 0.191324531502551, "learning_rate": 0.00048082822524114793, "loss": 3.0602855682373047, "step": 5773, "token_acc": 0.29250441817117256 }, { "epoch": 3.3846379360891232, "grad_norm": 0.2172599810346052, "learning_rate": 0.0004808189185784272, "loss": 3.058100700378418, "step": 5774, "token_acc": 0.294175929000195 }, { "epoch": 3.3852242744063323, "grad_norm": 0.24849369799187593, "learning_rate": 0.00048080960974747366, "loss": 3.073580741882324, "step": 5775, "token_acc": 0.2911344833885743 }, { "epoch": 3.3858106127235414, "grad_norm": 0.1901356654576357, "learning_rate": 0.0004808002987483749, "loss": 3.0522637367248535, "step": 5776, "token_acc": 0.29239809202229367 }, { "epoch": 3.3863969510407506, "grad_norm": 0.22262370863072392, "learning_rate": 0.00048079098558121835, "loss": 3.1037778854370117, "step": 5777, "token_acc": 0.28617139893685983 }, { "epoch": 3.3869832893579597, "grad_norm": 0.25226341283453557, "learning_rate": 0.00048078167024609154, "loss": 3.0653905868530273, "step": 5778, "token_acc": 0.29262124815606466 }, { "epoch": 3.387569627675169, "grad_norm": 0.25095395010106475, "learning_rate": 0.00048077235274308184, "loss": 3.0395517349243164, "step": 5779, "token_acc": 0.29709670041042485 }, { "epoch": 3.3881559659923774, "grad_norm": 0.2835144388086564, "learning_rate": 0.00048076303307227684, "loss": 3.0758960247039795, "step": 5780, "token_acc": 0.2903199888032429 }, { "epoch": 3.3887423043095866, "grad_norm": 0.2853362401896701, "learning_rate": 0.0004807537112337642, "loss": 3.1059298515319824, "step": 5781, "token_acc": 0.28603461221877896 }, { "epoch": 3.3893286426267957, "grad_norm": 0.23077786522564972, "learning_rate": 0.0004807443872276314, "loss": 3.0568389892578125, "step": 5782, "token_acc": 0.29252857349812816 }, { "epoch": 3.389914980944005, "grad_norm": 0.244304402265046, "learning_rate": 0.00048073506105396585, "loss": 3.0718703269958496, "step": 5783, "token_acc": 0.29081794930433436 }, { "epoch": 3.390501319261214, "grad_norm": 0.24247940725513317, "learning_rate": 0.0004807257327128555, "loss": 3.099249839782715, "step": 5784, "token_acc": 0.28725392294849517 }, { "epoch": 3.3910876575784226, "grad_norm": 0.19237477165199504, "learning_rate": 0.0004807164022043876, "loss": 3.105773448944092, "step": 5785, "token_acc": 0.2860451983048137 }, { "epoch": 3.3916739958956317, "grad_norm": 0.22914280124163292, "learning_rate": 0.0004807070695286502, "loss": 3.126349449157715, "step": 5786, "token_acc": 0.2840254124569026 }, { "epoch": 3.392260334212841, "grad_norm": 0.2751607940150409, "learning_rate": 0.00048069773468573064, "loss": 3.0968384742736816, "step": 5787, "token_acc": 0.28644330870677376 }, { "epoch": 3.39284667253005, "grad_norm": 0.27182449339447723, "learning_rate": 0.00048068839767571674, "loss": 3.0790700912475586, "step": 5788, "token_acc": 0.2897938386768049 }, { "epoch": 3.393433010847259, "grad_norm": 0.2632142137006546, "learning_rate": 0.00048067905849869625, "loss": 3.096647024154663, "step": 5789, "token_acc": 0.2887520152769734 }, { "epoch": 3.394019349164468, "grad_norm": 0.24519727283546697, "learning_rate": 0.00048066971715475683, "loss": 3.1253411769866943, "step": 5790, "token_acc": 0.28409934388392544 }, { "epoch": 3.394605687481677, "grad_norm": 0.22431401527896172, "learning_rate": 0.00048066037364398624, "loss": 3.0510153770446777, "step": 5791, "token_acc": 0.29287805427316743 }, { "epoch": 3.395192025798886, "grad_norm": 0.2927509222649674, "learning_rate": 0.00048065102796647225, "loss": 3.089550495147705, "step": 5792, "token_acc": 0.28901016133934115 }, { "epoch": 3.395778364116095, "grad_norm": 0.33694070162379813, "learning_rate": 0.0004806416801223027, "loss": 3.053389549255371, "step": 5793, "token_acc": 0.29556451395878297 }, { "epoch": 3.396364702433304, "grad_norm": 0.2532747358177874, "learning_rate": 0.0004806323301115653, "loss": 3.078293800354004, "step": 5794, "token_acc": 0.2901449589208232 }, { "epoch": 3.3969510407505132, "grad_norm": 0.23156978629444264, "learning_rate": 0.00048062297793434797, "loss": 3.0768070220947266, "step": 5795, "token_acc": 0.29080607316842205 }, { "epoch": 3.397537379067722, "grad_norm": 0.2708681163987018, "learning_rate": 0.0004806136235907386, "loss": 3.093792200088501, "step": 5796, "token_acc": 0.2877976366879146 }, { "epoch": 3.398123717384931, "grad_norm": 0.1918179856309644, "learning_rate": 0.00048060426708082483, "loss": 3.081390142440796, "step": 5797, "token_acc": 0.28891674834964287 }, { "epoch": 3.39871005570214, "grad_norm": 0.2803560236727329, "learning_rate": 0.0004805949084046948, "loss": 3.118910789489746, "step": 5798, "token_acc": 0.28462239923999766 }, { "epoch": 3.3992963940193492, "grad_norm": 0.20387651993892913, "learning_rate": 0.0004805855475624363, "loss": 3.122476577758789, "step": 5799, "token_acc": 0.2821834258506441 }, { "epoch": 3.3998827323365584, "grad_norm": 0.22997903574199927, "learning_rate": 0.0004805761845541374, "loss": 3.0593228340148926, "step": 5800, "token_acc": 0.2926455275313539 }, { "epoch": 3.4004690706537675, "grad_norm": 0.19302973158168396, "learning_rate": 0.0004805668193798859, "loss": 3.0827794075012207, "step": 5801, "token_acc": 0.2894010425730725 }, { "epoch": 3.401055408970976, "grad_norm": 0.26162173266112315, "learning_rate": 0.0004805574520397699, "loss": 3.1175765991210938, "step": 5802, "token_acc": 0.28682978986761054 }, { "epoch": 3.4016417472881852, "grad_norm": 0.20087411893382062, "learning_rate": 0.00048054808253387716, "loss": 3.044686794281006, "step": 5803, "token_acc": 0.2961704056197598 }, { "epoch": 3.4022280856053944, "grad_norm": 0.24200512103068264, "learning_rate": 0.0004805387108622959, "loss": 3.093392848968506, "step": 5804, "token_acc": 0.288144082108278 }, { "epoch": 3.4028144239226035, "grad_norm": 0.21056269740455547, "learning_rate": 0.00048052933702511414, "loss": 3.105135202407837, "step": 5805, "token_acc": 0.2862509194073763 }, { "epoch": 3.403400762239812, "grad_norm": 0.28302352515804924, "learning_rate": 0.0004805199610224199, "loss": 3.0494604110717773, "step": 5806, "token_acc": 0.29358068995967235 }, { "epoch": 3.4039871005570213, "grad_norm": 0.19848003622649396, "learning_rate": 0.00048051058285430125, "loss": 3.1077818870544434, "step": 5807, "token_acc": 0.2857316285677386 }, { "epoch": 3.4045734388742304, "grad_norm": 0.26028314600754743, "learning_rate": 0.00048050120252084627, "loss": 3.1068668365478516, "step": 5808, "token_acc": 0.2860129383545594 }, { "epoch": 3.4051597771914395, "grad_norm": 0.22115195264037904, "learning_rate": 0.00048049182002214317, "loss": 3.070546865463257, "step": 5809, "token_acc": 0.29210780117250573 }, { "epoch": 3.4057461155086486, "grad_norm": 0.21981647549314387, "learning_rate": 0.00048048243535828, "loss": 3.055387496948242, "step": 5810, "token_acc": 0.2928867900499788 }, { "epoch": 3.4063324538258577, "grad_norm": 0.1991489256734722, "learning_rate": 0.0004804730485293448, "loss": 3.0660040378570557, "step": 5811, "token_acc": 0.2907783908249108 }, { "epoch": 3.4069187921430664, "grad_norm": 0.23595584227530747, "learning_rate": 0.000480463659535426, "loss": 3.0615086555480957, "step": 5812, "token_acc": 0.29362805617989385 }, { "epoch": 3.4075051304602755, "grad_norm": 0.22364849082900676, "learning_rate": 0.00048045426837661163, "loss": 3.0842325687408447, "step": 5813, "token_acc": 0.2893531722972433 }, { "epoch": 3.4080914687774846, "grad_norm": 0.20912965557822802, "learning_rate": 0.00048044487505298993, "loss": 3.0990984439849854, "step": 5814, "token_acc": 0.28828482797067007 }, { "epoch": 3.4086778070946937, "grad_norm": 0.20673397990875436, "learning_rate": 0.00048043547956464914, "loss": 3.098456859588623, "step": 5815, "token_acc": 0.28671040983179713 }, { "epoch": 3.409264145411903, "grad_norm": 0.21435965519557224, "learning_rate": 0.00048042608191167763, "loss": 3.0953593254089355, "step": 5816, "token_acc": 0.28847356834643445 }, { "epoch": 3.4098504837291115, "grad_norm": 0.250425038624519, "learning_rate": 0.00048041668209416354, "loss": 3.07012939453125, "step": 5817, "token_acc": 0.2915172343757516 }, { "epoch": 3.4104368220463206, "grad_norm": 0.22281897593464214, "learning_rate": 0.0004804072801121952, "loss": 3.0896058082580566, "step": 5818, "token_acc": 0.2876357525023135 }, { "epoch": 3.4110231603635297, "grad_norm": 0.26051131666350635, "learning_rate": 0.0004803978759658609, "loss": 3.105618476867676, "step": 5819, "token_acc": 0.28777552010836815 }, { "epoch": 3.411609498680739, "grad_norm": 0.21875868497938686, "learning_rate": 0.0004803884696552491, "loss": 3.106571674346924, "step": 5820, "token_acc": 0.2865750895806433 }, { "epoch": 3.412195836997948, "grad_norm": 0.32509710592213953, "learning_rate": 0.00048037906118044804, "loss": 3.0984458923339844, "step": 5821, "token_acc": 0.28611493958139683 }, { "epoch": 3.412782175315157, "grad_norm": 0.21447833212759176, "learning_rate": 0.0004803696505415461, "loss": 3.0879740715026855, "step": 5822, "token_acc": 0.28737390562744025 }, { "epoch": 3.4133685136323657, "grad_norm": 0.3447987789992168, "learning_rate": 0.0004803602377386318, "loss": 3.1139421463012695, "step": 5823, "token_acc": 0.28499731180068777 }, { "epoch": 3.413954851949575, "grad_norm": 0.3025454785828331, "learning_rate": 0.00048035082277179345, "loss": 3.091762065887451, "step": 5824, "token_acc": 0.28864349148354257 }, { "epoch": 3.414541190266784, "grad_norm": 0.28424914658042627, "learning_rate": 0.0004803414056411195, "loss": 3.0364151000976562, "step": 5825, "token_acc": 0.29480832057370404 }, { "epoch": 3.415127528583993, "grad_norm": 0.248807730418761, "learning_rate": 0.0004803319863466985, "loss": 3.1043953895568848, "step": 5826, "token_acc": 0.28467934095145386 }, { "epoch": 3.415713866901202, "grad_norm": 0.25396216771209795, "learning_rate": 0.00048032256488861883, "loss": 3.1151437759399414, "step": 5827, "token_acc": 0.2841369692976071 }, { "epoch": 3.416300205218411, "grad_norm": 0.2014072291587069, "learning_rate": 0.000480313141266969, "loss": 3.1546125411987305, "step": 5828, "token_acc": 0.28141557032558434 }, { "epoch": 3.41688654353562, "grad_norm": 0.290712634234121, "learning_rate": 0.0004803037154818375, "loss": 3.0473451614379883, "step": 5829, "token_acc": 0.2953191193461423 }, { "epoch": 3.417472881852829, "grad_norm": 0.21283381410406232, "learning_rate": 0.00048029428753331306, "loss": 3.096853256225586, "step": 5830, "token_acc": 0.287203227158536 }, { "epoch": 3.418059220170038, "grad_norm": 0.25027082994433786, "learning_rate": 0.00048028485742148406, "loss": 3.1329455375671387, "step": 5831, "token_acc": 0.2839542999540677 }, { "epoch": 3.4186455584872473, "grad_norm": 0.2108447522487557, "learning_rate": 0.0004802754251464391, "loss": 3.097334384918213, "step": 5832, "token_acc": 0.28691633817400797 }, { "epoch": 3.4192318968044564, "grad_norm": 0.24674563899870713, "learning_rate": 0.00048026599070826684, "loss": 3.1175389289855957, "step": 5833, "token_acc": 0.28472183486052444 }, { "epoch": 3.419818235121665, "grad_norm": 0.2178154824649168, "learning_rate": 0.00048025655410705595, "loss": 3.1463961601257324, "step": 5834, "token_acc": 0.28312026026350007 }, { "epoch": 3.420404573438874, "grad_norm": 0.23479124801882065, "learning_rate": 0.0004802471153428949, "loss": 3.10978364944458, "step": 5835, "token_acc": 0.2860416248693283 }, { "epoch": 3.4209909117560833, "grad_norm": 0.26709198247001814, "learning_rate": 0.0004802376744158725, "loss": 3.082141399383545, "step": 5836, "token_acc": 0.29098095097955695 }, { "epoch": 3.4215772500732924, "grad_norm": 0.21398849242316462, "learning_rate": 0.00048022823132607746, "loss": 3.1020755767822266, "step": 5837, "token_acc": 0.28861034366748567 }, { "epoch": 3.4221635883905015, "grad_norm": 0.2862274201728564, "learning_rate": 0.0004802187860735984, "loss": 3.086907386779785, "step": 5838, "token_acc": 0.29095168696033724 }, { "epoch": 3.42274992670771, "grad_norm": 0.21912026197580337, "learning_rate": 0.000480209338658524, "loss": 3.0943949222564697, "step": 5839, "token_acc": 0.287692192188305 }, { "epoch": 3.4233362650249193, "grad_norm": 0.28565839762132395, "learning_rate": 0.00048019988908094315, "loss": 3.0787644386291504, "step": 5840, "token_acc": 0.291068636061781 }, { "epoch": 3.4239226033421284, "grad_norm": 0.21368706092303408, "learning_rate": 0.0004801904373409445, "loss": 3.0760412216186523, "step": 5841, "token_acc": 0.2904921410686506 }, { "epoch": 3.4245089416593375, "grad_norm": 0.26817031917053785, "learning_rate": 0.0004801809834386169, "loss": 3.0939764976501465, "step": 5842, "token_acc": 0.2879793124398506 }, { "epoch": 3.4250952799765466, "grad_norm": 0.22522201372110295, "learning_rate": 0.0004801715273740491, "loss": 3.1108858585357666, "step": 5843, "token_acc": 0.2847255604452223 }, { "epoch": 3.4256816182937557, "grad_norm": 0.29889483013612345, "learning_rate": 0.00048016206914733, "loss": 3.03969144821167, "step": 5844, "token_acc": 0.29548378133618736 }, { "epoch": 3.4262679566109644, "grad_norm": 0.2070655861874664, "learning_rate": 0.00048015260875854837, "loss": 3.0833580493927, "step": 5845, "token_acc": 0.28904515361986005 }, { "epoch": 3.4268542949281735, "grad_norm": 0.238304551453722, "learning_rate": 0.0004801431462077932, "loss": 3.07564115524292, "step": 5846, "token_acc": 0.29077921020206027 }, { "epoch": 3.4274406332453826, "grad_norm": 0.22460132837599173, "learning_rate": 0.0004801336814951532, "loss": 3.113635301589966, "step": 5847, "token_acc": 0.28577541689009106 }, { "epoch": 3.4280269715625917, "grad_norm": 0.24815266817082246, "learning_rate": 0.0004801242146207174, "loss": 3.064605712890625, "step": 5848, "token_acc": 0.2912406158544344 }, { "epoch": 3.4286133098798004, "grad_norm": 0.23052326688352742, "learning_rate": 0.0004801147455845747, "loss": 3.0937862396240234, "step": 5849, "token_acc": 0.2874501521131393 }, { "epoch": 3.4291996481970095, "grad_norm": 0.23154088181448326, "learning_rate": 0.00048010527438681404, "loss": 3.057910442352295, "step": 5850, "token_acc": 0.2935089420822568 }, { "epoch": 3.4297859865142186, "grad_norm": 0.21041571332952558, "learning_rate": 0.0004800958010275244, "loss": 3.0691113471984863, "step": 5851, "token_acc": 0.29287693443870044 }, { "epoch": 3.4303723248314277, "grad_norm": 0.2429813309734059, "learning_rate": 0.00048008632550679476, "loss": 3.0724568367004395, "step": 5852, "token_acc": 0.29017508820983956 }, { "epoch": 3.430958663148637, "grad_norm": 0.20579764764510902, "learning_rate": 0.00048007684782471415, "loss": 3.0944318771362305, "step": 5853, "token_acc": 0.28809689057955273 }, { "epoch": 3.431545001465846, "grad_norm": 0.24348619575380165, "learning_rate": 0.00048006736798137165, "loss": 3.0369873046875, "step": 5854, "token_acc": 0.2957550881849183 }, { "epoch": 3.432131339783055, "grad_norm": 0.22050794185578645, "learning_rate": 0.00048005788597685616, "loss": 3.0986013412475586, "step": 5855, "token_acc": 0.2857560042293943 }, { "epoch": 3.4327176781002637, "grad_norm": 0.21902994089472705, "learning_rate": 0.00048004840181125686, "loss": 3.0648157596588135, "step": 5856, "token_acc": 0.2913929369393404 }, { "epoch": 3.433304016417473, "grad_norm": 0.22313334039128813, "learning_rate": 0.0004800389154846628, "loss": 3.0666134357452393, "step": 5857, "token_acc": 0.29199047084319396 }, { "epoch": 3.433890354734682, "grad_norm": 0.22607785023685548, "learning_rate": 0.0004800294269971632, "loss": 3.0783205032348633, "step": 5858, "token_acc": 0.2910110786124388 }, { "epoch": 3.434476693051891, "grad_norm": 0.20020795341939357, "learning_rate": 0.000480019936348847, "loss": 3.1080262660980225, "step": 5859, "token_acc": 0.2862674951019127 }, { "epoch": 3.4350630313690997, "grad_norm": 0.2468317868597328, "learning_rate": 0.0004800104435398035, "loss": 3.0623440742492676, "step": 5860, "token_acc": 0.2929462659618325 }, { "epoch": 3.435649369686309, "grad_norm": 0.2721877824932883, "learning_rate": 0.0004800009485701218, "loss": 3.0744192600250244, "step": 5861, "token_acc": 0.29018273582069226 }, { "epoch": 3.436235708003518, "grad_norm": 0.2124572696852251, "learning_rate": 0.00047999145143989114, "loss": 3.107813835144043, "step": 5862, "token_acc": 0.2856151952603471 }, { "epoch": 3.436822046320727, "grad_norm": 0.21054549913549925, "learning_rate": 0.0004799819521492007, "loss": 3.056771755218506, "step": 5863, "token_acc": 0.29344841012999 }, { "epoch": 3.437408384637936, "grad_norm": 0.22786342945410307, "learning_rate": 0.0004799724506981398, "loss": 3.0475807189941406, "step": 5864, "token_acc": 0.2929505097671713 }, { "epoch": 3.4379947229551453, "grad_norm": 0.22615496284597908, "learning_rate": 0.0004799629470867975, "loss": 3.0709400177001953, "step": 5865, "token_acc": 0.29007411259914184 }, { "epoch": 3.438581061272354, "grad_norm": 0.23612413106676294, "learning_rate": 0.00047995344131526323, "loss": 3.0852227210998535, "step": 5866, "token_acc": 0.29076783249025534 }, { "epoch": 3.439167399589563, "grad_norm": 0.24820027035710515, "learning_rate": 0.00047994393338362623, "loss": 3.0904593467712402, "step": 5867, "token_acc": 0.28847243354238566 }, { "epoch": 3.439753737906772, "grad_norm": 0.21806981933183187, "learning_rate": 0.0004799344232919759, "loss": 3.083679676055908, "step": 5868, "token_acc": 0.2912995211761706 }, { "epoch": 3.4403400762239813, "grad_norm": 0.2421915623171751, "learning_rate": 0.00047992491104040144, "loss": 3.073514461517334, "step": 5869, "token_acc": 0.29150059678588175 }, { "epoch": 3.4409264145411904, "grad_norm": 0.3239849721689016, "learning_rate": 0.0004799153966289923, "loss": 3.1023945808410645, "step": 5870, "token_acc": 0.28667478029886073 }, { "epoch": 3.441512752858399, "grad_norm": 0.35198821143481107, "learning_rate": 0.00047990588005783773, "loss": 3.1131088733673096, "step": 5871, "token_acc": 0.28420729102634895 }, { "epoch": 3.442099091175608, "grad_norm": 0.19056926777993347, "learning_rate": 0.00047989636132702733, "loss": 3.073345184326172, "step": 5872, "token_acc": 0.2910340140994849 }, { "epoch": 3.4426854294928173, "grad_norm": 0.290610025011877, "learning_rate": 0.00047988684043665046, "loss": 3.104518175125122, "step": 5873, "token_acc": 0.2852770205574421 }, { "epoch": 3.4432717678100264, "grad_norm": 0.2275268100529865, "learning_rate": 0.00047987731738679634, "loss": 3.047569751739502, "step": 5874, "token_acc": 0.293376609384259 }, { "epoch": 3.4438581061272355, "grad_norm": 0.25889692542266823, "learning_rate": 0.00047986779217755465, "loss": 3.0869345664978027, "step": 5875, "token_acc": 0.28864412810435836 }, { "epoch": 3.4444444444444446, "grad_norm": 0.25180984086815744, "learning_rate": 0.00047985826480901483, "loss": 3.047670364379883, "step": 5876, "token_acc": 0.2938706499454163 }, { "epoch": 3.4450307827616533, "grad_norm": 0.2280147041124647, "learning_rate": 0.0004798487352812663, "loss": 3.070922374725342, "step": 5877, "token_acc": 0.2922873640197658 }, { "epoch": 3.4456171210788624, "grad_norm": 0.24507837215999356, "learning_rate": 0.0004798392035943987, "loss": 3.0666935443878174, "step": 5878, "token_acc": 0.29259729491630826 }, { "epoch": 3.4462034593960715, "grad_norm": 0.23709361327681885, "learning_rate": 0.0004798296697485014, "loss": 3.091721296310425, "step": 5879, "token_acc": 0.288146028851545 }, { "epoch": 3.4467897977132806, "grad_norm": 0.2164678722251038, "learning_rate": 0.0004798201337436642, "loss": 3.080085277557373, "step": 5880, "token_acc": 0.2888464425979173 }, { "epoch": 3.4473761360304898, "grad_norm": 0.229220830230217, "learning_rate": 0.0004798105955799764, "loss": 3.0873990058898926, "step": 5881, "token_acc": 0.28886171431513635 }, { "epoch": 3.4479624743476984, "grad_norm": 0.23964510498453231, "learning_rate": 0.0004798010552575277, "loss": 3.0319862365722656, "step": 5882, "token_acc": 0.2955203538457582 }, { "epoch": 3.4485488126649075, "grad_norm": 0.21136483197286854, "learning_rate": 0.00047979151277640784, "loss": 3.069993019104004, "step": 5883, "token_acc": 0.29241147501320297 }, { "epoch": 3.4491351509821166, "grad_norm": 0.24858832947091292, "learning_rate": 0.00047978196813670636, "loss": 3.1154470443725586, "step": 5884, "token_acc": 0.28566514085320616 }, { "epoch": 3.4497214892993258, "grad_norm": 0.18313302123098088, "learning_rate": 0.0004797724213385129, "loss": 3.1227142810821533, "step": 5885, "token_acc": 0.28354372595960564 }, { "epoch": 3.450307827616535, "grad_norm": 0.22642898635030714, "learning_rate": 0.0004797628723819172, "loss": 3.0627830028533936, "step": 5886, "token_acc": 0.29105929197464475 }, { "epoch": 3.450894165933744, "grad_norm": 0.2353946425463065, "learning_rate": 0.0004797533212670089, "loss": 3.0720362663269043, "step": 5887, "token_acc": 0.28972563079534547 }, { "epoch": 3.4514805042509527, "grad_norm": 0.19565370516202266, "learning_rate": 0.00047974376799387767, "loss": 3.0723142623901367, "step": 5888, "token_acc": 0.2922520159968531 }, { "epoch": 3.4520668425681618, "grad_norm": 0.23873996806366388, "learning_rate": 0.0004797342125626134, "loss": 3.1233506202697754, "step": 5889, "token_acc": 0.28536165956638826 }, { "epoch": 3.452653180885371, "grad_norm": 0.20767168854301352, "learning_rate": 0.00047972465497330574, "loss": 3.09751296043396, "step": 5890, "token_acc": 0.288708119321991 }, { "epoch": 3.45323951920258, "grad_norm": 0.21617893846397562, "learning_rate": 0.0004797150952260445, "loss": 3.0834944248199463, "step": 5891, "token_acc": 0.2883928399019059 }, { "epoch": 3.453825857519789, "grad_norm": 0.24169749189591422, "learning_rate": 0.0004797055333209195, "loss": 3.1185481548309326, "step": 5892, "token_acc": 0.2850378968151446 }, { "epoch": 3.4544121958369978, "grad_norm": 0.19785715165223416, "learning_rate": 0.0004796959692580206, "loss": 3.1080713272094727, "step": 5893, "token_acc": 0.2861913682353985 }, { "epoch": 3.454998534154207, "grad_norm": 0.1978872677431063, "learning_rate": 0.00047968640303743746, "loss": 3.060851573944092, "step": 5894, "token_acc": 0.2914436737719735 }, { "epoch": 3.455584872471416, "grad_norm": 0.1997775882601183, "learning_rate": 0.0004796768346592603, "loss": 3.086789608001709, "step": 5895, "token_acc": 0.2900453394507326 }, { "epoch": 3.456171210788625, "grad_norm": 0.21493413444631596, "learning_rate": 0.0004796672641235785, "loss": 3.110297679901123, "step": 5896, "token_acc": 0.2871619939898398 }, { "epoch": 3.456757549105834, "grad_norm": 0.20435494200115842, "learning_rate": 0.00047965769143048245, "loss": 3.0590176582336426, "step": 5897, "token_acc": 0.2918432450657468 }, { "epoch": 3.4573438874230433, "grad_norm": 0.20248806829715904, "learning_rate": 0.0004796481165800617, "loss": 3.0713210105895996, "step": 5898, "token_acc": 0.2916391208731904 }, { "epoch": 3.457930225740252, "grad_norm": 0.2129810548603642, "learning_rate": 0.00047963853957240645, "loss": 3.107067584991455, "step": 5899, "token_acc": 0.28544293136734084 }, { "epoch": 3.458516564057461, "grad_norm": 0.19620967484533816, "learning_rate": 0.00047962896040760653, "loss": 3.0816421508789062, "step": 5900, "token_acc": 0.2881339540330232 }, { "epoch": 3.45910290237467, "grad_norm": 0.23558472529347352, "learning_rate": 0.0004796193790857519, "loss": 3.1406548023223877, "step": 5901, "token_acc": 0.28139418576319875 }, { "epoch": 3.4596892406918793, "grad_norm": 0.213363007482945, "learning_rate": 0.0004796097956069327, "loss": 3.0917458534240723, "step": 5902, "token_acc": 0.2888978469896272 }, { "epoch": 3.460275579009088, "grad_norm": 0.23480757352748172, "learning_rate": 0.00047960020997123886, "loss": 3.094179630279541, "step": 5903, "token_acc": 0.2880401999775076 }, { "epoch": 3.460861917326297, "grad_norm": 0.2999588220960693, "learning_rate": 0.0004795906221787604, "loss": 3.1163275241851807, "step": 5904, "token_acc": 0.28506473431408774 }, { "epoch": 3.4614482556435062, "grad_norm": 0.38834600505826655, "learning_rate": 0.00047958103222958746, "loss": 3.0935964584350586, "step": 5905, "token_acc": 0.28795436857198076 }, { "epoch": 3.4620345939607153, "grad_norm": 0.2394799030029201, "learning_rate": 0.00047957144012381004, "loss": 3.1024112701416016, "step": 5906, "token_acc": 0.2874212530637876 }, { "epoch": 3.4626209322779244, "grad_norm": 0.2260078154436305, "learning_rate": 0.00047956184586151835, "loss": 3.15973162651062, "step": 5907, "token_acc": 0.27877108843092924 }, { "epoch": 3.4632072705951336, "grad_norm": 0.23691234719081866, "learning_rate": 0.0004795522494428024, "loss": 3.129293918609619, "step": 5908, "token_acc": 0.28255072952064586 }, { "epoch": 3.4637936089123427, "grad_norm": 0.23005547958956088, "learning_rate": 0.00047954265086775245, "loss": 3.0672013759613037, "step": 5909, "token_acc": 0.2889954391299263 }, { "epoch": 3.4643799472295513, "grad_norm": 0.2697468946380799, "learning_rate": 0.00047953305013645855, "loss": 3.0594286918640137, "step": 5910, "token_acc": 0.29314551301245917 }, { "epoch": 3.4649662855467604, "grad_norm": 0.2032225219656816, "learning_rate": 0.000479523447249011, "loss": 3.091585159301758, "step": 5911, "token_acc": 0.2881227152937176 }, { "epoch": 3.4655526238639696, "grad_norm": 0.3188226230622295, "learning_rate": 0.00047951384220549994, "loss": 3.078885078430176, "step": 5912, "token_acc": 0.29143174843935127 }, { "epoch": 3.4661389621811787, "grad_norm": 0.19280524796679138, "learning_rate": 0.0004795042350060156, "loss": 3.1068716049194336, "step": 5913, "token_acc": 0.2850664022933845 }, { "epoch": 3.4667253004983873, "grad_norm": 0.25444860610954023, "learning_rate": 0.00047949462565064817, "loss": 3.1014790534973145, "step": 5914, "token_acc": 0.28729332428601745 }, { "epoch": 3.4673116388155965, "grad_norm": 0.22175254681788797, "learning_rate": 0.00047948501413948806, "loss": 3.067160129547119, "step": 5915, "token_acc": 0.2907565379032874 }, { "epoch": 3.4678979771328056, "grad_norm": 0.19633822875936438, "learning_rate": 0.0004794754004726254, "loss": 3.1334965229034424, "step": 5916, "token_acc": 0.282949222263062 }, { "epoch": 3.4684843154500147, "grad_norm": 0.2528880158320566, "learning_rate": 0.00047946578465015067, "loss": 3.0841922760009766, "step": 5917, "token_acc": 0.2898892110167737 }, { "epoch": 3.469070653767224, "grad_norm": 0.2162911476626947, "learning_rate": 0.000479456166672154, "loss": 3.123838186264038, "step": 5918, "token_acc": 0.2823161294663885 }, { "epoch": 3.469656992084433, "grad_norm": 0.27880606639401173, "learning_rate": 0.0004794465465387259, "loss": 3.05690860748291, "step": 5919, "token_acc": 0.2931890834785443 }, { "epoch": 3.4702433304016416, "grad_norm": 0.20137499945192558, "learning_rate": 0.0004794369242499567, "loss": 3.1275272369384766, "step": 5920, "token_acc": 0.2835038809658099 }, { "epoch": 3.4708296687188507, "grad_norm": 0.27705383580452014, "learning_rate": 0.00047942729980593674, "loss": 3.1256914138793945, "step": 5921, "token_acc": 0.2835555745239308 }, { "epoch": 3.47141600703606, "grad_norm": 0.23354785475975945, "learning_rate": 0.00047941767320675645, "loss": 3.0796656608581543, "step": 5922, "token_acc": 0.29050151710199923 }, { "epoch": 3.472002345353269, "grad_norm": 0.2695762584939313, "learning_rate": 0.0004794080444525063, "loss": 3.0675456523895264, "step": 5923, "token_acc": 0.29118756311712646 }, { "epoch": 3.472588683670478, "grad_norm": 0.28046400587515247, "learning_rate": 0.00047939841354327663, "loss": 3.1155524253845215, "step": 5924, "token_acc": 0.2844096885743833 }, { "epoch": 3.4731750219876867, "grad_norm": 0.25623743361026474, "learning_rate": 0.00047938878047915805, "loss": 3.1065926551818848, "step": 5925, "token_acc": 0.2857889126364972 }, { "epoch": 3.473761360304896, "grad_norm": 0.2607301049000717, "learning_rate": 0.00047937914526024095, "loss": 3.1076412200927734, "step": 5926, "token_acc": 0.28519253780050036 }, { "epoch": 3.474347698622105, "grad_norm": 0.24389432803925373, "learning_rate": 0.00047936950788661595, "loss": 3.10764479637146, "step": 5927, "token_acc": 0.286375068430114 }, { "epoch": 3.474934036939314, "grad_norm": 0.23698449996802792, "learning_rate": 0.0004793598683583734, "loss": 3.0681302547454834, "step": 5928, "token_acc": 0.29139032552479466 }, { "epoch": 3.475520375256523, "grad_norm": 0.23919141595620294, "learning_rate": 0.000479350226675604, "loss": 3.091010093688965, "step": 5929, "token_acc": 0.2887083888833232 }, { "epoch": 3.4761067135737322, "grad_norm": 0.2554510519315503, "learning_rate": 0.00047934058283839823, "loss": 3.067103862762451, "step": 5930, "token_acc": 0.2926132195930343 }, { "epoch": 3.476693051890941, "grad_norm": 0.22115131572193175, "learning_rate": 0.00047933093684684677, "loss": 3.0883805751800537, "step": 5931, "token_acc": 0.288605709159645 }, { "epoch": 3.47727939020815, "grad_norm": 0.24639565109498957, "learning_rate": 0.0004793212887010402, "loss": 3.09421443939209, "step": 5932, "token_acc": 0.28765966732564213 }, { "epoch": 3.477865728525359, "grad_norm": 0.2506120930951755, "learning_rate": 0.00047931163840106905, "loss": 3.107215404510498, "step": 5933, "token_acc": 0.285880593745144 }, { "epoch": 3.4784520668425682, "grad_norm": 0.2414804712930038, "learning_rate": 0.0004793019859470241, "loss": 3.0895485877990723, "step": 5934, "token_acc": 0.28935288251483504 }, { "epoch": 3.4790384051597774, "grad_norm": 0.2425742527155166, "learning_rate": 0.00047929233133899604, "loss": 3.1272435188293457, "step": 5935, "token_acc": 0.2846425013410989 }, { "epoch": 3.479624743476986, "grad_norm": 0.21342719786388123, "learning_rate": 0.00047928267457707544, "loss": 3.039905071258545, "step": 5936, "token_acc": 0.2953030750958212 }, { "epoch": 3.480211081794195, "grad_norm": 0.24285279288429706, "learning_rate": 0.00047927301566135313, "loss": 3.0955562591552734, "step": 5937, "token_acc": 0.28780713191304047 }, { "epoch": 3.4807974201114043, "grad_norm": 0.21792209894276007, "learning_rate": 0.00047926335459191975, "loss": 3.06919002532959, "step": 5938, "token_acc": 0.2916688702495161 }, { "epoch": 3.4813837584286134, "grad_norm": 0.24508291700661203, "learning_rate": 0.0004792536913688661, "loss": 3.0584664344787598, "step": 5939, "token_acc": 0.29375971821153174 }, { "epoch": 3.4819700967458225, "grad_norm": 0.24703601539517928, "learning_rate": 0.0004792440259922829, "loss": 3.082655429840088, "step": 5940, "token_acc": 0.2905120314603374 }, { "epoch": 3.4825564350630316, "grad_norm": 0.23334061865041292, "learning_rate": 0.00047923435846226105, "loss": 3.1030750274658203, "step": 5941, "token_acc": 0.2872049939482543 }, { "epoch": 3.4831427733802403, "grad_norm": 0.2181423836865363, "learning_rate": 0.0004792246887788912, "loss": 3.1213622093200684, "step": 5942, "token_acc": 0.28480210734277245 }, { "epoch": 3.4837291116974494, "grad_norm": 0.22873209863714986, "learning_rate": 0.0004792150169422644, "loss": 3.086028575897217, "step": 5943, "token_acc": 0.2891458099005558 }, { "epoch": 3.4843154500146585, "grad_norm": 0.25394056758167133, "learning_rate": 0.0004792053429524713, "loss": 3.0655152797698975, "step": 5944, "token_acc": 0.29217168982575875 }, { "epoch": 3.4849017883318676, "grad_norm": 0.20447412407690269, "learning_rate": 0.0004791956668096029, "loss": 3.0902397632598877, "step": 5945, "token_acc": 0.2883598023832747 }, { "epoch": 3.4854881266490767, "grad_norm": 0.2794192915241018, "learning_rate": 0.00047918598851375005, "loss": 3.129974603652954, "step": 5946, "token_acc": 0.2836223649190993 }, { "epoch": 3.4860744649662854, "grad_norm": 0.23200072967168187, "learning_rate": 0.0004791763080650037, "loss": 3.0960357189178467, "step": 5947, "token_acc": 0.2884312407581664 }, { "epoch": 3.4866608032834945, "grad_norm": 0.2200670654492003, "learning_rate": 0.00047916662546345474, "loss": 3.0676565170288086, "step": 5948, "token_acc": 0.29082657888041374 }, { "epoch": 3.4872471416007036, "grad_norm": 0.20374680263050843, "learning_rate": 0.00047915694070919414, "loss": 3.086987018585205, "step": 5949, "token_acc": 0.2887206490426005 }, { "epoch": 3.4878334799179127, "grad_norm": 0.2097389169319842, "learning_rate": 0.00047914725380231285, "loss": 3.0346245765686035, "step": 5950, "token_acc": 0.29652972034846986 }, { "epoch": 3.488419818235122, "grad_norm": 0.19290277756024904, "learning_rate": 0.00047913756474290194, "loss": 3.109086036682129, "step": 5951, "token_acc": 0.285141251138058 }, { "epoch": 3.489006156552331, "grad_norm": 0.21263018477641957, "learning_rate": 0.0004791278735310523, "loss": 3.112579822540283, "step": 5952, "token_acc": 0.28584466699805244 }, { "epoch": 3.4895924948695396, "grad_norm": 0.22980915798373552, "learning_rate": 0.0004791181801668551, "loss": 3.0642826557159424, "step": 5953, "token_acc": 0.2917265175333742 }, { "epoch": 3.4901788331867487, "grad_norm": 0.20310021724413638, "learning_rate": 0.00047910848465040136, "loss": 3.0787506103515625, "step": 5954, "token_acc": 0.2926423470473978 }, { "epoch": 3.490765171503958, "grad_norm": 0.23360404622020986, "learning_rate": 0.00047909878698178205, "loss": 3.0644192695617676, "step": 5955, "token_acc": 0.2917618410585524 }, { "epoch": 3.491351509821167, "grad_norm": 0.2350227070102769, "learning_rate": 0.0004790890871610884, "loss": 3.0965120792388916, "step": 5956, "token_acc": 0.28681651925938173 }, { "epoch": 3.4919378481383756, "grad_norm": 0.20336331340612135, "learning_rate": 0.0004790793851884114, "loss": 3.0605881214141846, "step": 5957, "token_acc": 0.29251919861785175 }, { "epoch": 3.4925241864555847, "grad_norm": 0.24261990551067122, "learning_rate": 0.0004790696810638424, "loss": 3.108607053756714, "step": 5958, "token_acc": 0.284672331139934 }, { "epoch": 3.493110524772794, "grad_norm": 0.2657625273659287, "learning_rate": 0.00047905997478747236, "loss": 3.089564800262451, "step": 5959, "token_acc": 0.2888890578239308 }, { "epoch": 3.493696863090003, "grad_norm": 0.247642327934848, "learning_rate": 0.0004790502663593925, "loss": 3.080648899078369, "step": 5960, "token_acc": 0.2886338742743178 }, { "epoch": 3.494283201407212, "grad_norm": 0.21627449388476294, "learning_rate": 0.0004790405557796941, "loss": 3.0909807682037354, "step": 5961, "token_acc": 0.2886746346036609 }, { "epoch": 3.494869539724421, "grad_norm": 0.20143870693636776, "learning_rate": 0.00047903084304846825, "loss": 3.0977392196655273, "step": 5962, "token_acc": 0.28764756469392544 }, { "epoch": 3.49545587804163, "grad_norm": 0.20517791781100495, "learning_rate": 0.00047902112816580625, "loss": 3.0804243087768555, "step": 5963, "token_acc": 0.2889963724304716 }, { "epoch": 3.496042216358839, "grad_norm": 0.19519751032683294, "learning_rate": 0.0004790114111317994, "loss": 3.066699504852295, "step": 5964, "token_acc": 0.2899672883726957 }, { "epoch": 3.496628554676048, "grad_norm": 0.2297208380412637, "learning_rate": 0.0004790016919465389, "loss": 3.045351266860962, "step": 5965, "token_acc": 0.29422974560866116 }, { "epoch": 3.497214892993257, "grad_norm": 0.20817944942754768, "learning_rate": 0.0004789919706101161, "loss": 3.0995283126831055, "step": 5966, "token_acc": 0.28771666054879796 }, { "epoch": 3.4978012313104663, "grad_norm": 0.17984041818737403, "learning_rate": 0.00047898224712262236, "loss": 3.063133716583252, "step": 5967, "token_acc": 0.2925494090377183 }, { "epoch": 3.498387569627675, "grad_norm": 0.292116076963666, "learning_rate": 0.0004789725214841489, "loss": 3.0872721672058105, "step": 5968, "token_acc": 0.28905245215774383 }, { "epoch": 3.498973907944884, "grad_norm": 0.313909267677068, "learning_rate": 0.0004789627936947872, "loss": 3.07171368598938, "step": 5969, "token_acc": 0.2908167612879861 }, { "epoch": 3.499560246262093, "grad_norm": 0.22306740094189756, "learning_rate": 0.00047895306375462854, "loss": 3.0661704540252686, "step": 5970, "token_acc": 0.2908186503042368 }, { "epoch": 3.5001465845793023, "grad_norm": 0.2788788165810863, "learning_rate": 0.00047894333166376434, "loss": 3.081724166870117, "step": 5971, "token_acc": 0.28948743356454376 }, { "epoch": 3.5007329228965114, "grad_norm": 0.2684937393772208, "learning_rate": 0.00047893359742228614, "loss": 3.091601848602295, "step": 5972, "token_acc": 0.2884710213144106 }, { "epoch": 3.5013192612137205, "grad_norm": 0.21539425873877566, "learning_rate": 0.0004789238610302852, "loss": 3.0789403915405273, "step": 5973, "token_acc": 0.2906933514282223 }, { "epoch": 3.5019055995309296, "grad_norm": 0.26179643399527397, "learning_rate": 0.0004789141224878531, "loss": 3.0750370025634766, "step": 5974, "token_acc": 0.2908853410740203 }, { "epoch": 3.5024919378481383, "grad_norm": 0.20841635969450642, "learning_rate": 0.0004789043817950812, "loss": 3.0844740867614746, "step": 5975, "token_acc": 0.28860233826653414 }, { "epoch": 3.5030782761653474, "grad_norm": 0.2320183461037999, "learning_rate": 0.0004788946389520612, "loss": 3.09194016456604, "step": 5976, "token_acc": 0.28934187594568817 }, { "epoch": 3.5036646144825565, "grad_norm": 0.21793289811200264, "learning_rate": 0.00047888489395888446, "loss": 3.0815606117248535, "step": 5977, "token_acc": 0.2894452440597437 }, { "epoch": 3.5042509527997656, "grad_norm": 0.3114209039212514, "learning_rate": 0.00047887514681564257, "loss": 3.128950834274292, "step": 5978, "token_acc": 0.2835602099693795 }, { "epoch": 3.5048372911169743, "grad_norm": 0.2201139633815306, "learning_rate": 0.00047886539752242706, "loss": 3.1177797317504883, "step": 5979, "token_acc": 0.2845294746551507 }, { "epoch": 3.5054236294341834, "grad_norm": 0.2670174859290515, "learning_rate": 0.0004788556460793296, "loss": 3.093773603439331, "step": 5980, "token_acc": 0.2898392374909286 }, { "epoch": 3.5060099677513925, "grad_norm": 0.21361580347317052, "learning_rate": 0.0004788458924864417, "loss": 3.09428071975708, "step": 5981, "token_acc": 0.287651671593041 }, { "epoch": 3.5065963060686016, "grad_norm": 0.21655124083199812, "learning_rate": 0.00047883613674385507, "loss": 3.1337759494781494, "step": 5982, "token_acc": 0.28267563942583335 }, { "epoch": 3.5071826443858107, "grad_norm": 0.21960754628886808, "learning_rate": 0.00047882637885166126, "loss": 3.0978775024414062, "step": 5983, "token_acc": 0.28644988474344746 }, { "epoch": 3.50776898270302, "grad_norm": 0.2254535058164971, "learning_rate": 0.0004788166188099519, "loss": 3.059940814971924, "step": 5984, "token_acc": 0.29226715492402533 }, { "epoch": 3.5083553210202285, "grad_norm": 0.18543314014344411, "learning_rate": 0.0004788068566188188, "loss": 3.06207537651062, "step": 5985, "token_acc": 0.2913921821158164 }, { "epoch": 3.5089416593374376, "grad_norm": 0.24836450388150022, "learning_rate": 0.0004787970922783536, "loss": 3.102248191833496, "step": 5986, "token_acc": 0.28772797336773503 }, { "epoch": 3.5095279976546467, "grad_norm": 0.22735159409742453, "learning_rate": 0.000478787325788648, "loss": 3.103120803833008, "step": 5987, "token_acc": 0.28605212867608026 }, { "epoch": 3.510114335971856, "grad_norm": 0.25862930411752544, "learning_rate": 0.0004787775571497938, "loss": 3.122980833053589, "step": 5988, "token_acc": 0.2819880622634885 }, { "epoch": 3.5107006742890645, "grad_norm": 0.20212926151408436, "learning_rate": 0.00047876778636188273, "loss": 3.1015028953552246, "step": 5989, "token_acc": 0.2868072535177652 }, { "epoch": 3.5112870126062736, "grad_norm": 0.2606634945491059, "learning_rate": 0.0004787580134250066, "loss": 3.0488338470458984, "step": 5990, "token_acc": 0.2939887537986113 }, { "epoch": 3.5118733509234827, "grad_norm": 0.2727368677444671, "learning_rate": 0.0004787482383392571, "loss": 3.0851917266845703, "step": 5991, "token_acc": 0.2894705036785221 }, { "epoch": 3.512459689240692, "grad_norm": 0.21441790378685785, "learning_rate": 0.0004787384611047262, "loss": 3.100041389465332, "step": 5992, "token_acc": 0.28854019312380835 }, { "epoch": 3.513046027557901, "grad_norm": 0.22969573302622853, "learning_rate": 0.00047872868172150573, "loss": 3.073490619659424, "step": 5993, "token_acc": 0.29006336121088006 }, { "epoch": 3.51363236587511, "grad_norm": 0.26748707152484175, "learning_rate": 0.00047871890018968743, "loss": 3.0738022327423096, "step": 5994, "token_acc": 0.29170121801578386 }, { "epoch": 3.514218704192319, "grad_norm": 0.24742014474517413, "learning_rate": 0.0004787091165093633, "loss": 3.08567214012146, "step": 5995, "token_acc": 0.2889967884997706 }, { "epoch": 3.514805042509528, "grad_norm": 0.20967098302264475, "learning_rate": 0.0004786993306806252, "loss": 3.0812015533447266, "step": 5996, "token_acc": 0.2888916464907023 }, { "epoch": 3.515391380826737, "grad_norm": 0.21407514659564247, "learning_rate": 0.0004786895427035651, "loss": 3.1094777584075928, "step": 5997, "token_acc": 0.2846600322979803 }, { "epoch": 3.515977719143946, "grad_norm": 0.20893276152304585, "learning_rate": 0.0004786797525782749, "loss": 3.116135358810425, "step": 5998, "token_acc": 0.2856432731796439 }, { "epoch": 3.516564057461155, "grad_norm": 0.2086591872128816, "learning_rate": 0.00047866996030484653, "loss": 3.0725951194763184, "step": 5999, "token_acc": 0.29233333760755 }, { "epoch": 3.517150395778364, "grad_norm": 0.26255282881530384, "learning_rate": 0.0004786601658833721, "loss": 3.0469374656677246, "step": 6000, "token_acc": 0.2942312352528538 }, { "epoch": 3.517736734095573, "grad_norm": 0.32893867897322204, "learning_rate": 0.0004786503693139435, "loss": 3.1021957397460938, "step": 6001, "token_acc": 0.28532917835040644 }, { "epoch": 3.518323072412782, "grad_norm": 0.2815719010791083, "learning_rate": 0.0004786405705966528, "loss": 3.132190227508545, "step": 6002, "token_acc": 0.28244159880415 }, { "epoch": 3.518909410729991, "grad_norm": 0.1950200050129264, "learning_rate": 0.00047863076973159196, "loss": 3.0931668281555176, "step": 6003, "token_acc": 0.28613224424398415 }, { "epoch": 3.5194957490472003, "grad_norm": 0.23810333904481973, "learning_rate": 0.0004786209667188532, "loss": 3.0766186714172363, "step": 6004, "token_acc": 0.2904505035160872 }, { "epoch": 3.5200820873644094, "grad_norm": 0.20382897109857082, "learning_rate": 0.0004786111615585285, "loss": 3.1412878036499023, "step": 6005, "token_acc": 0.2815263292050653 }, { "epoch": 3.5206684256816185, "grad_norm": 0.25180309420851293, "learning_rate": 0.0004786013542507099, "loss": 3.072093963623047, "step": 6006, "token_acc": 0.2912007941698831 }, { "epoch": 3.521254763998827, "grad_norm": 0.2226163934661916, "learning_rate": 0.0004785915447954898, "loss": 3.035400390625, "step": 6007, "token_acc": 0.2953153027232278 }, { "epoch": 3.5218411023160363, "grad_norm": 0.23056535899622616, "learning_rate": 0.00047858173319296007, "loss": 3.1257331371307373, "step": 6008, "token_acc": 0.281706169090828 }, { "epoch": 3.5224274406332454, "grad_norm": 0.24604349688522686, "learning_rate": 0.000478571919443213, "loss": 3.1137661933898926, "step": 6009, "token_acc": 0.28473933551350206 }, { "epoch": 3.5230137789504545, "grad_norm": 0.22146145992379532, "learning_rate": 0.0004785621035463408, "loss": 3.026855707168579, "step": 6010, "token_acc": 0.29788814741374575 }, { "epoch": 3.523600117267663, "grad_norm": 0.2981028208479258, "learning_rate": 0.00047855228550243553, "loss": 3.076784133911133, "step": 6011, "token_acc": 0.29015349550639474 }, { "epoch": 3.5241864555848723, "grad_norm": 0.20549515572512128, "learning_rate": 0.00047854246531158954, "loss": 3.0817158222198486, "step": 6012, "token_acc": 0.29020446543098205 }, { "epoch": 3.5247727939020814, "grad_norm": 0.2448656895681165, "learning_rate": 0.0004785326429738951, "loss": 3.0994873046875, "step": 6013, "token_acc": 0.2883166425714678 }, { "epoch": 3.5253591322192905, "grad_norm": 0.24045836148315997, "learning_rate": 0.00047852281848944435, "loss": 3.0627567768096924, "step": 6014, "token_acc": 0.2926420598069058 }, { "epoch": 3.5259454705364996, "grad_norm": 0.19414816276974475, "learning_rate": 0.00047851299185832974, "loss": 3.1357312202453613, "step": 6015, "token_acc": 0.28466136645693446 }, { "epoch": 3.5265318088537088, "grad_norm": 0.2100676942092523, "learning_rate": 0.00047850316308064347, "loss": 3.075984477996826, "step": 6016, "token_acc": 0.29138177311445107 }, { "epoch": 3.527118147170918, "grad_norm": 0.1767316226674439, "learning_rate": 0.0004784933321564779, "loss": 3.0914430618286133, "step": 6017, "token_acc": 0.28809369820401304 }, { "epoch": 3.5277044854881265, "grad_norm": 0.23834412801881802, "learning_rate": 0.00047848349908592534, "loss": 3.0830278396606445, "step": 6018, "token_acc": 0.2905995567120669 }, { "epoch": 3.5282908238053357, "grad_norm": 0.20136414791524018, "learning_rate": 0.0004784736638690782, "loss": 3.1203746795654297, "step": 6019, "token_acc": 0.28580708376661834 }, { "epoch": 3.5288771621225448, "grad_norm": 0.21203260072393462, "learning_rate": 0.0004784638265060289, "loss": 3.0485548973083496, "step": 6020, "token_acc": 0.2948570097914195 }, { "epoch": 3.529463500439754, "grad_norm": 0.2539327149376188, "learning_rate": 0.00047845398699686975, "loss": 3.0871853828430176, "step": 6021, "token_acc": 0.2900814998550398 }, { "epoch": 3.5300498387569625, "grad_norm": 0.19771395043935258, "learning_rate": 0.00047844414534169334, "loss": 3.064157247543335, "step": 6022, "token_acc": 0.29237666573054316 }, { "epoch": 3.5306361770741717, "grad_norm": 0.2035848446376272, "learning_rate": 0.0004784343015405919, "loss": 3.1128532886505127, "step": 6023, "token_acc": 0.2862062398417995 }, { "epoch": 3.5312225153913808, "grad_norm": 0.20468539844415362, "learning_rate": 0.0004784244555936581, "loss": 3.040302276611328, "step": 6024, "token_acc": 0.2966674867950604 }, { "epoch": 3.53180885370859, "grad_norm": 0.21116021043571656, "learning_rate": 0.00047841460750098434, "loss": 3.08364200592041, "step": 6025, "token_acc": 0.28990447542148173 }, { "epoch": 3.532395192025799, "grad_norm": 0.25132135319706655, "learning_rate": 0.0004784047572626631, "loss": 3.066924810409546, "step": 6026, "token_acc": 0.2926643445066123 }, { "epoch": 3.532981530343008, "grad_norm": 0.22122588785613362, "learning_rate": 0.0004783949048787869, "loss": 3.09605073928833, "step": 6027, "token_acc": 0.2876989230476744 }, { "epoch": 3.533567868660217, "grad_norm": 0.27596119066460945, "learning_rate": 0.00047838505034944836, "loss": 3.097844123840332, "step": 6028, "token_acc": 0.2858009234917776 }, { "epoch": 3.534154206977426, "grad_norm": 0.26715735946591523, "learning_rate": 0.0004783751936747401, "loss": 3.052086114883423, "step": 6029, "token_acc": 0.29449976380663634 }, { "epoch": 3.534740545294635, "grad_norm": 0.18687202174574064, "learning_rate": 0.0004783653348547545, "loss": 3.0766286849975586, "step": 6030, "token_acc": 0.28957626247043095 }, { "epoch": 3.535326883611844, "grad_norm": 0.24079070917471235, "learning_rate": 0.00047835547388958444, "loss": 3.1003494262695312, "step": 6031, "token_acc": 0.28722589195121445 }, { "epoch": 3.535913221929053, "grad_norm": 0.2170699969772782, "learning_rate": 0.0004783456107793224, "loss": 3.121161937713623, "step": 6032, "token_acc": 0.2838605049842325 }, { "epoch": 3.536499560246262, "grad_norm": 0.19770945047579877, "learning_rate": 0.00047833574552406103, "loss": 3.099660873413086, "step": 6033, "token_acc": 0.2890059989130889 }, { "epoch": 3.537085898563471, "grad_norm": 0.2213738190721832, "learning_rate": 0.0004783258781238929, "loss": 3.057271957397461, "step": 6034, "token_acc": 0.2938741895001046 }, { "epoch": 3.53767223688068, "grad_norm": 0.21195044027559198, "learning_rate": 0.000478316008578911, "loss": 3.1295642852783203, "step": 6035, "token_acc": 0.2822314864740462 }, { "epoch": 3.538258575197889, "grad_norm": 0.21743989145001846, "learning_rate": 0.00047830613688920777, "loss": 3.0409865379333496, "step": 6036, "token_acc": 0.29490492245448413 }, { "epoch": 3.5388449135150983, "grad_norm": 0.19379257134930958, "learning_rate": 0.000478296263054876, "loss": 3.068934917449951, "step": 6037, "token_acc": 0.2911316006445042 }, { "epoch": 3.5394312518323074, "grad_norm": 0.22145731308281166, "learning_rate": 0.0004782863870760085, "loss": 3.071803569793701, "step": 6038, "token_acc": 0.2905575511958491 }, { "epoch": 3.540017590149516, "grad_norm": 0.20561135145440868, "learning_rate": 0.00047827650895269805, "loss": 3.1095962524414062, "step": 6039, "token_acc": 0.2848458498230763 }, { "epoch": 3.5406039284667252, "grad_norm": 0.2558954360046193, "learning_rate": 0.00047826662868503733, "loss": 3.1218907833099365, "step": 6040, "token_acc": 0.2832678649367139 }, { "epoch": 3.5411902667839343, "grad_norm": 0.3125864658981447, "learning_rate": 0.00047825674627311935, "loss": 3.077052354812622, "step": 6041, "token_acc": 0.2916955134820218 }, { "epoch": 3.5417766051011434, "grad_norm": 0.28375200554268565, "learning_rate": 0.0004782468617170367, "loss": 3.0992369651794434, "step": 6042, "token_acc": 0.284857586299709 }, { "epoch": 3.542362943418352, "grad_norm": 0.2416543450716066, "learning_rate": 0.00047823697501688233, "loss": 3.082282304763794, "step": 6043, "token_acc": 0.2887448087063374 }, { "epoch": 3.5429492817355612, "grad_norm": 0.3171979429807752, "learning_rate": 0.00047822708617274923, "loss": 3.097660541534424, "step": 6044, "token_acc": 0.28746801575588987 }, { "epoch": 3.5435356200527703, "grad_norm": 0.21590103521526163, "learning_rate": 0.00047821719518473016, "loss": 3.10398006439209, "step": 6045, "token_acc": 0.2876173542921027 }, { "epoch": 3.5441219583699795, "grad_norm": 0.3520410154364761, "learning_rate": 0.0004782073020529181, "loss": 3.1175670623779297, "step": 6046, "token_acc": 0.28513065393288234 }, { "epoch": 3.5447082966871886, "grad_norm": 0.24730097886728836, "learning_rate": 0.00047819740677740584, "loss": 3.088317632675171, "step": 6047, "token_acc": 0.28929394987952556 }, { "epoch": 3.5452946350043977, "grad_norm": 0.2790158099033525, "learning_rate": 0.0004781875093582866, "loss": 3.119417190551758, "step": 6048, "token_acc": 0.28379942952772896 }, { "epoch": 3.545880973321607, "grad_norm": 0.2170574192007608, "learning_rate": 0.00047817760979565303, "loss": 3.08149790763855, "step": 6049, "token_acc": 0.28855751135879226 }, { "epoch": 3.5464673116388155, "grad_norm": 0.2741517409016626, "learning_rate": 0.0004781677080895984, "loss": 3.031248092651367, "step": 6050, "token_acc": 0.29570952785215177 }, { "epoch": 3.5470536499560246, "grad_norm": 0.21710240619242815, "learning_rate": 0.0004781578042402156, "loss": 3.1196911334991455, "step": 6051, "token_acc": 0.2842616265556659 }, { "epoch": 3.5476399882732337, "grad_norm": 0.24697629356132034, "learning_rate": 0.00047814789824759767, "loss": 3.0917844772338867, "step": 6052, "token_acc": 0.28773133881673224 }, { "epoch": 3.548226326590443, "grad_norm": 0.22666633676878634, "learning_rate": 0.0004781379901118377, "loss": 3.0785088539123535, "step": 6053, "token_acc": 0.2908692464446289 }, { "epoch": 3.5488126649076515, "grad_norm": 0.23028019361754382, "learning_rate": 0.00047812807983302867, "loss": 3.1226372718811035, "step": 6054, "token_acc": 0.28509020430242604 }, { "epoch": 3.5493990032248606, "grad_norm": 0.2090816704759526, "learning_rate": 0.0004781181674112638, "loss": 3.1141867637634277, "step": 6055, "token_acc": 0.2849584501883246 }, { "epoch": 3.5499853415420697, "grad_norm": 0.2202981861422166, "learning_rate": 0.0004781082528466361, "loss": 3.0736582279205322, "step": 6056, "token_acc": 0.289328404028792 }, { "epoch": 3.550571679859279, "grad_norm": 0.22477475895791238, "learning_rate": 0.0004780983361392387, "loss": 3.076233386993408, "step": 6057, "token_acc": 0.2910754538370214 }, { "epoch": 3.551158018176488, "grad_norm": 0.19289651386377385, "learning_rate": 0.00047808841728916486, "loss": 3.066540241241455, "step": 6058, "token_acc": 0.29031376523589436 }, { "epoch": 3.551744356493697, "grad_norm": 0.19333325795202816, "learning_rate": 0.0004780784962965077, "loss": 3.0855627059936523, "step": 6059, "token_acc": 0.2879536457055051 }, { "epoch": 3.552330694810906, "grad_norm": 0.21945646208421482, "learning_rate": 0.0004780685731613604, "loss": 3.0759763717651367, "step": 6060, "token_acc": 0.2916137629985624 }, { "epoch": 3.552917033128115, "grad_norm": 0.2262993380224385, "learning_rate": 0.00047805864788381616, "loss": 3.090543270111084, "step": 6061, "token_acc": 0.289961875089124 }, { "epoch": 3.553503371445324, "grad_norm": 0.21081935189939519, "learning_rate": 0.0004780487204639682, "loss": 3.0963568687438965, "step": 6062, "token_acc": 0.28842421423764747 }, { "epoch": 3.554089709762533, "grad_norm": 0.22710755985761227, "learning_rate": 0.0004780387909019099, "loss": 3.1080727577209473, "step": 6063, "token_acc": 0.286900846560574 }, { "epoch": 3.554676048079742, "grad_norm": 0.23025140142205822, "learning_rate": 0.00047802885919773436, "loss": 3.065800666809082, "step": 6064, "token_acc": 0.29320443026093485 }, { "epoch": 3.555262386396951, "grad_norm": 0.20999208801339014, "learning_rate": 0.000478018925351535, "loss": 3.0565340518951416, "step": 6065, "token_acc": 0.2922210614824063 }, { "epoch": 3.55584872471416, "grad_norm": 0.2106451756440314, "learning_rate": 0.0004780089893634051, "loss": 3.0704643726348877, "step": 6066, "token_acc": 0.28946269996139035 }, { "epoch": 3.556435063031369, "grad_norm": 0.26392526063665517, "learning_rate": 0.000477999051233438, "loss": 3.1443710327148438, "step": 6067, "token_acc": 0.28012039151801915 }, { "epoch": 3.557021401348578, "grad_norm": 0.23660124070894392, "learning_rate": 0.000477989110961727, "loss": 3.116360664367676, "step": 6068, "token_acc": 0.28421414822940166 }, { "epoch": 3.5576077396657872, "grad_norm": 0.19641525986087416, "learning_rate": 0.00047797916854836554, "loss": 3.115983009338379, "step": 6069, "token_acc": 0.28584151680229325 }, { "epoch": 3.5581940779829964, "grad_norm": 0.2016102454667875, "learning_rate": 0.000477969223993447, "loss": 3.071686029434204, "step": 6070, "token_acc": 0.2920063423513642 }, { "epoch": 3.5587804163002055, "grad_norm": 0.21777419084493524, "learning_rate": 0.00047795927729706484, "loss": 3.059999465942383, "step": 6071, "token_acc": 0.2915543114105494 }, { "epoch": 3.559366754617414, "grad_norm": 0.20466529287014912, "learning_rate": 0.0004779493284593124, "loss": 3.0969653129577637, "step": 6072, "token_acc": 0.2859841130210356 }, { "epoch": 3.5599530929346233, "grad_norm": 0.17594473084978354, "learning_rate": 0.00047793937748028323, "loss": 3.0789592266082764, "step": 6073, "token_acc": 0.2904278691295059 }, { "epoch": 3.5605394312518324, "grad_norm": 0.17291870875002943, "learning_rate": 0.0004779294243600707, "loss": 3.085874557495117, "step": 6074, "token_acc": 0.2911383545196599 }, { "epoch": 3.5611257695690415, "grad_norm": 0.21431402420415846, "learning_rate": 0.0004779194690987684, "loss": 3.1280360221862793, "step": 6075, "token_acc": 0.28440405057248197 }, { "epoch": 3.56171210788625, "grad_norm": 0.31055969854273807, "learning_rate": 0.0004779095116964698, "loss": 3.108879566192627, "step": 6076, "token_acc": 0.2872302628141491 }, { "epoch": 3.5622984462034593, "grad_norm": 0.3371314806592172, "learning_rate": 0.0004778995521532685, "loss": 3.0597105026245117, "step": 6077, "token_acc": 0.29391647501432533 }, { "epoch": 3.5628847845206684, "grad_norm": 0.2138190712878982, "learning_rate": 0.00047788959046925797, "loss": 3.072995662689209, "step": 6078, "token_acc": 0.2924031555587679 }, { "epoch": 3.5634711228378775, "grad_norm": 0.27984404009076747, "learning_rate": 0.0004778796266445318, "loss": 3.067516565322876, "step": 6079, "token_acc": 0.29301465598262694 }, { "epoch": 3.5640574611550866, "grad_norm": 0.21684213419178763, "learning_rate": 0.0004778696606791836, "loss": 3.0667333602905273, "step": 6080, "token_acc": 0.2919727116208624 }, { "epoch": 3.5646437994722957, "grad_norm": 0.25388039885578323, "learning_rate": 0.00047785969257330705, "loss": 3.0914976596832275, "step": 6081, "token_acc": 0.28904869098977615 }, { "epoch": 3.565230137789505, "grad_norm": 0.21544744718239908, "learning_rate": 0.00047784972232699566, "loss": 3.0481085777282715, "step": 6082, "token_acc": 0.29330518189754 }, { "epoch": 3.5658164761067135, "grad_norm": 0.2299557294622156, "learning_rate": 0.00047783974994034325, "loss": 3.0937414169311523, "step": 6083, "token_acc": 0.2875131191868431 }, { "epoch": 3.5664028144239226, "grad_norm": 0.27472314604687254, "learning_rate": 0.0004778297754134433, "loss": 3.084862232208252, "step": 6084, "token_acc": 0.2889697467946522 }, { "epoch": 3.5669891527411317, "grad_norm": 0.19171067473319317, "learning_rate": 0.0004778197987463897, "loss": 3.110020637512207, "step": 6085, "token_acc": 0.2859867323418398 }, { "epoch": 3.567575491058341, "grad_norm": 0.2535443857362273, "learning_rate": 0.0004778098199392761, "loss": 3.110361099243164, "step": 6086, "token_acc": 0.28642661224137184 }, { "epoch": 3.5681618293755495, "grad_norm": 0.22376798027829048, "learning_rate": 0.00047779983899219614, "loss": 3.1167047023773193, "step": 6087, "token_acc": 0.28659363805442645 }, { "epoch": 3.5687481676927586, "grad_norm": 0.2529065217246973, "learning_rate": 0.0004777898559052437, "loss": 3.0566165447235107, "step": 6088, "token_acc": 0.29346751375287944 }, { "epoch": 3.5693345060099677, "grad_norm": 0.21075350874348966, "learning_rate": 0.0004777798706785125, "loss": 3.100754737854004, "step": 6089, "token_acc": 0.2861667542236739 }, { "epoch": 3.569920844327177, "grad_norm": 0.2709613704428958, "learning_rate": 0.0004777698833120964, "loss": 3.0707015991210938, "step": 6090, "token_acc": 0.29262913612122887 }, { "epoch": 3.570507182644386, "grad_norm": 0.2343618137578625, "learning_rate": 0.0004777598938060891, "loss": 3.0947189331054688, "step": 6091, "token_acc": 0.28833164856320836 }, { "epoch": 3.571093520961595, "grad_norm": 0.2241432259566997, "learning_rate": 0.00047774990216058454, "loss": 3.0784573554992676, "step": 6092, "token_acc": 0.2894586818333891 }, { "epoch": 3.5716798592788037, "grad_norm": 0.19551955466333742, "learning_rate": 0.00047773990837567657, "loss": 3.0498709678649902, "step": 6093, "token_acc": 0.2927040473990049 }, { "epoch": 3.572266197596013, "grad_norm": 0.19587532929007806, "learning_rate": 0.00047772991245145904, "loss": 3.069504737854004, "step": 6094, "token_acc": 0.2911250003270274 }, { "epoch": 3.572852535913222, "grad_norm": 0.21101221017589122, "learning_rate": 0.00047771991438802573, "loss": 3.0905089378356934, "step": 6095, "token_acc": 0.287147384482961 }, { "epoch": 3.573438874230431, "grad_norm": 0.20236658811978933, "learning_rate": 0.0004777099141854708, "loss": 3.0461857318878174, "step": 6096, "token_acc": 0.29494617560779374 }, { "epoch": 3.5740252125476397, "grad_norm": 0.24114969280795062, "learning_rate": 0.00047769991184388804, "loss": 3.0389723777770996, "step": 6097, "token_acc": 0.29420878881310536 }, { "epoch": 3.574611550864849, "grad_norm": 0.22186159189140797, "learning_rate": 0.0004776899073633715, "loss": 3.0373387336730957, "step": 6098, "token_acc": 0.29637481950097094 }, { "epoch": 3.575197889182058, "grad_norm": 0.19430216809701573, "learning_rate": 0.000477679900744015, "loss": 3.0670900344848633, "step": 6099, "token_acc": 0.29157500013002347 }, { "epoch": 3.575784227499267, "grad_norm": 0.2610316997840719, "learning_rate": 0.0004776698919859127, "loss": 3.100808620452881, "step": 6100, "token_acc": 0.28657782032805584 }, { "epoch": 3.576370565816476, "grad_norm": 0.2535076239290258, "learning_rate": 0.00047765988108915857, "loss": 3.0593724250793457, "step": 6101, "token_acc": 0.2918306525021828 }, { "epoch": 3.5769569041336853, "grad_norm": 0.2291889075268578, "learning_rate": 0.0004776498680538466, "loss": 3.079282283782959, "step": 6102, "token_acc": 0.2887910681525556 }, { "epoch": 3.5775432424508944, "grad_norm": 0.2098851794151694, "learning_rate": 0.00047763985288007085, "loss": 3.0537474155426025, "step": 6103, "token_acc": 0.29461693496951297 }, { "epoch": 3.578129580768103, "grad_norm": 0.2514736907085398, "learning_rate": 0.0004776298355679255, "loss": 3.081895351409912, "step": 6104, "token_acc": 0.28882546429296113 }, { "epoch": 3.578715919085312, "grad_norm": 0.25639747013447084, "learning_rate": 0.0004776198161175045, "loss": 3.072284460067749, "step": 6105, "token_acc": 0.2907978283118113 }, { "epoch": 3.5793022574025213, "grad_norm": 0.20468551853002448, "learning_rate": 0.0004776097945289021, "loss": 3.0495762825012207, "step": 6106, "token_acc": 0.29452453689986 }, { "epoch": 3.5798885957197304, "grad_norm": 0.2622304404380167, "learning_rate": 0.0004775997708022124, "loss": 3.127688407897949, "step": 6107, "token_acc": 0.28256165053640897 }, { "epoch": 3.580474934036939, "grad_norm": 0.22849655173306108, "learning_rate": 0.00047758974493752947, "loss": 3.138761520385742, "step": 6108, "token_acc": 0.282469409637541 }, { "epoch": 3.581061272354148, "grad_norm": 0.21289464254768897, "learning_rate": 0.0004775797169349476, "loss": 3.0690762996673584, "step": 6109, "token_acc": 0.29035518793976217 }, { "epoch": 3.5816476106713573, "grad_norm": 0.2503781919365284, "learning_rate": 0.000477569686794561, "loss": 3.0683746337890625, "step": 6110, "token_acc": 0.2913076559254766 }, { "epoch": 3.5822339489885664, "grad_norm": 0.2061015191146862, "learning_rate": 0.0004775596545164639, "loss": 3.0513505935668945, "step": 6111, "token_acc": 0.29413052605549816 }, { "epoch": 3.5828202873057755, "grad_norm": 0.22973891102742997, "learning_rate": 0.0004775496201007504, "loss": 3.1333398818969727, "step": 6112, "token_acc": 0.2838682111164955 }, { "epoch": 3.5834066256229846, "grad_norm": 0.201200575524959, "learning_rate": 0.0004775395835475148, "loss": 3.1161303520202637, "step": 6113, "token_acc": 0.28424492476523894 }, { "epoch": 3.5839929639401937, "grad_norm": 0.19444768381835464, "learning_rate": 0.0004775295448568515, "loss": 3.083249807357788, "step": 6114, "token_acc": 0.290126555049654 }, { "epoch": 3.5845793022574024, "grad_norm": 0.17201432056093507, "learning_rate": 0.0004775195040288547, "loss": 3.0807690620422363, "step": 6115, "token_acc": 0.29083650073565476 }, { "epoch": 3.5851656405746115, "grad_norm": 0.22920058568961252, "learning_rate": 0.0004775094610636188, "loss": 3.0926599502563477, "step": 6116, "token_acc": 0.28719250904270033 }, { "epoch": 3.5857519788918206, "grad_norm": 0.24767227692114227, "learning_rate": 0.00047749941596123805, "loss": 3.095942735671997, "step": 6117, "token_acc": 0.2886343798767424 }, { "epoch": 3.5863383172090297, "grad_norm": 0.20748647290726602, "learning_rate": 0.0004774893687218068, "loss": 3.0598459243774414, "step": 6118, "token_acc": 0.29318196818691833 }, { "epoch": 3.5869246555262384, "grad_norm": 0.21878671420519874, "learning_rate": 0.0004774793193454196, "loss": 3.071408271789551, "step": 6119, "token_acc": 0.29111665422500593 }, { "epoch": 3.5875109938434475, "grad_norm": 0.289051300361958, "learning_rate": 0.0004774692678321706, "loss": 3.114727258682251, "step": 6120, "token_acc": 0.28526619612914667 }, { "epoch": 3.5880973321606566, "grad_norm": 0.2238484436034807, "learning_rate": 0.00047745921418215434, "loss": 3.053147792816162, "step": 6121, "token_acc": 0.2937151580410811 }, { "epoch": 3.5886836704778657, "grad_norm": 0.18646507233837395, "learning_rate": 0.0004774491583954653, "loss": 3.032620906829834, "step": 6122, "token_acc": 0.29550179257692605 }, { "epoch": 3.589270008795075, "grad_norm": 0.23580325117038076, "learning_rate": 0.00047743910047219797, "loss": 3.0558462142944336, "step": 6123, "token_acc": 0.2919754589535647 }, { "epoch": 3.589856347112284, "grad_norm": 0.19364107394716706, "learning_rate": 0.00047742904041244674, "loss": 3.0633974075317383, "step": 6124, "token_acc": 0.29236689097835195 }, { "epoch": 3.590442685429493, "grad_norm": 0.23520470132089974, "learning_rate": 0.0004774189782163061, "loss": 3.0803332328796387, "step": 6125, "token_acc": 0.28942831764452365 }, { "epoch": 3.5910290237467017, "grad_norm": 0.26290736887822436, "learning_rate": 0.0004774089138838706, "loss": 3.0983376502990723, "step": 6126, "token_acc": 0.28632994659701205 }, { "epoch": 3.591615362063911, "grad_norm": 0.2237605436060088, "learning_rate": 0.00047739884741523485, "loss": 3.0694193840026855, "step": 6127, "token_acc": 0.291640866873065 }, { "epoch": 3.59220170038112, "grad_norm": 0.20886323923382272, "learning_rate": 0.00047738877881049334, "loss": 3.101034164428711, "step": 6128, "token_acc": 0.2860221885025124 }, { "epoch": 3.592788038698329, "grad_norm": 0.217531252974018, "learning_rate": 0.0004773787080697406, "loss": 3.056234836578369, "step": 6129, "token_acc": 0.2934061575236296 }, { "epoch": 3.5933743770155377, "grad_norm": 0.20772401907988092, "learning_rate": 0.0004773686351930714, "loss": 3.085545063018799, "step": 6130, "token_acc": 0.2884021027909629 }, { "epoch": 3.593960715332747, "grad_norm": 0.21938291358499384, "learning_rate": 0.0004773585601805802, "loss": 3.072904109954834, "step": 6131, "token_acc": 0.28970801948450087 }, { "epoch": 3.594547053649956, "grad_norm": 0.23890405225059724, "learning_rate": 0.00047734848303236167, "loss": 3.0692057609558105, "step": 6132, "token_acc": 0.2916914125148148 }, { "epoch": 3.595133391967165, "grad_norm": 0.2395632705844236, "learning_rate": 0.00047733840374851054, "loss": 3.0858116149902344, "step": 6133, "token_acc": 0.2886483827327512 }, { "epoch": 3.595719730284374, "grad_norm": 0.19938532101372497, "learning_rate": 0.0004773283223291214, "loss": 3.102911949157715, "step": 6134, "token_acc": 0.2872610776176701 }, { "epoch": 3.5963060686015833, "grad_norm": 0.1860892143999292, "learning_rate": 0.0004773182387742891, "loss": 3.078582763671875, "step": 6135, "token_acc": 0.2893563354163819 }, { "epoch": 3.5968924069187924, "grad_norm": 0.19354341727959734, "learning_rate": 0.0004773081530841082, "loss": 3.1193454265594482, "step": 6136, "token_acc": 0.2844470666962947 }, { "epoch": 3.597478745236001, "grad_norm": 0.19924702050923326, "learning_rate": 0.0004772980652586735, "loss": 3.091604709625244, "step": 6137, "token_acc": 0.2889168533336067 }, { "epoch": 3.59806508355321, "grad_norm": 0.22927768235522245, "learning_rate": 0.0004772879752980798, "loss": 3.0477333068847656, "step": 6138, "token_acc": 0.2941999606271981 }, { "epoch": 3.5986514218704193, "grad_norm": 0.2979808594007924, "learning_rate": 0.0004772778832024217, "loss": 3.0915913581848145, "step": 6139, "token_acc": 0.2887546965116186 }, { "epoch": 3.5992377601876284, "grad_norm": 0.28665369194031165, "learning_rate": 0.00047726778897179433, "loss": 3.1050682067871094, "step": 6140, "token_acc": 0.28550109482014047 }, { "epoch": 3.599824098504837, "grad_norm": 0.2478521673189605, "learning_rate": 0.0004772576926062923, "loss": 3.1017208099365234, "step": 6141, "token_acc": 0.28742015546848426 }, { "epoch": 3.600410436822046, "grad_norm": 0.2847732604249733, "learning_rate": 0.00047724759410601035, "loss": 3.0851950645446777, "step": 6142, "token_acc": 0.28896857474799775 }, { "epoch": 3.6009967751392553, "grad_norm": 0.22798102669016376, "learning_rate": 0.00047723749347104363, "loss": 3.096829414367676, "step": 6143, "token_acc": 0.28860728040750405 }, { "epoch": 3.6015831134564644, "grad_norm": 0.21713068733581306, "learning_rate": 0.0004772273907014868, "loss": 3.0462703704833984, "step": 6144, "token_acc": 0.29586516340446306 }, { "epoch": 3.6021694517736735, "grad_norm": 0.2268065175670834, "learning_rate": 0.00047721728579743487, "loss": 3.0793564319610596, "step": 6145, "token_acc": 0.28781232814947444 }, { "epoch": 3.6027557900908826, "grad_norm": 0.23139116845101704, "learning_rate": 0.0004772071787589826, "loss": 3.0989623069763184, "step": 6146, "token_acc": 0.28692971158724584 }, { "epoch": 3.6033421284080913, "grad_norm": 0.21611950213225842, "learning_rate": 0.00047719706958622513, "loss": 3.048210382461548, "step": 6147, "token_acc": 0.2940842841126426 }, { "epoch": 3.6039284667253004, "grad_norm": 0.21748068594075223, "learning_rate": 0.0004771869582792573, "loss": 3.084216594696045, "step": 6148, "token_acc": 0.289161380243295 }, { "epoch": 3.6045148050425095, "grad_norm": 0.19099557933798814, "learning_rate": 0.00047717684483817425, "loss": 3.1019816398620605, "step": 6149, "token_acc": 0.2862859384076139 }, { "epoch": 3.6051011433597187, "grad_norm": 0.21046883733817803, "learning_rate": 0.0004771667292630708, "loss": 3.1363589763641357, "step": 6150, "token_acc": 0.2820389541841518 }, { "epoch": 3.6056874816769273, "grad_norm": 0.24918852904935335, "learning_rate": 0.000477156611554042, "loss": 3.1044440269470215, "step": 6151, "token_acc": 0.2866876974942667 }, { "epoch": 3.6062738199941364, "grad_norm": 0.2871318398068339, "learning_rate": 0.00047714649171118296, "loss": 3.1065168380737305, "step": 6152, "token_acc": 0.2857030679471374 }, { "epoch": 3.6068601583113455, "grad_norm": 0.20726122903419147, "learning_rate": 0.0004771363697345887, "loss": 3.0898056030273438, "step": 6153, "token_acc": 0.2879864226578202 }, { "epoch": 3.6074464966285547, "grad_norm": 0.22849655198521618, "learning_rate": 0.0004771262456243544, "loss": 3.1361660957336426, "step": 6154, "token_acc": 0.2797342594149294 }, { "epoch": 3.6080328349457638, "grad_norm": 0.21458153035986993, "learning_rate": 0.00047711611938057496, "loss": 3.073190212249756, "step": 6155, "token_acc": 0.28994228802540767 }, { "epoch": 3.608619173262973, "grad_norm": 0.25545589844118616, "learning_rate": 0.00047710599100334565, "loss": 3.1131415367126465, "step": 6156, "token_acc": 0.2839673387252815 }, { "epoch": 3.609205511580182, "grad_norm": 0.24726736855435918, "learning_rate": 0.00047709586049276165, "loss": 3.084441900253296, "step": 6157, "token_acc": 0.2895341914828877 }, { "epoch": 3.6097918498973907, "grad_norm": 0.1984414527088492, "learning_rate": 0.000477085727848918, "loss": 3.0631606578826904, "step": 6158, "token_acc": 0.29389436714845335 }, { "epoch": 3.6103781882145998, "grad_norm": 0.1898311563909263, "learning_rate": 0.00047707559307191, "loss": 3.0513486862182617, "step": 6159, "token_acc": 0.2939433535160204 }, { "epoch": 3.610964526531809, "grad_norm": 0.2344889160096546, "learning_rate": 0.00047706545616183274, "loss": 3.0630717277526855, "step": 6160, "token_acc": 0.2917871916892165 }, { "epoch": 3.611550864849018, "grad_norm": 0.2014382254291346, "learning_rate": 0.0004770553171187815, "loss": 3.034069299697876, "step": 6161, "token_acc": 0.29681578856899565 }, { "epoch": 3.6121372031662267, "grad_norm": 0.19323502557645017, "learning_rate": 0.0004770451759428515, "loss": 3.1025657653808594, "step": 6162, "token_acc": 0.28746280373633487 }, { "epoch": 3.6127235414834358, "grad_norm": 0.24961574108924697, "learning_rate": 0.00047703503263413807, "loss": 3.084105968475342, "step": 6163, "token_acc": 0.28898550572529097 }, { "epoch": 3.613309879800645, "grad_norm": 0.2861568362886386, "learning_rate": 0.0004770248871927364, "loss": 3.052492380142212, "step": 6164, "token_acc": 0.29388593242303646 }, { "epoch": 3.613896218117854, "grad_norm": 0.19815537713231182, "learning_rate": 0.0004770147396187418, "loss": 3.1211977005004883, "step": 6165, "token_acc": 0.2837363404456261 }, { "epoch": 3.614482556435063, "grad_norm": 0.23209739560927015, "learning_rate": 0.00047700458991224964, "loss": 3.161158561706543, "step": 6166, "token_acc": 0.27751919614454357 }, { "epoch": 3.615068894752272, "grad_norm": 0.2911482814418456, "learning_rate": 0.0004769944380733553, "loss": 3.0570719242095947, "step": 6167, "token_acc": 0.2914293673800319 }, { "epoch": 3.6156552330694813, "grad_norm": 0.22721726031400136, "learning_rate": 0.0004769842841021541, "loss": 3.066655158996582, "step": 6168, "token_acc": 0.2915528568117965 }, { "epoch": 3.61624157138669, "grad_norm": 0.19723944276794295, "learning_rate": 0.0004769741279987414, "loss": 3.106085777282715, "step": 6169, "token_acc": 0.28563930594589565 }, { "epoch": 3.616827909703899, "grad_norm": 0.2043437274005556, "learning_rate": 0.00047696396976321256, "loss": 3.059593677520752, "step": 6170, "token_acc": 0.2935196984750961 }, { "epoch": 3.6174142480211082, "grad_norm": 0.18683702186265808, "learning_rate": 0.00047695380939566314, "loss": 3.0783352851867676, "step": 6171, "token_acc": 0.29276142879092976 }, { "epoch": 3.6180005863383173, "grad_norm": 0.24316790097054164, "learning_rate": 0.00047694364689618844, "loss": 3.096801280975342, "step": 6172, "token_acc": 0.2881470695905161 }, { "epoch": 3.618586924655526, "grad_norm": 0.21890597200081613, "learning_rate": 0.00047693348226488403, "loss": 3.060990333557129, "step": 6173, "token_acc": 0.2916454143993911 }, { "epoch": 3.619173262972735, "grad_norm": 0.21315895460354048, "learning_rate": 0.0004769233155018454, "loss": 3.0711026191711426, "step": 6174, "token_acc": 0.29160285246324935 }, { "epoch": 3.6197596012899442, "grad_norm": 0.2509137142499466, "learning_rate": 0.0004769131466071679, "loss": 3.1608777046203613, "step": 6175, "token_acc": 0.2791856200116948 }, { "epoch": 3.6203459396071533, "grad_norm": 0.21963180563037735, "learning_rate": 0.0004769029755809472, "loss": 3.0996413230895996, "step": 6176, "token_acc": 0.28584191115185864 }, { "epoch": 3.6209322779243625, "grad_norm": 0.22359001954251972, "learning_rate": 0.00047689280242327884, "loss": 3.0976853370666504, "step": 6177, "token_acc": 0.2881094049655651 }, { "epoch": 3.6215186162415716, "grad_norm": 0.22446769676673142, "learning_rate": 0.0004768826271342583, "loss": 3.077608346939087, "step": 6178, "token_acc": 0.29003724633300376 }, { "epoch": 3.6221049545587807, "grad_norm": 0.23685189025065137, "learning_rate": 0.0004768724497139812, "loss": 3.0701286792755127, "step": 6179, "token_acc": 0.2918935206885717 }, { "epoch": 3.6226912928759893, "grad_norm": 0.23679083876027873, "learning_rate": 0.00047686227016254315, "loss": 3.0648012161254883, "step": 6180, "token_acc": 0.29228603260883945 }, { "epoch": 3.6232776311931985, "grad_norm": 0.24234513773312819, "learning_rate": 0.0004768520884800398, "loss": 3.1013264656066895, "step": 6181, "token_acc": 0.2870063169672341 }, { "epoch": 3.6238639695104076, "grad_norm": 0.25733685944573786, "learning_rate": 0.00047684190466656674, "loss": 3.118103504180908, "step": 6182, "token_acc": 0.283923168668202 }, { "epoch": 3.6244503078276167, "grad_norm": 0.2248902928697395, "learning_rate": 0.00047683171872221964, "loss": 3.0534701347351074, "step": 6183, "token_acc": 0.29327650883413986 }, { "epoch": 3.6250366461448253, "grad_norm": 0.27249207301391154, "learning_rate": 0.0004768215306470943, "loss": 3.0688135623931885, "step": 6184, "token_acc": 0.2924603344081659 }, { "epoch": 3.6256229844620345, "grad_norm": 0.304496002357318, "learning_rate": 0.0004768113404412862, "loss": 3.0284557342529297, "step": 6185, "token_acc": 0.2968628611594096 }, { "epoch": 3.6262093227792436, "grad_norm": 0.18877088115358923, "learning_rate": 0.00047680114810489126, "loss": 3.0931167602539062, "step": 6186, "token_acc": 0.28689559548820254 }, { "epoch": 3.6267956610964527, "grad_norm": 0.22025622833470732, "learning_rate": 0.00047679095363800515, "loss": 3.062897205352783, "step": 6187, "token_acc": 0.2929161372299873 }, { "epoch": 3.627381999413662, "grad_norm": 0.20737391241847097, "learning_rate": 0.0004767807570407236, "loss": 3.0727691650390625, "step": 6188, "token_acc": 0.28956141456141454 }, { "epoch": 3.627968337730871, "grad_norm": 0.2209541145496801, "learning_rate": 0.0004767705583131424, "loss": 3.083425521850586, "step": 6189, "token_acc": 0.28936211580317456 }, { "epoch": 3.62855467604808, "grad_norm": 0.24295002611734384, "learning_rate": 0.00047676035745535753, "loss": 3.095484733581543, "step": 6190, "token_acc": 0.2877602432931928 }, { "epoch": 3.6291410143652887, "grad_norm": 0.18887464587121233, "learning_rate": 0.0004767501544674645, "loss": 3.090249538421631, "step": 6191, "token_acc": 0.2878626069541656 }, { "epoch": 3.629727352682498, "grad_norm": 0.2739051507571642, "learning_rate": 0.0004767399493495594, "loss": 3.0976619720458984, "step": 6192, "token_acc": 0.2878212243919472 }, { "epoch": 3.630313690999707, "grad_norm": 0.19223149731864148, "learning_rate": 0.00047672974210173803, "loss": 3.0885186195373535, "step": 6193, "token_acc": 0.2885616825972057 }, { "epoch": 3.630900029316916, "grad_norm": 0.2316482804956627, "learning_rate": 0.0004767195327240962, "loss": 3.097896099090576, "step": 6194, "token_acc": 0.28610354923211256 }, { "epoch": 3.6314863676341247, "grad_norm": 0.2178982831809425, "learning_rate": 0.00047670932121672994, "loss": 3.0861058235168457, "step": 6195, "token_acc": 0.2888716146243579 }, { "epoch": 3.632072705951334, "grad_norm": 0.21499441368527678, "learning_rate": 0.000476699107579735, "loss": 3.0841989517211914, "step": 6196, "token_acc": 0.28859822135683333 }, { "epoch": 3.632659044268543, "grad_norm": 0.20166165837469957, "learning_rate": 0.0004766888918132075, "loss": 3.1198253631591797, "step": 6197, "token_acc": 0.28530043467832994 }, { "epoch": 3.633245382585752, "grad_norm": 0.25243003728941044, "learning_rate": 0.00047667867391724326, "loss": 3.0948214530944824, "step": 6198, "token_acc": 0.2888231001990224 }, { "epoch": 3.633831720902961, "grad_norm": 0.2597949203094148, "learning_rate": 0.00047666845389193844, "loss": 3.131648302078247, "step": 6199, "token_acc": 0.2822694007305449 }, { "epoch": 3.6344180592201702, "grad_norm": 0.21529718017280955, "learning_rate": 0.00047665823173738886, "loss": 3.0767598152160645, "step": 6200, "token_acc": 0.29077363418817675 }, { "epoch": 3.635004397537379, "grad_norm": 0.24940255725691335, "learning_rate": 0.00047664800745369064, "loss": 3.0652365684509277, "step": 6201, "token_acc": 0.29327032760704985 }, { "epoch": 3.635590735854588, "grad_norm": 0.20055836852612532, "learning_rate": 0.0004766377810409398, "loss": 3.1221132278442383, "step": 6202, "token_acc": 0.28106990745784055 }, { "epoch": 3.636177074171797, "grad_norm": 0.22678682101731504, "learning_rate": 0.0004766275524992324, "loss": 3.0877137184143066, "step": 6203, "token_acc": 0.28937667472457174 }, { "epoch": 3.6367634124890063, "grad_norm": 0.2754439934437512, "learning_rate": 0.0004766173218286645, "loss": 3.1052157878875732, "step": 6204, "token_acc": 0.28661094723041625 }, { "epoch": 3.637349750806215, "grad_norm": 0.21171118960532365, "learning_rate": 0.00047660708902933223, "loss": 3.093491315841675, "step": 6205, "token_acc": 0.28907416658429796 }, { "epoch": 3.637936089123424, "grad_norm": 0.25315282061976235, "learning_rate": 0.0004765968541013318, "loss": 3.090362548828125, "step": 6206, "token_acc": 0.28790232201490334 }, { "epoch": 3.638522427440633, "grad_norm": 0.24392693684960898, "learning_rate": 0.0004765866170447592, "loss": 3.1106064319610596, "step": 6207, "token_acc": 0.28598399687955406 }, { "epoch": 3.6391087657578423, "grad_norm": 0.18222006037600721, "learning_rate": 0.0004765763778597107, "loss": 3.0544707775115967, "step": 6208, "token_acc": 0.2932226615208466 }, { "epoch": 3.6396951040750514, "grad_norm": 0.22079661216213745, "learning_rate": 0.0004765661365462824, "loss": 3.1136183738708496, "step": 6209, "token_acc": 0.28499264938736313 }, { "epoch": 3.6402814423922605, "grad_norm": 0.17803388429075456, "learning_rate": 0.0004765558931045706, "loss": 3.0879428386688232, "step": 6210, "token_acc": 0.288806224267627 }, { "epoch": 3.6408677807094696, "grad_norm": 0.22646868712896623, "learning_rate": 0.00047654564753467143, "loss": 3.090172290802002, "step": 6211, "token_acc": 0.2889591964846202 }, { "epoch": 3.6414541190266783, "grad_norm": 0.19908643396019987, "learning_rate": 0.00047653539983668117, "loss": 3.084719181060791, "step": 6212, "token_acc": 0.2874309579038247 }, { "epoch": 3.6420404573438874, "grad_norm": 0.19252639591798976, "learning_rate": 0.00047652515001069613, "loss": 3.044753313064575, "step": 6213, "token_acc": 0.2956261458450256 }, { "epoch": 3.6426267956610965, "grad_norm": 0.22536987238600592, "learning_rate": 0.00047651489805681255, "loss": 3.090442180633545, "step": 6214, "token_acc": 0.28893456069757123 }, { "epoch": 3.6432131339783056, "grad_norm": 0.21112075705873906, "learning_rate": 0.00047650464397512674, "loss": 3.1197757720947266, "step": 6215, "token_acc": 0.28439293245410063 }, { "epoch": 3.6437994722955143, "grad_norm": 0.21088413497887928, "learning_rate": 0.00047649438776573494, "loss": 3.111454963684082, "step": 6216, "token_acc": 0.28508773094956635 }, { "epoch": 3.6443858106127234, "grad_norm": 0.2338630637272971, "learning_rate": 0.00047648412942873363, "loss": 3.0553455352783203, "step": 6217, "token_acc": 0.2925149152852453 }, { "epoch": 3.6449721489299325, "grad_norm": 0.2527369762607655, "learning_rate": 0.00047647386896421915, "loss": 3.0808944702148438, "step": 6218, "token_acc": 0.29092652573045624 }, { "epoch": 3.6455584872471416, "grad_norm": 0.24080196818436336, "learning_rate": 0.0004764636063722878, "loss": 3.115847110748291, "step": 6219, "token_acc": 0.28540999199038847 }, { "epoch": 3.6461448255643507, "grad_norm": 0.2047574716932012, "learning_rate": 0.000476453341653036, "loss": 3.0454177856445312, "step": 6220, "token_acc": 0.29516576871448796 }, { "epoch": 3.64673116388156, "grad_norm": 0.21725096741971558, "learning_rate": 0.00047644307480656026, "loss": 3.1163296699523926, "step": 6221, "token_acc": 0.28523514324579713 }, { "epoch": 3.647317502198769, "grad_norm": 0.26251639460341036, "learning_rate": 0.00047643280583295694, "loss": 3.0693156719207764, "step": 6222, "token_acc": 0.29153100527749404 }, { "epoch": 3.6479038405159776, "grad_norm": 0.3069609613672709, "learning_rate": 0.00047642253473232255, "loss": 3.0870721340179443, "step": 6223, "token_acc": 0.28760876442583444 }, { "epoch": 3.6484901788331867, "grad_norm": 0.294005411916087, "learning_rate": 0.00047641226150475357, "loss": 3.087651252746582, "step": 6224, "token_acc": 0.28842587111795115 }, { "epoch": 3.649076517150396, "grad_norm": 0.2063518943121151, "learning_rate": 0.0004764019861503465, "loss": 3.0559561252593994, "step": 6225, "token_acc": 0.2925019755871138 }, { "epoch": 3.649662855467605, "grad_norm": 0.28362668986195483, "learning_rate": 0.0004763917086691978, "loss": 3.0708789825439453, "step": 6226, "token_acc": 0.29053344258506403 }, { "epoch": 3.6502491937848136, "grad_norm": 0.30496142217560235, "learning_rate": 0.00047638142906140403, "loss": 3.1105563640594482, "step": 6227, "token_acc": 0.285320124212932 }, { "epoch": 3.6508355321020227, "grad_norm": 0.22565563687211634, "learning_rate": 0.00047637114732706186, "loss": 3.0886902809143066, "step": 6228, "token_acc": 0.28961035509249416 }, { "epoch": 3.651421870419232, "grad_norm": 0.18822991769045821, "learning_rate": 0.00047636086346626783, "loss": 3.027623176574707, "step": 6229, "token_acc": 0.2981633530059425 }, { "epoch": 3.652008208736441, "grad_norm": 0.23974902357576536, "learning_rate": 0.0004763505774791184, "loss": 3.1236419677734375, "step": 6230, "token_acc": 0.2841485020938473 }, { "epoch": 3.65259454705365, "grad_norm": 0.21181182226205186, "learning_rate": 0.0004763402893657104, "loss": 3.094027042388916, "step": 6231, "token_acc": 0.28781114371087463 }, { "epoch": 3.653180885370859, "grad_norm": 0.26768787816589984, "learning_rate": 0.00047632999912614033, "loss": 3.114208221435547, "step": 6232, "token_acc": 0.2845583187072404 }, { "epoch": 3.6537672236880683, "grad_norm": 0.2890370560076679, "learning_rate": 0.00047631970676050494, "loss": 3.102231979370117, "step": 6233, "token_acc": 0.28813852721621974 }, { "epoch": 3.654353562005277, "grad_norm": 0.22943613032760116, "learning_rate": 0.00047630941226890083, "loss": 3.092461109161377, "step": 6234, "token_acc": 0.28880832964294173 }, { "epoch": 3.654939900322486, "grad_norm": 0.2472866075783113, "learning_rate": 0.0004762991156514248, "loss": 3.107983350753784, "step": 6235, "token_acc": 0.28601362646334627 }, { "epoch": 3.655526238639695, "grad_norm": 0.3046132094720925, "learning_rate": 0.0004762888169081735, "loss": 3.151815414428711, "step": 6236, "token_acc": 0.2816000657336145 }, { "epoch": 3.6561125769569043, "grad_norm": 0.24462702280019508, "learning_rate": 0.0004762785160392437, "loss": 3.0504300594329834, "step": 6237, "token_acc": 0.2928942364049131 }, { "epoch": 3.656698915274113, "grad_norm": 0.25652431828056904, "learning_rate": 0.0004762682130447321, "loss": 3.0745091438293457, "step": 6238, "token_acc": 0.2898492358529533 }, { "epoch": 3.657285253591322, "grad_norm": 0.2925398133522588, "learning_rate": 0.00047625790792473556, "loss": 3.0625357627868652, "step": 6239, "token_acc": 0.2931582539557371 }, { "epoch": 3.657871591908531, "grad_norm": 0.2369871157977074, "learning_rate": 0.0004762476006793509, "loss": 3.048499345779419, "step": 6240, "token_acc": 0.2934155913370694 }, { "epoch": 3.6584579302257403, "grad_norm": 0.23558119800398816, "learning_rate": 0.0004762372913086749, "loss": 3.099194288253784, "step": 6241, "token_acc": 0.2885295956271731 }, { "epoch": 3.6590442685429494, "grad_norm": 0.23242298993515245, "learning_rate": 0.0004762269798128044, "loss": 3.086749315261841, "step": 6242, "token_acc": 0.28819099846184715 }, { "epoch": 3.6596306068601585, "grad_norm": 0.2658606701628042, "learning_rate": 0.00047621666619183624, "loss": 3.108912229537964, "step": 6243, "token_acc": 0.2866268014539078 }, { "epoch": 3.660216945177367, "grad_norm": 0.20165901670828362, "learning_rate": 0.0004762063504458673, "loss": 3.088257312774658, "step": 6244, "token_acc": 0.28996089647055845 }, { "epoch": 3.6608032834945763, "grad_norm": 0.21964186599107574, "learning_rate": 0.0004761960325749946, "loss": 3.076192855834961, "step": 6245, "token_acc": 0.2898234789520489 }, { "epoch": 3.6613896218117854, "grad_norm": 0.1983458800912109, "learning_rate": 0.00047618571257931495, "loss": 3.0533924102783203, "step": 6246, "token_acc": 0.29369114925855117 }, { "epoch": 3.6619759601289945, "grad_norm": 0.2233203065309482, "learning_rate": 0.00047617539045892535, "loss": 3.121384620666504, "step": 6247, "token_acc": 0.28413885590704774 }, { "epoch": 3.6625622984462036, "grad_norm": 0.2615998448871885, "learning_rate": 0.00047616506621392266, "loss": 3.098362445831299, "step": 6248, "token_acc": 0.28746183604919645 }, { "epoch": 3.6631486367634123, "grad_norm": 0.21387160312354464, "learning_rate": 0.00047615473984440396, "loss": 3.1184983253479004, "step": 6249, "token_acc": 0.2831478582172352 }, { "epoch": 3.6637349750806214, "grad_norm": 0.22303363105330418, "learning_rate": 0.0004761444113504663, "loss": 3.048077344894409, "step": 6250, "token_acc": 0.2935597682991048 }, { "epoch": 3.6643213133978305, "grad_norm": 0.18200892173872896, "learning_rate": 0.00047613408073220653, "loss": 3.102721691131592, "step": 6251, "token_acc": 0.28736738284373836 }, { "epoch": 3.6649076517150396, "grad_norm": 0.24624543516368888, "learning_rate": 0.0004761237479897218, "loss": 3.066155433654785, "step": 6252, "token_acc": 0.29176031562144644 }, { "epoch": 3.6654939900322487, "grad_norm": 0.2536493957241849, "learning_rate": 0.0004761134131231092, "loss": 3.112647294998169, "step": 6253, "token_acc": 0.2854767511089418 }, { "epoch": 3.666080328349458, "grad_norm": 0.21241022032794418, "learning_rate": 0.00047610307613246575, "loss": 3.0763943195343018, "step": 6254, "token_acc": 0.29107534151082864 }, { "epoch": 3.6666666666666665, "grad_norm": 0.23362248433459845, "learning_rate": 0.00047609273701788857, "loss": 3.0946872234344482, "step": 6255, "token_acc": 0.2873993673765168 }, { "epoch": 3.6672530049838756, "grad_norm": 0.21881371002325514, "learning_rate": 0.0004760823957794748, "loss": 3.0392847061157227, "step": 6256, "token_acc": 0.2959683198785911 }, { "epoch": 3.6678393433010847, "grad_norm": 0.1899075179973683, "learning_rate": 0.00047607205241732164, "loss": 3.080958843231201, "step": 6257, "token_acc": 0.2889517285327386 }, { "epoch": 3.668425681618294, "grad_norm": 0.20643395051812038, "learning_rate": 0.0004760617069315261, "loss": 3.106217384338379, "step": 6258, "token_acc": 0.2872088770303944 }, { "epoch": 3.6690120199355025, "grad_norm": 0.20507092832618257, "learning_rate": 0.00047605135932218546, "loss": 3.0806360244750977, "step": 6259, "token_acc": 0.2894196357856811 }, { "epoch": 3.6695983582527116, "grad_norm": 0.17100317733580359, "learning_rate": 0.0004760410095893969, "loss": 3.0370893478393555, "step": 6260, "token_acc": 0.29525727775034194 }, { "epoch": 3.6701846965699207, "grad_norm": 0.20698216792346336, "learning_rate": 0.00047603065773325773, "loss": 3.1035714149475098, "step": 6261, "token_acc": 0.2866073456134352 }, { "epoch": 3.67077103488713, "grad_norm": 0.20582652474136504, "learning_rate": 0.000476020303753865, "loss": 3.119476556777954, "step": 6262, "token_acc": 0.28628544935508743 }, { "epoch": 3.671357373204339, "grad_norm": 0.22798684487631307, "learning_rate": 0.0004760099476513161, "loss": 3.105592966079712, "step": 6263, "token_acc": 0.28439875332929865 }, { "epoch": 3.671943711521548, "grad_norm": 0.20290258734572972, "learning_rate": 0.0004759995894257084, "loss": 3.038015365600586, "step": 6264, "token_acc": 0.29646191177561376 }, { "epoch": 3.672530049838757, "grad_norm": 0.2443659943765557, "learning_rate": 0.000475989229077139, "loss": 3.0509090423583984, "step": 6265, "token_acc": 0.29322042655484765 }, { "epoch": 3.673116388155966, "grad_norm": 0.24525846602370088, "learning_rate": 0.00047597886660570533, "loss": 3.0995731353759766, "step": 6266, "token_acc": 0.2867245708012726 }, { "epoch": 3.673702726473175, "grad_norm": 0.19504477506266588, "learning_rate": 0.0004759685020115047, "loss": 3.1039326190948486, "step": 6267, "token_acc": 0.2870681362617757 }, { "epoch": 3.674289064790384, "grad_norm": 0.21086440987066887, "learning_rate": 0.0004759581352946345, "loss": 3.068416118621826, "step": 6268, "token_acc": 0.2908765454228721 }, { "epoch": 3.674875403107593, "grad_norm": 0.21902279984431536, "learning_rate": 0.0004759477664551921, "loss": 3.0437686443328857, "step": 6269, "token_acc": 0.29653908933917555 }, { "epoch": 3.675461741424802, "grad_norm": 0.18404177002741884, "learning_rate": 0.00047593739549327496, "loss": 3.0717930793762207, "step": 6270, "token_acc": 0.2906671721777623 }, { "epoch": 3.676048079742011, "grad_norm": 0.18090798508197553, "learning_rate": 0.00047592702240898045, "loss": 3.117943286895752, "step": 6271, "token_acc": 0.2842980246654179 }, { "epoch": 3.67663441805922, "grad_norm": 0.23377700145256902, "learning_rate": 0.00047591664720240593, "loss": 3.101257562637329, "step": 6272, "token_acc": 0.2874908117189961 }, { "epoch": 3.677220756376429, "grad_norm": 0.22691808097231375, "learning_rate": 0.00047590626987364893, "loss": 3.1025478839874268, "step": 6273, "token_acc": 0.28647035745073807 }, { "epoch": 3.6778070946936383, "grad_norm": 0.22916477012381561, "learning_rate": 0.000475895890422807, "loss": 3.062260627746582, "step": 6274, "token_acc": 0.2907589368035616 }, { "epoch": 3.6783934330108474, "grad_norm": 0.23994674069313526, "learning_rate": 0.0004758855088499775, "loss": 3.0703911781311035, "step": 6275, "token_acc": 0.2906898722535551 }, { "epoch": 3.6789797713280565, "grad_norm": 0.2379897405007281, "learning_rate": 0.0004758751251552581, "loss": 3.0789647102355957, "step": 6276, "token_acc": 0.28963149570008484 }, { "epoch": 3.679566109645265, "grad_norm": 0.19730749848966164, "learning_rate": 0.0004758647393387463, "loss": 3.0743894577026367, "step": 6277, "token_acc": 0.29058828674251874 }, { "epoch": 3.6801524479624743, "grad_norm": 0.22335253272557967, "learning_rate": 0.0004758543514005396, "loss": 3.0588574409484863, "step": 6278, "token_acc": 0.29139148378609203 }, { "epoch": 3.6807387862796834, "grad_norm": 0.23368121401051467, "learning_rate": 0.0004758439613407355, "loss": 3.053405523300171, "step": 6279, "token_acc": 0.29412319544396603 }, { "epoch": 3.6813251245968925, "grad_norm": 0.22202402098337143, "learning_rate": 0.0004758335691594318, "loss": 3.0849609375, "step": 6280, "token_acc": 0.2893810896258389 }, { "epoch": 3.681911462914101, "grad_norm": 0.20135257202952184, "learning_rate": 0.0004758231748567261, "loss": 3.0706231594085693, "step": 6281, "token_acc": 0.29125868961961293 }, { "epoch": 3.6824978012313103, "grad_norm": 0.21289311571550834, "learning_rate": 0.00047581277843271585, "loss": 3.054109573364258, "step": 6282, "token_acc": 0.29351748529243354 }, { "epoch": 3.6830841395485194, "grad_norm": 0.2573553424762857, "learning_rate": 0.0004758023798874989, "loss": 3.0890324115753174, "step": 6283, "token_acc": 0.28887107913372434 }, { "epoch": 3.6836704778657285, "grad_norm": 0.2283806847761128, "learning_rate": 0.0004757919792211729, "loss": 3.1533498764038086, "step": 6284, "token_acc": 0.27820880661347586 }, { "epoch": 3.6842568161829377, "grad_norm": 0.20540577972612378, "learning_rate": 0.00047578157643383544, "loss": 3.075993537902832, "step": 6285, "token_acc": 0.2906262835035839 }, { "epoch": 3.6848431545001468, "grad_norm": 0.22375276760162746, "learning_rate": 0.00047577117152558437, "loss": 3.0647780895233154, "step": 6286, "token_acc": 0.2925636988979915 }, { "epoch": 3.685429492817356, "grad_norm": 0.21415046372869917, "learning_rate": 0.0004757607644965173, "loss": 3.0991854667663574, "step": 6287, "token_acc": 0.2854994673232099 }, { "epoch": 3.6860158311345645, "grad_norm": 0.21465970960962477, "learning_rate": 0.0004757503553467321, "loss": 3.0805742740631104, "step": 6288, "token_acc": 0.2893275300614402 }, { "epoch": 3.6866021694517737, "grad_norm": 0.21714624925360393, "learning_rate": 0.00047573994407632653, "loss": 3.0888969898223877, "step": 6289, "token_acc": 0.2888452482192804 }, { "epoch": 3.6871885077689828, "grad_norm": 0.34475094524051425, "learning_rate": 0.0004757295306853984, "loss": 3.130868434906006, "step": 6290, "token_acc": 0.28242955288108407 }, { "epoch": 3.687774846086192, "grad_norm": 0.32778502380039076, "learning_rate": 0.0004757191151740454, "loss": 3.0563433170318604, "step": 6291, "token_acc": 0.292867452051083 }, { "epoch": 3.6883611844034006, "grad_norm": 0.23288229488820736, "learning_rate": 0.0004757086975423656, "loss": 3.0520567893981934, "step": 6292, "token_acc": 0.2950680778847275 }, { "epoch": 3.6889475227206097, "grad_norm": 0.2983543937554026, "learning_rate": 0.00047569827779045664, "loss": 3.070394992828369, "step": 6293, "token_acc": 0.2911868819395137 }, { "epoch": 3.6895338610378188, "grad_norm": 0.34117789944213417, "learning_rate": 0.0004756878559184166, "loss": 3.057326316833496, "step": 6294, "token_acc": 0.29340623332872967 }, { "epoch": 3.690120199355028, "grad_norm": 0.23148547060191585, "learning_rate": 0.00047567743192634314, "loss": 3.100137233734131, "step": 6295, "token_acc": 0.287123777614479 }, { "epoch": 3.690706537672237, "grad_norm": 0.28121143603649723, "learning_rate": 0.0004756670058143344, "loss": 3.065199136734009, "step": 6296, "token_acc": 0.29168741445610374 }, { "epoch": 3.691292875989446, "grad_norm": 0.22718486945526592, "learning_rate": 0.00047565657758248816, "loss": 3.0887603759765625, "step": 6297, "token_acc": 0.2873286165611868 }, { "epoch": 3.6918792143066548, "grad_norm": 0.2506051681794216, "learning_rate": 0.00047564614723090247, "loss": 3.0363707542419434, "step": 6298, "token_acc": 0.2942847230133496 }, { "epoch": 3.692465552623864, "grad_norm": 0.21521561727648317, "learning_rate": 0.00047563571475967535, "loss": 3.105945110321045, "step": 6299, "token_acc": 0.28972472547412337 }, { "epoch": 3.693051890941073, "grad_norm": 0.21361156627561806, "learning_rate": 0.0004756252801689047, "loss": 3.061645984649658, "step": 6300, "token_acc": 0.29263675220190993 }, { "epoch": 3.693638229258282, "grad_norm": 0.2069030764472745, "learning_rate": 0.00047561484345868854, "loss": 3.0578322410583496, "step": 6301, "token_acc": 0.29353489200884036 }, { "epoch": 3.6942245675754912, "grad_norm": 0.20670603616125363, "learning_rate": 0.00047560440462912504, "loss": 3.099064826965332, "step": 6302, "token_acc": 0.28871987818757244 }, { "epoch": 3.6948109058927, "grad_norm": 0.21841739846028618, "learning_rate": 0.00047559396368031205, "loss": 3.078824281692505, "step": 6303, "token_acc": 0.2903975589354999 }, { "epoch": 3.695397244209909, "grad_norm": 0.19856069888976535, "learning_rate": 0.00047558352061234785, "loss": 3.140575885772705, "step": 6304, "token_acc": 0.2827258443598966 }, { "epoch": 3.695983582527118, "grad_norm": 0.20165550675336996, "learning_rate": 0.0004755730754253304, "loss": 3.0617616176605225, "step": 6305, "token_acc": 0.29107232207647954 }, { "epoch": 3.6965699208443272, "grad_norm": 0.20947075345087968, "learning_rate": 0.0004755626281193579, "loss": 3.060178279876709, "step": 6306, "token_acc": 0.2916798309447722 }, { "epoch": 3.6971562591615363, "grad_norm": 0.19413655033577182, "learning_rate": 0.00047555217869452843, "loss": 3.1074752807617188, "step": 6307, "token_acc": 0.2858410441624947 }, { "epoch": 3.6977425974787455, "grad_norm": 0.20651578958433153, "learning_rate": 0.0004755417271509402, "loss": 3.1317760944366455, "step": 6308, "token_acc": 0.2822121026941752 }, { "epoch": 3.698328935795954, "grad_norm": 0.1851037953116679, "learning_rate": 0.0004755312734886914, "loss": 3.0649638175964355, "step": 6309, "token_acc": 0.29192491363543993 }, { "epoch": 3.6989152741131632, "grad_norm": 0.21068140323826287, "learning_rate": 0.00047552081770788014, "loss": 3.0676498413085938, "step": 6310, "token_acc": 0.29133634355906485 }, { "epoch": 3.6995016124303723, "grad_norm": 0.22738852852724492, "learning_rate": 0.0004755103598086047, "loss": 3.07861590385437, "step": 6311, "token_acc": 0.29074357232592235 }, { "epoch": 3.7000879507475815, "grad_norm": 0.17778649446636097, "learning_rate": 0.0004754998997909633, "loss": 3.0292582511901855, "step": 6312, "token_acc": 0.2948299685770333 }, { "epoch": 3.70067428906479, "grad_norm": 0.2146787775697283, "learning_rate": 0.0004754894376550542, "loss": 3.072861433029175, "step": 6313, "token_acc": 0.2900795934975487 }, { "epoch": 3.7012606273819992, "grad_norm": 0.2664391773091337, "learning_rate": 0.0004754789734009757, "loss": 3.096806764602661, "step": 6314, "token_acc": 0.2867016180820937 }, { "epoch": 3.7018469656992083, "grad_norm": 0.25718888422964237, "learning_rate": 0.0004754685070288261, "loss": 3.047502040863037, "step": 6315, "token_acc": 0.2929279636146373 }, { "epoch": 3.7024333040164175, "grad_norm": 0.20189826627025353, "learning_rate": 0.0004754580385387037, "loss": 3.112776279449463, "step": 6316, "token_acc": 0.28554291602372517 }, { "epoch": 3.7030196423336266, "grad_norm": 0.22236618456791737, "learning_rate": 0.00047544756793070674, "loss": 3.084738254547119, "step": 6317, "token_acc": 0.28979864578309356 }, { "epoch": 3.7036059806508357, "grad_norm": 0.2724813211451575, "learning_rate": 0.00047543709520493383, "loss": 3.090397834777832, "step": 6318, "token_acc": 0.28971236408241335 }, { "epoch": 3.704192318968045, "grad_norm": 0.2551393406451765, "learning_rate": 0.0004754266203614831, "loss": 3.0775043964385986, "step": 6319, "token_acc": 0.2916023043233322 }, { "epoch": 3.7047786572852535, "grad_norm": 0.22539730988524811, "learning_rate": 0.000475416143400453, "loss": 3.0729165077209473, "step": 6320, "token_acc": 0.29112627801915136 }, { "epoch": 3.7053649956024626, "grad_norm": 0.28555473781419266, "learning_rate": 0.0004754056643219421, "loss": 3.109708309173584, "step": 6321, "token_acc": 0.28676166073065096 }, { "epoch": 3.7059513339196717, "grad_norm": 0.1966779139163532, "learning_rate": 0.00047539518312604865, "loss": 3.096301555633545, "step": 6322, "token_acc": 0.2870080579072935 }, { "epoch": 3.706537672236881, "grad_norm": 0.2932845820030136, "learning_rate": 0.00047538469981287115, "loss": 3.041316509246826, "step": 6323, "token_acc": 0.2964478087561542 }, { "epoch": 3.7071240105540895, "grad_norm": 0.28659920298395736, "learning_rate": 0.00047537421438250814, "loss": 3.0733954906463623, "step": 6324, "token_acc": 0.29084740434116246 }, { "epoch": 3.7077103488712986, "grad_norm": 0.22756561576942325, "learning_rate": 0.00047536372683505815, "loss": 3.0740504264831543, "step": 6325, "token_acc": 0.2897523999370508 }, { "epoch": 3.7082966871885077, "grad_norm": 0.3631635284043102, "learning_rate": 0.0004753532371706195, "loss": 3.0869903564453125, "step": 6326, "token_acc": 0.2899041188030017 }, { "epoch": 3.708883025505717, "grad_norm": 0.22212766138999201, "learning_rate": 0.0004753427453892909, "loss": 3.0890285968780518, "step": 6327, "token_acc": 0.28805227409690076 }, { "epoch": 3.709469363822926, "grad_norm": 0.238277483012778, "learning_rate": 0.0004753322514911709, "loss": 3.1144447326660156, "step": 6328, "token_acc": 0.2848909809402926 }, { "epoch": 3.710055702140135, "grad_norm": 0.21968159059481188, "learning_rate": 0.000475321755476358, "loss": 3.0980639457702637, "step": 6329, "token_acc": 0.28712004781420764 }, { "epoch": 3.710642040457344, "grad_norm": 0.21928506862340844, "learning_rate": 0.00047531125734495087, "loss": 3.0754799842834473, "step": 6330, "token_acc": 0.2914571151375305 }, { "epoch": 3.711228378774553, "grad_norm": 0.19981257691269322, "learning_rate": 0.000475300757097048, "loss": 3.059030532836914, "step": 6331, "token_acc": 0.29247898094667985 }, { "epoch": 3.711814717091762, "grad_norm": 0.21537483668662177, "learning_rate": 0.00047529025473274825, "loss": 3.0732533931732178, "step": 6332, "token_acc": 0.2909539894569835 }, { "epoch": 3.712401055408971, "grad_norm": 0.1978371814252081, "learning_rate": 0.00047527975025214996, "loss": 3.1028800010681152, "step": 6333, "token_acc": 0.285731436460772 }, { "epoch": 3.71298739372618, "grad_norm": 0.23446813288548501, "learning_rate": 0.0004752692436553522, "loss": 3.0716171264648438, "step": 6334, "token_acc": 0.290743820872282 }, { "epoch": 3.713573732043389, "grad_norm": 0.2336705396491335, "learning_rate": 0.0004752587349424533, "loss": 3.0415682792663574, "step": 6335, "token_acc": 0.2934679407564221 }, { "epoch": 3.714160070360598, "grad_norm": 0.2182677270496905, "learning_rate": 0.00047524822411355215, "loss": 3.0925354957580566, "step": 6336, "token_acc": 0.28842309001191085 }, { "epoch": 3.714746408677807, "grad_norm": 0.22765144769728646, "learning_rate": 0.00047523771116874747, "loss": 3.054443359375, "step": 6337, "token_acc": 0.2943773349450056 }, { "epoch": 3.715332746995016, "grad_norm": 0.19958336277078395, "learning_rate": 0.000475227196108138, "loss": 3.1000285148620605, "step": 6338, "token_acc": 0.2850800267310533 }, { "epoch": 3.7159190853122253, "grad_norm": 0.22876066559832609, "learning_rate": 0.00047521667893182253, "loss": 3.144841432571411, "step": 6339, "token_acc": 0.28191783512241003 }, { "epoch": 3.7165054236294344, "grad_norm": 0.22160308200784715, "learning_rate": 0.00047520615963989987, "loss": 3.0947482585906982, "step": 6340, "token_acc": 0.2872743775180933 }, { "epoch": 3.7170917619466435, "grad_norm": 0.19663828098623984, "learning_rate": 0.00047519563823246875, "loss": 3.066298007965088, "step": 6341, "token_acc": 0.292177660782492 }, { "epoch": 3.717678100263852, "grad_norm": 0.20398305968807984, "learning_rate": 0.0004751851147096281, "loss": 3.0842764377593994, "step": 6342, "token_acc": 0.29026055841805126 }, { "epoch": 3.7182644385810613, "grad_norm": 0.1896376488946728, "learning_rate": 0.0004751745890714767, "loss": 3.115405797958374, "step": 6343, "token_acc": 0.2853894346444416 }, { "epoch": 3.7188507768982704, "grad_norm": 0.23692828993406395, "learning_rate": 0.00047516406131811354, "loss": 3.128922939300537, "step": 6344, "token_acc": 0.2841535617649423 }, { "epoch": 3.7194371152154795, "grad_norm": 0.22687820994633073, "learning_rate": 0.00047515353144963736, "loss": 3.0918853282928467, "step": 6345, "token_acc": 0.28732471748766936 }, { "epoch": 3.720023453532688, "grad_norm": 0.20137883907480006, "learning_rate": 0.00047514299946614717, "loss": 3.0686206817626953, "step": 6346, "token_acc": 0.29174380634906505 }, { "epoch": 3.7206097918498973, "grad_norm": 0.17200122758836944, "learning_rate": 0.0004751324653677419, "loss": 3.0734782218933105, "step": 6347, "token_acc": 0.29060088224669683 }, { "epoch": 3.7211961301671064, "grad_norm": 0.19134925356395815, "learning_rate": 0.00047512192915452053, "loss": 3.045711040496826, "step": 6348, "token_acc": 0.29364700620928313 }, { "epoch": 3.7217824684843155, "grad_norm": 0.18758235668477113, "learning_rate": 0.00047511139082658194, "loss": 3.0867111682891846, "step": 6349, "token_acc": 0.2886962329338933 }, { "epoch": 3.7223688068015246, "grad_norm": 0.20260956495466897, "learning_rate": 0.0004751008503840252, "loss": 3.1001393795013428, "step": 6350, "token_acc": 0.28853601460642375 }, { "epoch": 3.7229551451187337, "grad_norm": 0.22310748227684707, "learning_rate": 0.00047509030782694927, "loss": 3.0518672466278076, "step": 6351, "token_acc": 0.29270070522449254 }, { "epoch": 3.7235414834359424, "grad_norm": 0.23294080017392524, "learning_rate": 0.0004750797631554532, "loss": 3.143362522125244, "step": 6352, "token_acc": 0.2823193673710059 }, { "epoch": 3.7241278217531515, "grad_norm": 0.23264506010627564, "learning_rate": 0.0004750692163696361, "loss": 3.128628730773926, "step": 6353, "token_acc": 0.28241240821765917 }, { "epoch": 3.7247141600703606, "grad_norm": 0.24198188370330811, "learning_rate": 0.0004750586674695969, "loss": 3.1108243465423584, "step": 6354, "token_acc": 0.28610885752820137 }, { "epoch": 3.7253004983875697, "grad_norm": 0.3137847620825557, "learning_rate": 0.00047504811645543486, "loss": 3.104112148284912, "step": 6355, "token_acc": 0.28617102487784507 }, { "epoch": 3.7258868367047784, "grad_norm": 0.3303062147803285, "learning_rate": 0.0004750375633272491, "loss": 3.0763165950775146, "step": 6356, "token_acc": 0.2890334387123154 }, { "epoch": 3.7264731750219875, "grad_norm": 0.19696538558008614, "learning_rate": 0.00047502700808513856, "loss": 3.057522773742676, "step": 6357, "token_acc": 0.2935974087356371 }, { "epoch": 3.7270595133391966, "grad_norm": 0.2997035596768689, "learning_rate": 0.00047501645072920253, "loss": 3.1123204231262207, "step": 6358, "token_acc": 0.28498380553175073 }, { "epoch": 3.7276458516564057, "grad_norm": 0.22792941995398375, "learning_rate": 0.0004750058912595402, "loss": 3.084667921066284, "step": 6359, "token_acc": 0.28957525942776235 }, { "epoch": 3.728232189973615, "grad_norm": 0.23187680012319573, "learning_rate": 0.0004749953296762507, "loss": 3.1029927730560303, "step": 6360, "token_acc": 0.2879277559101331 }, { "epoch": 3.728818528290824, "grad_norm": 0.2599375000107801, "learning_rate": 0.00047498476597943317, "loss": 3.0861122608184814, "step": 6361, "token_acc": 0.2884346557283376 }, { "epoch": 3.729404866608033, "grad_norm": 0.2452764840818546, "learning_rate": 0.0004749742001691871, "loss": 3.088015556335449, "step": 6362, "token_acc": 0.2898343796018472 }, { "epoch": 3.7299912049252417, "grad_norm": 0.2363777536075646, "learning_rate": 0.00047496363224561145, "loss": 3.0809226036071777, "step": 6363, "token_acc": 0.29010584140605494 }, { "epoch": 3.730577543242451, "grad_norm": 0.2224000507144588, "learning_rate": 0.00047495306220880567, "loss": 3.1168951988220215, "step": 6364, "token_acc": 0.28508976310083367 }, { "epoch": 3.73116388155966, "grad_norm": 0.27244797013263033, "learning_rate": 0.00047494249005886903, "loss": 3.0753300189971924, "step": 6365, "token_acc": 0.29095698475619913 }, { "epoch": 3.731750219876869, "grad_norm": 0.20947797041117322, "learning_rate": 0.00047493191579590075, "loss": 3.1048426628112793, "step": 6366, "token_acc": 0.286545908423578 }, { "epoch": 3.7323365581940777, "grad_norm": 0.2449995968578766, "learning_rate": 0.00047492133942000025, "loss": 3.071350574493408, "step": 6367, "token_acc": 0.2913103653466583 }, { "epoch": 3.732922896511287, "grad_norm": 0.2326212799842961, "learning_rate": 0.0004749107609312668, "loss": 3.084408760070801, "step": 6368, "token_acc": 0.2890459751505434 }, { "epoch": 3.733509234828496, "grad_norm": 0.20377827842068108, "learning_rate": 0.00047490018032979986, "loss": 3.0904946327209473, "step": 6369, "token_acc": 0.28881516489866677 }, { "epoch": 3.734095573145705, "grad_norm": 0.22610124446801713, "learning_rate": 0.00047488959761569883, "loss": 3.143413543701172, "step": 6370, "token_acc": 0.28060643422111237 }, { "epoch": 3.734681911462914, "grad_norm": 0.19149977516521693, "learning_rate": 0.000474879012789063, "loss": 3.045419692993164, "step": 6371, "token_acc": 0.29487237466913185 }, { "epoch": 3.7352682497801233, "grad_norm": 0.2156397829702423, "learning_rate": 0.0004748684258499919, "loss": 3.081057071685791, "step": 6372, "token_acc": 0.29122793211090997 }, { "epoch": 3.7358545880973324, "grad_norm": 0.22736878276091305, "learning_rate": 0.000474857836798585, "loss": 3.024683952331543, "step": 6373, "token_acc": 0.29650535617738893 }, { "epoch": 3.736440926414541, "grad_norm": 0.22701789248966978, "learning_rate": 0.0004748472456349416, "loss": 3.0468409061431885, "step": 6374, "token_acc": 0.29384342277907843 }, { "epoch": 3.73702726473175, "grad_norm": 0.20546274159111727, "learning_rate": 0.0004748366523591614, "loss": 3.07374906539917, "step": 6375, "token_acc": 0.29122983431366767 }, { "epoch": 3.7376136030489593, "grad_norm": 0.1922325029554844, "learning_rate": 0.00047482605697134385, "loss": 3.0682168006896973, "step": 6376, "token_acc": 0.2902951561839889 }, { "epoch": 3.7381999413661684, "grad_norm": 0.20137358024658844, "learning_rate": 0.00047481545947158845, "loss": 3.137650728225708, "step": 6377, "token_acc": 0.28087518020026014 }, { "epoch": 3.738786279683377, "grad_norm": 0.2294557176990818, "learning_rate": 0.0004748048598599947, "loss": 3.0798189640045166, "step": 6378, "token_acc": 0.29042616146846445 }, { "epoch": 3.739372618000586, "grad_norm": 0.2118823668288472, "learning_rate": 0.00047479425813666223, "loss": 3.0516233444213867, "step": 6379, "token_acc": 0.293272564883892 }, { "epoch": 3.7399589563177953, "grad_norm": 0.1998996152742327, "learning_rate": 0.0004747836543016906, "loss": 3.0955705642700195, "step": 6380, "token_acc": 0.2899631948059422 }, { "epoch": 3.7405452946350044, "grad_norm": 0.23288761918033363, "learning_rate": 0.00047477304835517946, "loss": 3.0891377925872803, "step": 6381, "token_acc": 0.28923892420951386 }, { "epoch": 3.7411316329522135, "grad_norm": 0.25974493784458447, "learning_rate": 0.00047476244029722846, "loss": 3.1461148262023926, "step": 6382, "token_acc": 0.28120567470610075 }, { "epoch": 3.7417179712694226, "grad_norm": 0.21724643186351109, "learning_rate": 0.0004747518301279372, "loss": 3.0851516723632812, "step": 6383, "token_acc": 0.2884287441462655 }, { "epoch": 3.7423043095866317, "grad_norm": 0.21108497092543624, "learning_rate": 0.0004747412178474053, "loss": 3.036337375640869, "step": 6384, "token_acc": 0.2965106584404198 }, { "epoch": 3.7428906479038404, "grad_norm": 0.25328917846710813, "learning_rate": 0.0004747306034557325, "loss": 3.0986037254333496, "step": 6385, "token_acc": 0.28756795303747495 }, { "epoch": 3.7434769862210495, "grad_norm": 0.25887677769530903, "learning_rate": 0.00047471998695301857, "loss": 3.1008682250976562, "step": 6386, "token_acc": 0.28516363961659996 }, { "epoch": 3.7440633245382586, "grad_norm": 0.25135231797596536, "learning_rate": 0.00047470936833936305, "loss": 3.062150478363037, "step": 6387, "token_acc": 0.2920213879710537 }, { "epoch": 3.7446496628554677, "grad_norm": 0.19991561481775152, "learning_rate": 0.0004746987476148659, "loss": 3.102640390396118, "step": 6388, "token_acc": 0.2881951906620025 }, { "epoch": 3.7452360011726764, "grad_norm": 0.2545281507767261, "learning_rate": 0.00047468812477962686, "loss": 3.0894246101379395, "step": 6389, "token_acc": 0.28666467764814263 }, { "epoch": 3.7458223394898855, "grad_norm": 0.27288966421379557, "learning_rate": 0.00047467749983374553, "loss": 3.07578706741333, "step": 6390, "token_acc": 0.28974354353271936 }, { "epoch": 3.7464086778070946, "grad_norm": 0.19786237631646636, "learning_rate": 0.00047466687277732193, "loss": 3.0883891582489014, "step": 6391, "token_acc": 0.28828307144554977 }, { "epoch": 3.7469950161243037, "grad_norm": 0.21565778977889355, "learning_rate": 0.00047465624361045576, "loss": 3.1305129528045654, "step": 6392, "token_acc": 0.2834539327954463 }, { "epoch": 3.747581354441513, "grad_norm": 0.24551120635081713, "learning_rate": 0.000474645612333247, "loss": 3.0904908180236816, "step": 6393, "token_acc": 0.29071121303822717 }, { "epoch": 3.748167692758722, "grad_norm": 0.19047104739032295, "learning_rate": 0.00047463497894579534, "loss": 3.0701749324798584, "step": 6394, "token_acc": 0.29226095654428924 }, { "epoch": 3.748754031075931, "grad_norm": 0.22056156495431772, "learning_rate": 0.00047462434344820075, "loss": 3.0653555393218994, "step": 6395, "token_acc": 0.29207595403684433 }, { "epoch": 3.7493403693931397, "grad_norm": 0.25326340255258445, "learning_rate": 0.00047461370584056317, "loss": 3.082517623901367, "step": 6396, "token_acc": 0.2895502774512471 }, { "epoch": 3.749926707710349, "grad_norm": 0.19065836356036706, "learning_rate": 0.0004746030661229825, "loss": 3.0944652557373047, "step": 6397, "token_acc": 0.28873820361199726 }, { "epoch": 3.750513046027558, "grad_norm": 0.2523395435345909, "learning_rate": 0.0004745924242955587, "loss": 3.109856605529785, "step": 6398, "token_acc": 0.28643605561569263 }, { "epoch": 3.751099384344767, "grad_norm": 0.2832290168847538, "learning_rate": 0.00047458178035839164, "loss": 3.085594654083252, "step": 6399, "token_acc": 0.2886416117619643 }, { "epoch": 3.7516857226619758, "grad_norm": 0.18623456571495328, "learning_rate": 0.00047457113431158146, "loss": 3.051520586013794, "step": 6400, "token_acc": 0.29476584022038566 }, { "epoch": 3.752272060979185, "grad_norm": 0.2528214702796898, "learning_rate": 0.000474560486155228, "loss": 3.0712080001831055, "step": 6401, "token_acc": 0.2902262736038465 }, { "epoch": 3.752858399296394, "grad_norm": 0.21497797853552386, "learning_rate": 0.00047454983588943146, "loss": 3.09531307220459, "step": 6402, "token_acc": 0.2902139357248528 }, { "epoch": 3.753444737613603, "grad_norm": 0.21596729792076638, "learning_rate": 0.0004745391835142917, "loss": 3.0975565910339355, "step": 6403, "token_acc": 0.2857240535956135 }, { "epoch": 3.754031075930812, "grad_norm": 0.21625966222461265, "learning_rate": 0.00047452852902990896, "loss": 3.069629192352295, "step": 6404, "token_acc": 0.29146853071533585 }, { "epoch": 3.7546174142480213, "grad_norm": 0.19659421613237166, "learning_rate": 0.0004745178724363832, "loss": 3.067108392715454, "step": 6405, "token_acc": 0.29222581770882905 }, { "epoch": 3.75520375256523, "grad_norm": 0.25160829218458997, "learning_rate": 0.00047450721373381465, "loss": 3.0912842750549316, "step": 6406, "token_acc": 0.28888556191695536 }, { "epoch": 3.755790090882439, "grad_norm": 0.18700214518917274, "learning_rate": 0.0004744965529223033, "loss": 3.0798068046569824, "step": 6407, "token_acc": 0.2893945584645525 }, { "epoch": 3.756376429199648, "grad_norm": 0.24660516983618272, "learning_rate": 0.00047448589000194933, "loss": 3.0610501766204834, "step": 6408, "token_acc": 0.2923797729398758 }, { "epoch": 3.7569627675168573, "grad_norm": 0.22821101279186334, "learning_rate": 0.00047447522497285293, "loss": 3.068467378616333, "step": 6409, "token_acc": 0.2908708526031471 }, { "epoch": 3.757549105834066, "grad_norm": 0.21073411015822335, "learning_rate": 0.0004744645578351143, "loss": 3.0901501178741455, "step": 6410, "token_acc": 0.2897971497306761 }, { "epoch": 3.758135444151275, "grad_norm": 0.2386400768893923, "learning_rate": 0.00047445388858883365, "loss": 3.0981240272521973, "step": 6411, "token_acc": 0.28805684893728917 }, { "epoch": 3.758721782468484, "grad_norm": 0.18918978538703182, "learning_rate": 0.0004744432172341111, "loss": 3.0546884536743164, "step": 6412, "token_acc": 0.2939091992608808 }, { "epoch": 3.7593081207856933, "grad_norm": 0.22136922806183532, "learning_rate": 0.00047443254377104696, "loss": 3.0653867721557617, "step": 6413, "token_acc": 0.2926235496743938 }, { "epoch": 3.7598944591029024, "grad_norm": 0.23143306661745558, "learning_rate": 0.00047442186819974153, "loss": 3.071683406829834, "step": 6414, "token_acc": 0.29083673474597255 }, { "epoch": 3.7604807974201115, "grad_norm": 0.19948785699427196, "learning_rate": 0.00047441119052029506, "loss": 3.1045453548431396, "step": 6415, "token_acc": 0.28719680603790093 }, { "epoch": 3.7610671357373207, "grad_norm": 0.22361873721942832, "learning_rate": 0.00047440051073280786, "loss": 3.0922703742980957, "step": 6416, "token_acc": 0.2884903137288043 }, { "epoch": 3.7616534740545293, "grad_norm": 0.25970467791759433, "learning_rate": 0.00047438982883738027, "loss": 3.0833253860473633, "step": 6417, "token_acc": 0.28823003101547645 }, { "epoch": 3.7622398123717384, "grad_norm": 0.2119238599325054, "learning_rate": 0.00047437914483411256, "loss": 3.1086997985839844, "step": 6418, "token_acc": 0.28556019394819404 }, { "epoch": 3.7628261506889475, "grad_norm": 0.22027045724409705, "learning_rate": 0.00047436845872310515, "loss": 3.0741004943847656, "step": 6419, "token_acc": 0.28981572669198075 }, { "epoch": 3.7634124890061567, "grad_norm": 0.3317358814104254, "learning_rate": 0.00047435777050445837, "loss": 3.130650281906128, "step": 6420, "token_acc": 0.28348512022221 }, { "epoch": 3.7639988273233653, "grad_norm": 0.30222877529653575, "learning_rate": 0.0004743470801782728, "loss": 3.079010248184204, "step": 6421, "token_acc": 0.29010901143420664 }, { "epoch": 3.7645851656405744, "grad_norm": 0.19628348017431524, "learning_rate": 0.0004743363877446486, "loss": 3.09476900100708, "step": 6422, "token_acc": 0.2883022733332805 }, { "epoch": 3.7651715039577835, "grad_norm": 0.26057818981682146, "learning_rate": 0.00047432569320368634, "loss": 3.0595431327819824, "step": 6423, "token_acc": 0.29281757611582604 }, { "epoch": 3.7657578422749927, "grad_norm": 0.17855140797385025, "learning_rate": 0.0004743149965554865, "loss": 3.045809268951416, "step": 6424, "token_acc": 0.29422164249697486 }, { "epoch": 3.7663441805922018, "grad_norm": 0.2415857122525715, "learning_rate": 0.0004743042978001495, "loss": 3.0594403743743896, "step": 6425, "token_acc": 0.29193847765309344 }, { "epoch": 3.766930518909411, "grad_norm": 0.18545875682024152, "learning_rate": 0.00047429359693777594, "loss": 3.1017227172851562, "step": 6426, "token_acc": 0.28646010519980114 }, { "epoch": 3.76751685722662, "grad_norm": 0.24179820240991265, "learning_rate": 0.0004742828939684662, "loss": 3.0467870235443115, "step": 6427, "token_acc": 0.29418214248242575 }, { "epoch": 3.7681031955438287, "grad_norm": 0.24844337877755568, "learning_rate": 0.000474272188892321, "loss": 3.058894634246826, "step": 6428, "token_acc": 0.29369662228731896 }, { "epoch": 3.7686895338610378, "grad_norm": 0.19880144880719225, "learning_rate": 0.00047426148170944075, "loss": 3.068110704421997, "step": 6429, "token_acc": 0.29187325720622626 }, { "epoch": 3.769275872178247, "grad_norm": 0.2244749915506914, "learning_rate": 0.0004742507724199261, "loss": 3.1019911766052246, "step": 6430, "token_acc": 0.2860349432984469 }, { "epoch": 3.769862210495456, "grad_norm": 0.2127816384180002, "learning_rate": 0.00047424006102387753, "loss": 3.074827194213867, "step": 6431, "token_acc": 0.29164547686620623 }, { "epoch": 3.7704485488126647, "grad_norm": 0.2502440320856774, "learning_rate": 0.0004742293475213958, "loss": 3.106818199157715, "step": 6432, "token_acc": 0.2859799743688592 }, { "epoch": 3.771034887129874, "grad_norm": 0.21142667348637867, "learning_rate": 0.0004742186319125815, "loss": 3.056962251663208, "step": 6433, "token_acc": 0.2914921739867505 }, { "epoch": 3.771621225447083, "grad_norm": 0.23724458027285325, "learning_rate": 0.00047420791419753536, "loss": 3.102792739868164, "step": 6434, "token_acc": 0.2866748665457308 }, { "epoch": 3.772207563764292, "grad_norm": 0.21776103294564458, "learning_rate": 0.0004741971943763579, "loss": 3.0702145099639893, "step": 6435, "token_acc": 0.29200041749518946 }, { "epoch": 3.772793902081501, "grad_norm": 0.2079951551446765, "learning_rate": 0.00047418647244915, "loss": 3.0796360969543457, "step": 6436, "token_acc": 0.2909337385895468 }, { "epoch": 3.7733802403987102, "grad_norm": 0.22327151160181286, "learning_rate": 0.0004741757484160122, "loss": 3.109640598297119, "step": 6437, "token_acc": 0.2856802150598958 }, { "epoch": 3.7739665787159193, "grad_norm": 0.21142913617489068, "learning_rate": 0.0004741650222770454, "loss": 3.0803279876708984, "step": 6438, "token_acc": 0.2881597604752542 }, { "epoch": 3.774552917033128, "grad_norm": 0.21314802950040948, "learning_rate": 0.00047415429403235024, "loss": 3.0818324089050293, "step": 6439, "token_acc": 0.2883151509414614 }, { "epoch": 3.775139255350337, "grad_norm": 0.20301888300831222, "learning_rate": 0.0004741435636820275, "loss": 3.088108539581299, "step": 6440, "token_acc": 0.28967277778366857 }, { "epoch": 3.7757255936675462, "grad_norm": 0.2032300205702744, "learning_rate": 0.000474132831226178, "loss": 3.0700416564941406, "step": 6441, "token_acc": 0.2909704760338088 }, { "epoch": 3.7763119319847553, "grad_norm": 0.22832430446296118, "learning_rate": 0.0004741220966649027, "loss": 3.0908679962158203, "step": 6442, "token_acc": 0.28638404355181585 }, { "epoch": 3.776898270301964, "grad_norm": 0.2042237975450884, "learning_rate": 0.00047411135999830226, "loss": 3.100489377975464, "step": 6443, "token_acc": 0.2858693717743371 }, { "epoch": 3.777484608619173, "grad_norm": 0.2039097274087466, "learning_rate": 0.0004741006212264775, "loss": 3.081758975982666, "step": 6444, "token_acc": 0.2884134408417561 }, { "epoch": 3.7780709469363822, "grad_norm": 0.23049677035636082, "learning_rate": 0.0004740898803495295, "loss": 3.1115474700927734, "step": 6445, "token_acc": 0.28419950706278324 }, { "epoch": 3.7786572852535913, "grad_norm": 0.24178161173432677, "learning_rate": 0.0004740791373675589, "loss": 3.0616371631622314, "step": 6446, "token_acc": 0.29035810233875503 }, { "epoch": 3.7792436235708005, "grad_norm": 0.22665243486901956, "learning_rate": 0.0004740683922806669, "loss": 3.1025235652923584, "step": 6447, "token_acc": 0.2864875354687115 }, { "epoch": 3.7798299618880096, "grad_norm": 0.20542575778739877, "learning_rate": 0.0004740576450889542, "loss": 3.0961828231811523, "step": 6448, "token_acc": 0.2876179284788418 }, { "epoch": 3.7804163002052187, "grad_norm": 0.19107041311984885, "learning_rate": 0.0004740468957925219, "loss": 3.1155200004577637, "step": 6449, "token_acc": 0.28589409907411356 }, { "epoch": 3.7810026385224274, "grad_norm": 0.2247804287160029, "learning_rate": 0.0004740361443914709, "loss": 3.0904228687286377, "step": 6450, "token_acc": 0.28790667654722346 }, { "epoch": 3.7815889768396365, "grad_norm": 0.23899753968672394, "learning_rate": 0.00047402539088590225, "loss": 3.044929027557373, "step": 6451, "token_acc": 0.29441679674038634 }, { "epoch": 3.7821753151568456, "grad_norm": 0.2408823728109118, "learning_rate": 0.00047401463527591687, "loss": 3.0716428756713867, "step": 6452, "token_acc": 0.2925693322861847 }, { "epoch": 3.7827616534740547, "grad_norm": 0.20254956700910243, "learning_rate": 0.0004740038775616159, "loss": 3.0462749004364014, "step": 6453, "token_acc": 0.29393332596452176 }, { "epoch": 3.7833479917912634, "grad_norm": 0.2471268919872209, "learning_rate": 0.0004739931177431003, "loss": 3.0643296241760254, "step": 6454, "token_acc": 0.2912173373391048 }, { "epoch": 3.7839343301084725, "grad_norm": 0.21312562596555237, "learning_rate": 0.00047398235582047125, "loss": 3.134032726287842, "step": 6455, "token_acc": 0.2825558718935436 }, { "epoch": 3.7845206684256816, "grad_norm": 0.21554699290830556, "learning_rate": 0.00047397159179382977, "loss": 3.068225622177124, "step": 6456, "token_acc": 0.2912419383107729 }, { "epoch": 3.7851070067428907, "grad_norm": 0.1895343276454945, "learning_rate": 0.0004739608256632769, "loss": 3.0592827796936035, "step": 6457, "token_acc": 0.29140623544844724 }, { "epoch": 3.7856933450601, "grad_norm": 0.21787192590093057, "learning_rate": 0.00047395005742891395, "loss": 3.063180685043335, "step": 6458, "token_acc": 0.291065568369028 }, { "epoch": 3.786279683377309, "grad_norm": 0.20634922715191942, "learning_rate": 0.000473939287090842, "loss": 3.0723299980163574, "step": 6459, "token_acc": 0.2908520807645281 }, { "epoch": 3.7868660216945176, "grad_norm": 0.19582456614616287, "learning_rate": 0.0004739285146491622, "loss": 3.0688743591308594, "step": 6460, "token_acc": 0.2917738851857593 }, { "epoch": 3.7874523600117267, "grad_norm": 0.21620687349128492, "learning_rate": 0.00047391774010397574, "loss": 3.0666770935058594, "step": 6461, "token_acc": 0.29123699326161606 }, { "epoch": 3.788038698328936, "grad_norm": 0.20895968063006662, "learning_rate": 0.00047390696345538385, "loss": 3.082728385925293, "step": 6462, "token_acc": 0.2874165174108898 }, { "epoch": 3.788625036646145, "grad_norm": 0.19742636998017737, "learning_rate": 0.00047389618470348777, "loss": 3.1156840324401855, "step": 6463, "token_acc": 0.2852937095573859 }, { "epoch": 3.7892113749633536, "grad_norm": 0.19149777652974273, "learning_rate": 0.00047388540384838877, "loss": 3.0431203842163086, "step": 6464, "token_acc": 0.29476156438272233 }, { "epoch": 3.7897977132805627, "grad_norm": 0.20829704014445763, "learning_rate": 0.000473874620890188, "loss": 3.1093311309814453, "step": 6465, "token_acc": 0.28736350480199974 }, { "epoch": 3.790384051597772, "grad_norm": 0.19347030684133373, "learning_rate": 0.00047386383582898685, "loss": 3.098940372467041, "step": 6466, "token_acc": 0.28861558588538233 }, { "epoch": 3.790970389914981, "grad_norm": 0.18723717596538453, "learning_rate": 0.0004738530486648867, "loss": 3.0609235763549805, "step": 6467, "token_acc": 0.29272230436849833 }, { "epoch": 3.79155672823219, "grad_norm": 0.2127756737010464, "learning_rate": 0.00047384225939798875, "loss": 3.038447856903076, "step": 6468, "token_acc": 0.2954359274429491 }, { "epoch": 3.792143066549399, "grad_norm": 0.19724976284962384, "learning_rate": 0.0004738314680283944, "loss": 3.0177078247070312, "step": 6469, "token_acc": 0.29762839975371025 }, { "epoch": 3.7927294048666083, "grad_norm": 0.2296480241058165, "learning_rate": 0.000473820674556205, "loss": 3.0890488624572754, "step": 6470, "token_acc": 0.29041833476589823 }, { "epoch": 3.793315743183817, "grad_norm": 0.3002770702722004, "learning_rate": 0.00047380987898152207, "loss": 3.0694825649261475, "step": 6471, "token_acc": 0.292006917090979 }, { "epoch": 3.793902081501026, "grad_norm": 0.5011007192051179, "learning_rate": 0.0004737990813044468, "loss": 3.0852646827697754, "step": 6472, "token_acc": 0.28961570149224625 }, { "epoch": 3.794488419818235, "grad_norm": 0.44467004824027656, "learning_rate": 0.0004737882815250808, "loss": 3.063803195953369, "step": 6473, "token_acc": 0.2918106986294631 }, { "epoch": 3.7950747581354443, "grad_norm": 0.269909424964999, "learning_rate": 0.00047377747964352546, "loss": 3.090294599533081, "step": 6474, "token_acc": 0.28807092329080797 }, { "epoch": 3.795661096452653, "grad_norm": 0.27585784792092855, "learning_rate": 0.0004737666756598822, "loss": 3.07590389251709, "step": 6475, "token_acc": 0.2892968926960855 }, { "epoch": 3.796247434769862, "grad_norm": 0.2599556050868869, "learning_rate": 0.0004737558695742526, "loss": 3.1060409545898438, "step": 6476, "token_acc": 0.28603567659399215 }, { "epoch": 3.796833773087071, "grad_norm": 0.21055743592283394, "learning_rate": 0.0004737450613867381, "loss": 3.086583137512207, "step": 6477, "token_acc": 0.2880370040182436 }, { "epoch": 3.7974201114042803, "grad_norm": 0.23334809332973078, "learning_rate": 0.0004737342510974402, "loss": 3.067042350769043, "step": 6478, "token_acc": 0.2917798184339272 }, { "epoch": 3.7980064497214894, "grad_norm": 0.20076907788305, "learning_rate": 0.00047372343870646054, "loss": 3.078303098678589, "step": 6479, "token_acc": 0.2908017994046746 }, { "epoch": 3.7985927880386985, "grad_norm": 0.19508491214327245, "learning_rate": 0.00047371262421390067, "loss": 3.0965092182159424, "step": 6480, "token_acc": 0.2875405176082633 }, { "epoch": 3.7991791263559076, "grad_norm": 0.2377572860038879, "learning_rate": 0.00047370180761986214, "loss": 3.0716793537139893, "step": 6481, "token_acc": 0.2893204686708239 }, { "epoch": 3.7997654646731163, "grad_norm": 0.20704379405681425, "learning_rate": 0.0004736909889244465, "loss": 3.0547895431518555, "step": 6482, "token_acc": 0.29292929292929293 }, { "epoch": 3.8003518029903254, "grad_norm": 0.21962113699705885, "learning_rate": 0.0004736801681277555, "loss": 3.093174934387207, "step": 6483, "token_acc": 0.28729403437020723 }, { "epoch": 3.8009381413075345, "grad_norm": 0.20243848394263678, "learning_rate": 0.00047366934522989076, "loss": 3.061690330505371, "step": 6484, "token_acc": 0.29224018216349784 }, { "epoch": 3.8015244796247436, "grad_norm": 0.21849963634397454, "learning_rate": 0.0004736585202309539, "loss": 3.118579387664795, "step": 6485, "token_acc": 0.2847922819960533 }, { "epoch": 3.8021108179419523, "grad_norm": 0.2230702448403142, "learning_rate": 0.0004736476931310466, "loss": 3.1301307678222656, "step": 6486, "token_acc": 0.2817265838260558 }, { "epoch": 3.8026971562591614, "grad_norm": 0.22013017880315414, "learning_rate": 0.00047363686393027063, "loss": 3.0648999214172363, "step": 6487, "token_acc": 0.2918241609691424 }, { "epoch": 3.8032834945763705, "grad_norm": 0.23059748764978694, "learning_rate": 0.0004736260326287276, "loss": 3.096896171569824, "step": 6488, "token_acc": 0.2884981139252544 }, { "epoch": 3.8038698328935796, "grad_norm": 0.19967932274814715, "learning_rate": 0.00047361519922651943, "loss": 3.072824239730835, "step": 6489, "token_acc": 0.29092197609964804 }, { "epoch": 3.8044561712107887, "grad_norm": 0.2074536231331756, "learning_rate": 0.00047360436372374776, "loss": 3.0848183631896973, "step": 6490, "token_acc": 0.2897681732866636 }, { "epoch": 3.805042509527998, "grad_norm": 0.2328604757541851, "learning_rate": 0.0004735935261205144, "loss": 3.0796430110931396, "step": 6491, "token_acc": 0.28943674055124863 }, { "epoch": 3.805628847845207, "grad_norm": 0.2645724881621736, "learning_rate": 0.00047358268641692114, "loss": 3.0948429107666016, "step": 6492, "token_acc": 0.2874423172367184 }, { "epoch": 3.8062151861624156, "grad_norm": 0.2283438658322231, "learning_rate": 0.00047357184461306986, "loss": 3.1003668308258057, "step": 6493, "token_acc": 0.28553309471686916 }, { "epoch": 3.8068015244796247, "grad_norm": 0.21773807805972234, "learning_rate": 0.0004735610007090623, "loss": 3.0615758895874023, "step": 6494, "token_acc": 0.2921427667917528 }, { "epoch": 3.807387862796834, "grad_norm": 0.21669628908162353, "learning_rate": 0.0004735501547050005, "loss": 3.0590713024139404, "step": 6495, "token_acc": 0.29293807839086244 }, { "epoch": 3.807974201114043, "grad_norm": 0.21885551846769633, "learning_rate": 0.0004735393066009861, "loss": 3.0455322265625, "step": 6496, "token_acc": 0.2948628939083981 }, { "epoch": 3.8085605394312516, "grad_norm": 0.23225083455986617, "learning_rate": 0.00047352845639712124, "loss": 3.1016952991485596, "step": 6497, "token_acc": 0.286676443703354 }, { "epoch": 3.8091468777484607, "grad_norm": 0.1831727015065005, "learning_rate": 0.0004735176040935077, "loss": 3.0895891189575195, "step": 6498, "token_acc": 0.2894352227497856 }, { "epoch": 3.80973321606567, "grad_norm": 0.22051646500312877, "learning_rate": 0.00047350674969024744, "loss": 3.062586784362793, "step": 6499, "token_acc": 0.2925988560595452 }, { "epoch": 3.810319554382879, "grad_norm": 0.18062216756519453, "learning_rate": 0.00047349589318744246, "loss": 3.084185838699341, "step": 6500, "token_acc": 0.2876098957268452 }, { "epoch": 3.810905892700088, "grad_norm": 0.2258568872461433, "learning_rate": 0.0004734850345851948, "loss": 3.1014063358306885, "step": 6501, "token_acc": 0.2865528095907843 }, { "epoch": 3.811492231017297, "grad_norm": 0.19692213567732791, "learning_rate": 0.00047347417388360623, "loss": 3.0633296966552734, "step": 6502, "token_acc": 0.291835774289748 }, { "epoch": 3.8120785693345063, "grad_norm": 0.23134858135010314, "learning_rate": 0.0004734633110827791, "loss": 3.0930023193359375, "step": 6503, "token_acc": 0.2886753879670835 }, { "epoch": 3.812664907651715, "grad_norm": 0.19789121786995423, "learning_rate": 0.00047345244618281527, "loss": 3.0659589767456055, "step": 6504, "token_acc": 0.2923300924082196 }, { "epoch": 3.813251245968924, "grad_norm": 0.23491367590369172, "learning_rate": 0.0004734415791838167, "loss": 3.092395544052124, "step": 6505, "token_acc": 0.289600235628029 }, { "epoch": 3.813837584286133, "grad_norm": 0.21326482938713123, "learning_rate": 0.00047343071008588565, "loss": 3.1044740676879883, "step": 6506, "token_acc": 0.2871264075861485 }, { "epoch": 3.8144239226033423, "grad_norm": 0.18654998183816066, "learning_rate": 0.0004734198388891241, "loss": 3.039532423019409, "step": 6507, "token_acc": 0.2936162147004144 }, { "epoch": 3.815010260920551, "grad_norm": 0.19397634738348812, "learning_rate": 0.0004734089655936343, "loss": 3.059903144836426, "step": 6508, "token_acc": 0.2942210043560794 }, { "epoch": 3.81559659923776, "grad_norm": 0.19844929419940502, "learning_rate": 0.0004733980901995183, "loss": 3.107282876968384, "step": 6509, "token_acc": 0.28612871093950565 }, { "epoch": 3.816182937554969, "grad_norm": 0.18815098749156828, "learning_rate": 0.00047338721270687823, "loss": 3.0847420692443848, "step": 6510, "token_acc": 0.29008106625490154 }, { "epoch": 3.8167692758721783, "grad_norm": 0.19994557916635425, "learning_rate": 0.0004733763331158164, "loss": 3.082430839538574, "step": 6511, "token_acc": 0.28864543489106936 }, { "epoch": 3.8173556141893874, "grad_norm": 0.22114834635248928, "learning_rate": 0.0004733654514264348, "loss": 3.0741260051727295, "step": 6512, "token_acc": 0.2899823313458219 }, { "epoch": 3.8179419525065965, "grad_norm": 0.21245523930134028, "learning_rate": 0.0004733545676388359, "loss": 3.074143886566162, "step": 6513, "token_acc": 0.28932991414873455 }, { "epoch": 3.818528290823805, "grad_norm": 0.20524472016876247, "learning_rate": 0.0004733436817531218, "loss": 3.069697856903076, "step": 6514, "token_acc": 0.2906101816455664 }, { "epoch": 3.8191146291410143, "grad_norm": 0.2047996736642631, "learning_rate": 0.0004733327937693947, "loss": 3.0679783821105957, "step": 6515, "token_acc": 0.29301700080067783 }, { "epoch": 3.8197009674582234, "grad_norm": 0.20203877463515277, "learning_rate": 0.00047332190368775697, "loss": 3.036182403564453, "step": 6516, "token_acc": 0.2948909535875945 }, { "epoch": 3.8202873057754325, "grad_norm": 0.21285683850501752, "learning_rate": 0.00047331101150831093, "loss": 3.114842176437378, "step": 6517, "token_acc": 0.28343772679342233 }, { "epoch": 3.820873644092641, "grad_norm": 0.2033666317722894, "learning_rate": 0.0004733001172311587, "loss": 3.068603515625, "step": 6518, "token_acc": 0.2915196086052936 }, { "epoch": 3.8214599824098503, "grad_norm": 0.19465744217443753, "learning_rate": 0.00047328922085640294, "loss": 3.059149980545044, "step": 6519, "token_acc": 0.28956177841132774 }, { "epoch": 3.8220463207270594, "grad_norm": 0.21655080022368167, "learning_rate": 0.0004732783223841458, "loss": 3.090820074081421, "step": 6520, "token_acc": 0.28815267119341453 }, { "epoch": 3.8226326590442685, "grad_norm": 0.2320489651383197, "learning_rate": 0.0004732674218144897, "loss": 3.0795536041259766, "step": 6521, "token_acc": 0.2905472324790747 }, { "epoch": 3.8232189973614776, "grad_norm": 0.23110586104112646, "learning_rate": 0.000473256519147537, "loss": 3.0489437580108643, "step": 6522, "token_acc": 0.2935650760405758 }, { "epoch": 3.8238053356786867, "grad_norm": 0.20862953240589457, "learning_rate": 0.0004732456143833901, "loss": 3.104154586791992, "step": 6523, "token_acc": 0.28494704464568493 }, { "epoch": 3.824391673995896, "grad_norm": 0.23405414922468074, "learning_rate": 0.00047323470752215155, "loss": 3.0823616981506348, "step": 6524, "token_acc": 0.2902636870545099 }, { "epoch": 3.8249780123131045, "grad_norm": 0.25539323628937666, "learning_rate": 0.00047322379856392375, "loss": 3.1174960136413574, "step": 6525, "token_acc": 0.2827509546476958 }, { "epoch": 3.8255643506303136, "grad_norm": 0.2671591046573203, "learning_rate": 0.0004732128875088091, "loss": 3.0943658351898193, "step": 6526, "token_acc": 0.28748294733293656 }, { "epoch": 3.8261506889475227, "grad_norm": 0.22001468083453776, "learning_rate": 0.0004732019743569101, "loss": 3.075218915939331, "step": 6527, "token_acc": 0.2895261355719754 }, { "epoch": 3.826737027264732, "grad_norm": 0.2478147625609848, "learning_rate": 0.00047319105910832937, "loss": 3.14341402053833, "step": 6528, "token_acc": 0.28149194657579246 }, { "epoch": 3.8273233655819405, "grad_norm": 0.2301154088190999, "learning_rate": 0.0004731801417631695, "loss": 3.0601937770843506, "step": 6529, "token_acc": 0.29323342986792117 }, { "epoch": 3.8279097038991496, "grad_norm": 0.2034744061773359, "learning_rate": 0.00047316922232153283, "loss": 3.0827488899230957, "step": 6530, "token_acc": 0.28918183774701356 }, { "epoch": 3.8284960422163588, "grad_norm": 0.22890530251673363, "learning_rate": 0.00047315830078352206, "loss": 3.0690417289733887, "step": 6531, "token_acc": 0.28920489635623686 }, { "epoch": 3.829082380533568, "grad_norm": 0.2996797010799379, "learning_rate": 0.0004731473771492397, "loss": 3.116313934326172, "step": 6532, "token_acc": 0.28445849818789787 }, { "epoch": 3.829668718850777, "grad_norm": 0.230619664187333, "learning_rate": 0.00047313645141878856, "loss": 3.0436506271362305, "step": 6533, "token_acc": 0.2950313994900217 }, { "epoch": 3.830255057167986, "grad_norm": 0.21000301867424817, "learning_rate": 0.00047312552359227107, "loss": 3.0266683101654053, "step": 6534, "token_acc": 0.2978534411167993 }, { "epoch": 3.830841395485195, "grad_norm": 0.24053419385368047, "learning_rate": 0.00047311459366978993, "loss": 3.120404005050659, "step": 6535, "token_acc": 0.2826958726316137 }, { "epoch": 3.831427733802404, "grad_norm": 0.19532967110024352, "learning_rate": 0.00047310366165144793, "loss": 3.077125072479248, "step": 6536, "token_acc": 0.29160118781699207 }, { "epoch": 3.832014072119613, "grad_norm": 0.2793489616461439, "learning_rate": 0.0004730927275373476, "loss": 3.0455527305603027, "step": 6537, "token_acc": 0.2933173607324871 }, { "epoch": 3.832600410436822, "grad_norm": 0.2370049855899673, "learning_rate": 0.00047308179132759165, "loss": 3.047926187515259, "step": 6538, "token_acc": 0.2907201490515255 }, { "epoch": 3.833186748754031, "grad_norm": 0.22055934054741122, "learning_rate": 0.00047307085302228293, "loss": 3.1643500328063965, "step": 6539, "token_acc": 0.27806982179605755 }, { "epoch": 3.83377308707124, "grad_norm": 0.23909417091680155, "learning_rate": 0.00047305991262152415, "loss": 3.111429214477539, "step": 6540, "token_acc": 0.284384459095244 }, { "epoch": 3.834359425388449, "grad_norm": 0.18388801127496174, "learning_rate": 0.00047304897012541804, "loss": 3.0450832843780518, "step": 6541, "token_acc": 0.29290060131726775 }, { "epoch": 3.834945763705658, "grad_norm": 0.2773612622918213, "learning_rate": 0.00047303802553406743, "loss": 3.096778392791748, "step": 6542, "token_acc": 0.285249784338647 }, { "epoch": 3.835532102022867, "grad_norm": 0.25169660268409194, "learning_rate": 0.0004730270788475751, "loss": 3.1223127841949463, "step": 6543, "token_acc": 0.2845925330767889 }, { "epoch": 3.8361184403400763, "grad_norm": 0.20499225958037362, "learning_rate": 0.0004730161300660439, "loss": 3.1041648387908936, "step": 6544, "token_acc": 0.28591700819672133 }, { "epoch": 3.8367047786572854, "grad_norm": 0.25350700847426316, "learning_rate": 0.0004730051791895767, "loss": 3.098144054412842, "step": 6545, "token_acc": 0.28842203970941216 }, { "epoch": 3.8372911169744945, "grad_norm": 0.17981883159819664, "learning_rate": 0.00047299422621827644, "loss": 3.081315040588379, "step": 6546, "token_acc": 0.2910331669295521 }, { "epoch": 3.837877455291703, "grad_norm": 0.23043088958405447, "learning_rate": 0.00047298327115224585, "loss": 3.1111674308776855, "step": 6547, "token_acc": 0.2841437827356066 }, { "epoch": 3.8384637936089123, "grad_norm": 0.18195702762820298, "learning_rate": 0.0004729723139915878, "loss": 3.1058735847473145, "step": 6548, "token_acc": 0.28672605005772656 }, { "epoch": 3.8390501319261214, "grad_norm": 0.1961388587724037, "learning_rate": 0.0004729613547364054, "loss": 3.093325614929199, "step": 6549, "token_acc": 0.28827461053815384 }, { "epoch": 3.8396364702433305, "grad_norm": 0.205869630926981, "learning_rate": 0.0004729503933868015, "loss": 3.0579991340637207, "step": 6550, "token_acc": 0.2918355589925108 }, { "epoch": 3.840222808560539, "grad_norm": 0.19026519566255432, "learning_rate": 0.00047293942994287906, "loss": 3.1072189807891846, "step": 6551, "token_acc": 0.2878851114020194 }, { "epoch": 3.8408091468777483, "grad_norm": 0.19030965677443198, "learning_rate": 0.0004729284644047411, "loss": 3.0612363815307617, "step": 6552, "token_acc": 0.29345670679705116 }, { "epoch": 3.8413954851949574, "grad_norm": 0.19645996335555976, "learning_rate": 0.0004729174967724907, "loss": 3.098538875579834, "step": 6553, "token_acc": 0.2879619081617118 }, { "epoch": 3.8419818235121665, "grad_norm": 0.2046378485035818, "learning_rate": 0.0004729065270462307, "loss": 3.106729507446289, "step": 6554, "token_acc": 0.2858357212042941 }, { "epoch": 3.8425681618293757, "grad_norm": 0.2205577951262936, "learning_rate": 0.0004728955552260643, "loss": 3.081890106201172, "step": 6555, "token_acc": 0.2882507317254391 }, { "epoch": 3.8431545001465848, "grad_norm": 0.21198051209583782, "learning_rate": 0.0004728845813120945, "loss": 3.060849189758301, "step": 6556, "token_acc": 0.2910310688464795 }, { "epoch": 3.843740838463794, "grad_norm": 0.226368656236823, "learning_rate": 0.00047287360530442443, "loss": 3.038071632385254, "step": 6557, "token_acc": 0.29528084337575805 }, { "epoch": 3.8443271767810026, "grad_norm": 0.18747209968932652, "learning_rate": 0.0004728626272031571, "loss": 3.047152280807495, "step": 6558, "token_acc": 0.294955650313103 }, { "epoch": 3.8449135150982117, "grad_norm": 0.19978446971679106, "learning_rate": 0.0004728516470083958, "loss": 3.114880323410034, "step": 6559, "token_acc": 0.28567120211325064 }, { "epoch": 3.8454998534154208, "grad_norm": 0.22774678731462242, "learning_rate": 0.00047284066472024345, "loss": 3.097235679626465, "step": 6560, "token_acc": 0.28715421389053014 }, { "epoch": 3.84608619173263, "grad_norm": 0.26506366283926763, "learning_rate": 0.0004728296803388034, "loss": 3.068356513977051, "step": 6561, "token_acc": 0.29219619318808676 }, { "epoch": 3.8466725300498386, "grad_norm": 0.23603790353083118, "learning_rate": 0.00047281869386417875, "loss": 3.095968723297119, "step": 6562, "token_acc": 0.2864278909287535 }, { "epoch": 3.8472588683670477, "grad_norm": 0.18312453819402238, "learning_rate": 0.00047280770529647276, "loss": 3.0661392211914062, "step": 6563, "token_acc": 0.2942461014983282 }, { "epoch": 3.847845206684257, "grad_norm": 0.23315685128152697, "learning_rate": 0.0004727967146357885, "loss": 3.050487518310547, "step": 6564, "token_acc": 0.29299231371153256 }, { "epoch": 3.848431545001466, "grad_norm": 0.20862164346485312, "learning_rate": 0.0004727857218822295, "loss": 3.0750577449798584, "step": 6565, "token_acc": 0.2884535644825305 }, { "epoch": 3.849017883318675, "grad_norm": 0.20626026952355725, "learning_rate": 0.00047277472703589874, "loss": 3.107205629348755, "step": 6566, "token_acc": 0.2853362428407178 }, { "epoch": 3.849604221635884, "grad_norm": 0.26122708225945274, "learning_rate": 0.00047276373009689967, "loss": 3.0458974838256836, "step": 6567, "token_acc": 0.29335063570616826 }, { "epoch": 3.850190559953093, "grad_norm": 0.2638426466898938, "learning_rate": 0.0004727527310653355, "loss": 3.071479558944702, "step": 6568, "token_acc": 0.2896536302012049 }, { "epoch": 3.850776898270302, "grad_norm": 0.19358775029135447, "learning_rate": 0.0004727417299413096, "loss": 3.1301212310791016, "step": 6569, "token_acc": 0.2827535450973177 }, { "epoch": 3.851363236587511, "grad_norm": 0.30818062047642736, "learning_rate": 0.0004727307267249253, "loss": 3.040440559387207, "step": 6570, "token_acc": 0.29502927665121365 }, { "epoch": 3.85194957490472, "grad_norm": 0.2805472515363853, "learning_rate": 0.00047271972141628595, "loss": 3.0688462257385254, "step": 6571, "token_acc": 0.29137527361743837 }, { "epoch": 3.852535913221929, "grad_norm": 0.2248302058377536, "learning_rate": 0.00047270871401549486, "loss": 3.1054718494415283, "step": 6572, "token_acc": 0.2837317915121461 }, { "epoch": 3.853122251539138, "grad_norm": 0.282010960062093, "learning_rate": 0.0004726977045226556, "loss": 3.0875115394592285, "step": 6573, "token_acc": 0.2897645362806343 }, { "epoch": 3.853708589856347, "grad_norm": 0.20278909014136015, "learning_rate": 0.00047268669293787146, "loss": 3.0533828735351562, "step": 6574, "token_acc": 0.293884980223319 }, { "epoch": 3.854294928173556, "grad_norm": 0.2322292968067153, "learning_rate": 0.0004726756792612459, "loss": 3.092543840408325, "step": 6575, "token_acc": 0.2870271025499754 }, { "epoch": 3.8548812664907652, "grad_norm": 0.20052266194500518, "learning_rate": 0.00047266466349288246, "loss": 3.095717430114746, "step": 6576, "token_acc": 0.2881497202818433 }, { "epoch": 3.8554676048079743, "grad_norm": 0.27372754082444634, "learning_rate": 0.00047265364563288447, "loss": 3.0760579109191895, "step": 6577, "token_acc": 0.29151160048800995 }, { "epoch": 3.8560539431251835, "grad_norm": 0.22984783695492186, "learning_rate": 0.00047264262568135553, "loss": 3.063521385192871, "step": 6578, "token_acc": 0.29177435929374396 }, { "epoch": 3.856640281442392, "grad_norm": 0.20161924254011016, "learning_rate": 0.0004726316036383992, "loss": 3.0870859622955322, "step": 6579, "token_acc": 0.2887328250770495 }, { "epoch": 3.8572266197596012, "grad_norm": 0.2632783487658834, "learning_rate": 0.00047262057950411883, "loss": 3.059124231338501, "step": 6580, "token_acc": 0.2925985187656033 }, { "epoch": 3.8578129580768104, "grad_norm": 0.22564075988844529, "learning_rate": 0.0004726095532786182, "loss": 3.086447238922119, "step": 6581, "token_acc": 0.2890474520488818 }, { "epoch": 3.8583992963940195, "grad_norm": 0.26638932945937466, "learning_rate": 0.0004725985249620008, "loss": 3.0860652923583984, "step": 6582, "token_acc": 0.2888075649398318 }, { "epoch": 3.858985634711228, "grad_norm": 0.21260337165845883, "learning_rate": 0.0004725874945543702, "loss": 3.06182861328125, "step": 6583, "token_acc": 0.2926865382337422 }, { "epoch": 3.8595719730284372, "grad_norm": 0.23834517377292713, "learning_rate": 0.00047257646205582995, "loss": 3.0690417289733887, "step": 6584, "token_acc": 0.2918016885411135 }, { "epoch": 3.8601583113456464, "grad_norm": 0.1994599165650327, "learning_rate": 0.00047256542746648385, "loss": 3.0980920791625977, "step": 6585, "token_acc": 0.28810130235964154 }, { "epoch": 3.8607446496628555, "grad_norm": 0.21346721053231602, "learning_rate": 0.0004725543907864354, "loss": 3.0762486457824707, "step": 6586, "token_acc": 0.2911234339060318 }, { "epoch": 3.8613309879800646, "grad_norm": 0.18167562928045092, "learning_rate": 0.0004725433520157884, "loss": 3.0722007751464844, "step": 6587, "token_acc": 0.291313488705997 }, { "epoch": 3.8619173262972737, "grad_norm": 0.2314529977260273, "learning_rate": 0.00047253231115464644, "loss": 3.0874311923980713, "step": 6588, "token_acc": 0.2885219368889976 }, { "epoch": 3.862503664614483, "grad_norm": 0.19894689811674457, "learning_rate": 0.00047252126820311336, "loss": 3.1121835708618164, "step": 6589, "token_acc": 0.28474272079317897 }, { "epoch": 3.8630900029316915, "grad_norm": 0.21301419417547374, "learning_rate": 0.00047251022316129276, "loss": 3.080428123474121, "step": 6590, "token_acc": 0.28893414171333365 }, { "epoch": 3.8636763412489006, "grad_norm": 0.24213155116539303, "learning_rate": 0.0004724991760292885, "loss": 3.0988974571228027, "step": 6591, "token_acc": 0.2875001645581285 }, { "epoch": 3.8642626795661097, "grad_norm": 0.1813269200159823, "learning_rate": 0.0004724881268072042, "loss": 3.0734262466430664, "step": 6592, "token_acc": 0.2902757034951811 }, { "epoch": 3.864849017883319, "grad_norm": 0.23740536213107427, "learning_rate": 0.00047247707549514384, "loss": 3.0526695251464844, "step": 6593, "token_acc": 0.29156929134070025 }, { "epoch": 3.8654353562005275, "grad_norm": 0.222665593870044, "learning_rate": 0.0004724660220932111, "loss": 3.048471212387085, "step": 6594, "token_acc": 0.29532492881111216 }, { "epoch": 3.8660216945177366, "grad_norm": 0.24227285623643968, "learning_rate": 0.0004724549666015099, "loss": 3.0528934001922607, "step": 6595, "token_acc": 0.2916868835997644 }, { "epoch": 3.8666080328349457, "grad_norm": 0.2625336529629332, "learning_rate": 0.00047244390902014406, "loss": 3.090010166168213, "step": 6596, "token_acc": 0.2877753058377543 }, { "epoch": 3.867194371152155, "grad_norm": 0.20896348809560866, "learning_rate": 0.0004724328493492174, "loss": 3.082413911819458, "step": 6597, "token_acc": 0.28744775373938647 }, { "epoch": 3.867780709469364, "grad_norm": 0.22484978918597365, "learning_rate": 0.0004724217875888339, "loss": 3.065814256668091, "step": 6598, "token_acc": 0.29059318667452494 }, { "epoch": 3.868367047786573, "grad_norm": 0.26274849125285965, "learning_rate": 0.0004724107237390974, "loss": 3.0292577743530273, "step": 6599, "token_acc": 0.2969701901144676 }, { "epoch": 3.868953386103782, "grad_norm": 0.21049425378758016, "learning_rate": 0.0004723996578001118, "loss": 3.115936040878296, "step": 6600, "token_acc": 0.28475933354311056 }, { "epoch": 3.869539724420991, "grad_norm": 0.22910782562178267, "learning_rate": 0.00047238858977198116, "loss": 3.0805931091308594, "step": 6601, "token_acc": 0.28930428191688107 }, { "epoch": 3.8701260627382, "grad_norm": 0.27888540159794756, "learning_rate": 0.00047237751965480937, "loss": 3.0922207832336426, "step": 6602, "token_acc": 0.2892300266020226 }, { "epoch": 3.870712401055409, "grad_norm": 0.190703335552826, "learning_rate": 0.00047236644744870043, "loss": 3.054598808288574, "step": 6603, "token_acc": 0.293918493039673 }, { "epoch": 3.871298739372618, "grad_norm": 0.22559187780460158, "learning_rate": 0.0004723553731537584, "loss": 3.0338029861450195, "step": 6604, "token_acc": 0.2950637235195297 }, { "epoch": 3.871885077689827, "grad_norm": 0.1950661274654235, "learning_rate": 0.00047234429677008727, "loss": 3.074355125427246, "step": 6605, "token_acc": 0.291761875156438 }, { "epoch": 3.872471416007036, "grad_norm": 0.25366873194543654, "learning_rate": 0.00047233321829779105, "loss": 3.054440975189209, "step": 6606, "token_acc": 0.29201840399432516 }, { "epoch": 3.873057754324245, "grad_norm": 0.20492641193232897, "learning_rate": 0.00047232213773697385, "loss": 3.0460822582244873, "step": 6607, "token_acc": 0.2945879268343036 }, { "epoch": 3.873644092641454, "grad_norm": 0.33904724963182903, "learning_rate": 0.00047231105508773976, "loss": 3.125011920928955, "step": 6608, "token_acc": 0.28189973614775726 }, { "epoch": 3.8742304309586633, "grad_norm": 0.2904293628297362, "learning_rate": 0.00047229997035019286, "loss": 3.0500335693359375, "step": 6609, "token_acc": 0.2941685198912775 }, { "epoch": 3.8748167692758724, "grad_norm": 0.26096504833578144, "learning_rate": 0.0004722888835244373, "loss": 3.072169065475464, "step": 6610, "token_acc": 0.2909984600787071 }, { "epoch": 3.875403107593081, "grad_norm": 0.21706438398825628, "learning_rate": 0.00047227779461057716, "loss": 3.1308698654174805, "step": 6611, "token_acc": 0.2819087091180627 }, { "epoch": 3.87598944591029, "grad_norm": 0.2932457600914625, "learning_rate": 0.0004722667036087167, "loss": 3.1182332038879395, "step": 6612, "token_acc": 0.28420195017150923 }, { "epoch": 3.8765757842274993, "grad_norm": 0.2203285559027362, "learning_rate": 0.00047225561051896013, "loss": 3.1260557174682617, "step": 6613, "token_acc": 0.28366002873269447 }, { "epoch": 3.8771621225447084, "grad_norm": 0.2879904457535955, "learning_rate": 0.00047224451534141155, "loss": 3.0877788066864014, "step": 6614, "token_acc": 0.2880016908070393 }, { "epoch": 3.8777484608619175, "grad_norm": 0.2136169308749751, "learning_rate": 0.0004722334180761752, "loss": 3.121661424636841, "step": 6615, "token_acc": 0.2827520355101356 }, { "epoch": 3.878334799179126, "grad_norm": 0.24961190861872978, "learning_rate": 0.00047222231872335544, "loss": 3.0897679328918457, "step": 6616, "token_acc": 0.28644471020922546 }, { "epoch": 3.8789211374963353, "grad_norm": 0.21968356416794158, "learning_rate": 0.00047221121728305634, "loss": 3.0652503967285156, "step": 6617, "token_acc": 0.29132681881292133 }, { "epoch": 3.8795074758135444, "grad_norm": 0.228852570329781, "learning_rate": 0.0004722001137553823, "loss": 3.076927661895752, "step": 6618, "token_acc": 0.28941257527814634 }, { "epoch": 3.8800938141307535, "grad_norm": 0.18276597737584713, "learning_rate": 0.0004721890081404376, "loss": 3.0959906578063965, "step": 6619, "token_acc": 0.28629836094585237 }, { "epoch": 3.8806801524479626, "grad_norm": 0.2107761990609545, "learning_rate": 0.00047217790043832666, "loss": 3.0804977416992188, "step": 6620, "token_acc": 0.28996709778985885 }, { "epoch": 3.8812664907651717, "grad_norm": 0.18060748877366736, "learning_rate": 0.00047216679064915367, "loss": 3.065890312194824, "step": 6621, "token_acc": 0.29160001230542054 }, { "epoch": 3.8818528290823804, "grad_norm": 0.2097903401983441, "learning_rate": 0.00047215567877302307, "loss": 3.116609573364258, "step": 6622, "token_acc": 0.2834456233143385 }, { "epoch": 3.8824391673995895, "grad_norm": 0.2140896843330736, "learning_rate": 0.0004721445648100392, "loss": 3.0928001403808594, "step": 6623, "token_acc": 0.2887632620327886 }, { "epoch": 3.8830255057167986, "grad_norm": 0.23193868868851975, "learning_rate": 0.0004721334487603065, "loss": 3.081294059753418, "step": 6624, "token_acc": 0.2885319060956676 }, { "epoch": 3.8836118440340077, "grad_norm": 0.23821497865142813, "learning_rate": 0.0004721223306239294, "loss": 3.06607985496521, "step": 6625, "token_acc": 0.2908129125140673 }, { "epoch": 3.8841981823512164, "grad_norm": 0.2242050472440741, "learning_rate": 0.00047211121040101236, "loss": 3.0999040603637695, "step": 6626, "token_acc": 0.2860724946250026 }, { "epoch": 3.8847845206684255, "grad_norm": 0.18887521783447783, "learning_rate": 0.0004721000880916597, "loss": 3.111179828643799, "step": 6627, "token_acc": 0.28545246754328646 }, { "epoch": 3.8853708589856346, "grad_norm": 0.26754673878816626, "learning_rate": 0.00047208896369597606, "loss": 3.1077122688293457, "step": 6628, "token_acc": 0.2852491088896631 }, { "epoch": 3.8859571973028437, "grad_norm": 0.1955636962346107, "learning_rate": 0.0004720778372140658, "loss": 3.0622434616088867, "step": 6629, "token_acc": 0.29373639062571494 }, { "epoch": 3.886543535620053, "grad_norm": 0.26239880346720634, "learning_rate": 0.00047206670864603355, "loss": 3.108665704727173, "step": 6630, "token_acc": 0.28626711181814374 }, { "epoch": 3.887129873937262, "grad_norm": 0.25029463222093773, "learning_rate": 0.00047205557799198384, "loss": 3.09226131439209, "step": 6631, "token_acc": 0.2878963485953682 }, { "epoch": 3.887716212254471, "grad_norm": 0.18668712804382437, "learning_rate": 0.00047204444525202115, "loss": 3.0789241790771484, "step": 6632, "token_acc": 0.289724443848765 }, { "epoch": 3.8883025505716797, "grad_norm": 0.21656257362302248, "learning_rate": 0.0004720333104262502, "loss": 3.0601563453674316, "step": 6633, "token_acc": 0.29314690672285004 }, { "epoch": 3.888888888888889, "grad_norm": 0.21055114654414064, "learning_rate": 0.0004720221735147754, "loss": 3.051942825317383, "step": 6634, "token_acc": 0.29334907979394226 }, { "epoch": 3.889475227206098, "grad_norm": 0.22650309723016437, "learning_rate": 0.0004720110345177015, "loss": 3.146289825439453, "step": 6635, "token_acc": 0.28176377157643395 }, { "epoch": 3.890061565523307, "grad_norm": 0.22260194201710884, "learning_rate": 0.00047199989343513313, "loss": 3.05391263961792, "step": 6636, "token_acc": 0.2921983101013782 }, { "epoch": 3.8906479038405157, "grad_norm": 0.2311845208979968, "learning_rate": 0.0004719887502671748, "loss": 3.0894980430603027, "step": 6637, "token_acc": 0.28979457179076873 }, { "epoch": 3.891234242157725, "grad_norm": 0.21589241681931762, "learning_rate": 0.00047197760501393137, "loss": 3.1009857654571533, "step": 6638, "token_acc": 0.2871004505131141 }, { "epoch": 3.891820580474934, "grad_norm": 0.1973022616475723, "learning_rate": 0.0004719664576755075, "loss": 3.087038278579712, "step": 6639, "token_acc": 0.28951127008891764 }, { "epoch": 3.892406918792143, "grad_norm": 0.24438284751710712, "learning_rate": 0.00047195530825200777, "loss": 3.077129364013672, "step": 6640, "token_acc": 0.28843225798055777 }, { "epoch": 3.892993257109352, "grad_norm": 0.22235985272724695, "learning_rate": 0.00047194415674353706, "loss": 3.0899295806884766, "step": 6641, "token_acc": 0.28781617357207934 }, { "epoch": 3.8935795954265613, "grad_norm": 0.21728930185112144, "learning_rate": 0.00047193300315020005, "loss": 3.063229560852051, "step": 6642, "token_acc": 0.2920231135881848 }, { "epoch": 3.8941659337437704, "grad_norm": 0.20445500399529878, "learning_rate": 0.00047192184747210154, "loss": 3.1142172813415527, "step": 6643, "token_acc": 0.28463625994729735 }, { "epoch": 3.894752272060979, "grad_norm": 0.224680959905295, "learning_rate": 0.00047191068970934636, "loss": 3.04136323928833, "step": 6644, "token_acc": 0.2942601804838942 }, { "epoch": 3.895338610378188, "grad_norm": 0.2301327263517826, "learning_rate": 0.0004718995298620392, "loss": 3.056760787963867, "step": 6645, "token_acc": 0.2930707821115284 }, { "epoch": 3.8959249486953973, "grad_norm": 0.2114690610331847, "learning_rate": 0.000471888367930285, "loss": 3.062340259552002, "step": 6646, "token_acc": 0.2911979309039679 }, { "epoch": 3.8965112870126064, "grad_norm": 0.19176960364255283, "learning_rate": 0.00047187720391418864, "loss": 3.077188014984131, "step": 6647, "token_acc": 0.2890829650157272 }, { "epoch": 3.897097625329815, "grad_norm": 0.1934051542163186, "learning_rate": 0.00047186603781385484, "loss": 3.085202693939209, "step": 6648, "token_acc": 0.2897958028135665 }, { "epoch": 3.897683963647024, "grad_norm": 0.20521224089781526, "learning_rate": 0.00047185486962938864, "loss": 3.1122426986694336, "step": 6649, "token_acc": 0.28319009221542046 }, { "epoch": 3.8982703019642333, "grad_norm": 0.23689514530386674, "learning_rate": 0.0004718436993608949, "loss": 3.054687023162842, "step": 6650, "token_acc": 0.29526064294276216 }, { "epoch": 3.8988566402814424, "grad_norm": 0.24640207631283378, "learning_rate": 0.00047183252700847846, "loss": 3.1443819999694824, "step": 6651, "token_acc": 0.2806144912995098 }, { "epoch": 3.8994429785986515, "grad_norm": 0.2172801293456974, "learning_rate": 0.00047182135257224444, "loss": 3.0730905532836914, "step": 6652, "token_acc": 0.2901232624213109 }, { "epoch": 3.9000293169158606, "grad_norm": 0.31618468315127146, "learning_rate": 0.0004718101760522977, "loss": 3.084804058074951, "step": 6653, "token_acc": 0.28772287812826064 }, { "epoch": 3.9006156552330697, "grad_norm": 0.3115187915492716, "learning_rate": 0.00047179899744874323, "loss": 3.061718702316284, "step": 6654, "token_acc": 0.291010838468874 }, { "epoch": 3.9012019935502784, "grad_norm": 0.1972923325538355, "learning_rate": 0.0004717878167616861, "loss": 3.0482168197631836, "step": 6655, "token_acc": 0.2950436670513188 }, { "epoch": 3.9017883318674875, "grad_norm": 0.2406114219717918, "learning_rate": 0.0004717766339912313, "loss": 3.070314884185791, "step": 6656, "token_acc": 0.2913224322146255 }, { "epoch": 3.9023746701846966, "grad_norm": 0.23090866711494307, "learning_rate": 0.00047176544913748374, "loss": 3.109677791595459, "step": 6657, "token_acc": 0.28549114664005915 }, { "epoch": 3.9029610085019057, "grad_norm": 0.20389602364079093, "learning_rate": 0.0004717542622005487, "loss": 3.088420867919922, "step": 6658, "token_acc": 0.2884563597339057 }, { "epoch": 3.9035473468191144, "grad_norm": 0.22644741911959915, "learning_rate": 0.00047174307318053124, "loss": 3.0168306827545166, "step": 6659, "token_acc": 0.2983666408414081 }, { "epoch": 3.9041336851363235, "grad_norm": 0.18561159863861174, "learning_rate": 0.00047173188207753637, "loss": 3.0381031036376953, "step": 6660, "token_acc": 0.2962874368690301 }, { "epoch": 3.9047200234535326, "grad_norm": 0.22997217115589738, "learning_rate": 0.0004717206888916693, "loss": 3.058438777923584, "step": 6661, "token_acc": 0.29140573555248084 }, { "epoch": 3.9053063617707418, "grad_norm": 0.1961829691021727, "learning_rate": 0.00047170949362303503, "loss": 3.084615707397461, "step": 6662, "token_acc": 0.2887559142777623 }, { "epoch": 3.905892700087951, "grad_norm": 0.24062653256321054, "learning_rate": 0.00047169829627173885, "loss": 3.0795886516571045, "step": 6663, "token_acc": 0.2888947914905947 }, { "epoch": 3.90647903840516, "grad_norm": 0.19869335816548647, "learning_rate": 0.000471687096837886, "loss": 3.0801472663879395, "step": 6664, "token_acc": 0.291593985074664 }, { "epoch": 3.9070653767223686, "grad_norm": 0.1844416488017622, "learning_rate": 0.00047167589532158153, "loss": 3.0913257598876953, "step": 6665, "token_acc": 0.2882596024650267 }, { "epoch": 3.9076517150395778, "grad_norm": 0.19289318339101477, "learning_rate": 0.0004716646917229308, "loss": 3.097916841506958, "step": 6666, "token_acc": 0.2879944866335058 }, { "epoch": 3.908238053356787, "grad_norm": 0.19245358137353094, "learning_rate": 0.00047165348604203897, "loss": 3.0841991901397705, "step": 6667, "token_acc": 0.2881381636104728 }, { "epoch": 3.908824391673996, "grad_norm": 0.20040711571177733, "learning_rate": 0.00047164227827901125, "loss": 3.0799715518951416, "step": 6668, "token_acc": 0.28834200243252334 }, { "epoch": 3.909410729991205, "grad_norm": 0.19668353139336017, "learning_rate": 0.00047163106843395303, "loss": 3.07839298248291, "step": 6669, "token_acc": 0.2892456514587114 }, { "epoch": 3.9099970683084138, "grad_norm": 0.1986881082462936, "learning_rate": 0.00047161985650696957, "loss": 3.1098709106445312, "step": 6670, "token_acc": 0.2855512325762848 }, { "epoch": 3.910583406625623, "grad_norm": 0.17520790625033966, "learning_rate": 0.0004716086424981662, "loss": 3.1099987030029297, "step": 6671, "token_acc": 0.28613849039545675 }, { "epoch": 3.911169744942832, "grad_norm": 0.18384721935889844, "learning_rate": 0.00047159742640764826, "loss": 3.0757803916931152, "step": 6672, "token_acc": 0.2894037955781979 }, { "epoch": 3.911756083260041, "grad_norm": 0.18310671805957543, "learning_rate": 0.00047158620823552113, "loss": 3.096815586090088, "step": 6673, "token_acc": 0.28833197792543713 }, { "epoch": 3.91234242157725, "grad_norm": 0.19080713888393488, "learning_rate": 0.00047157498798189014, "loss": 3.0587055683135986, "step": 6674, "token_acc": 0.2923268690791718 }, { "epoch": 3.9129287598944593, "grad_norm": 0.1926956903404848, "learning_rate": 0.00047156376564686073, "loss": 3.0705649852752686, "step": 6675, "token_acc": 0.2913416011881339 }, { "epoch": 3.913515098211668, "grad_norm": 0.17238272652262301, "learning_rate": 0.0004715525412305383, "loss": 3.089484691619873, "step": 6676, "token_acc": 0.28848837357619356 }, { "epoch": 3.914101436528877, "grad_norm": 0.17690938059305153, "learning_rate": 0.0004715413147330282, "loss": 3.0427112579345703, "step": 6677, "token_acc": 0.29562446132569 }, { "epoch": 3.914687774846086, "grad_norm": 0.19549704463123602, "learning_rate": 0.000471530086154436, "loss": 3.02778697013855, "step": 6678, "token_acc": 0.29731599927329466 }, { "epoch": 3.9152741131632953, "grad_norm": 0.20729477990740128, "learning_rate": 0.00047151885549486726, "loss": 3.084157943725586, "step": 6679, "token_acc": 0.28948299294398006 }, { "epoch": 3.915860451480504, "grad_norm": 0.23673077945689236, "learning_rate": 0.00047150762275442737, "loss": 3.0899147987365723, "step": 6680, "token_acc": 0.2878961967140095 }, { "epoch": 3.916446789797713, "grad_norm": 0.267893165845178, "learning_rate": 0.0004714963879332218, "loss": 3.0904488563537598, "step": 6681, "token_acc": 0.2869433251622813 }, { "epoch": 3.917033128114922, "grad_norm": 0.23562682574382318, "learning_rate": 0.00047148515103135615, "loss": 3.1031858921051025, "step": 6682, "token_acc": 0.28527509060260176 }, { "epoch": 3.9176194664321313, "grad_norm": 0.19066435141456595, "learning_rate": 0.00047147391204893597, "loss": 3.1164937019348145, "step": 6683, "token_acc": 0.2850263950135685 }, { "epoch": 3.9182058047493404, "grad_norm": 0.16830948674467125, "learning_rate": 0.00047146267098606675, "loss": 3.1011576652526855, "step": 6684, "token_acc": 0.28734436867432916 }, { "epoch": 3.9187921430665495, "grad_norm": 0.18699898519794278, "learning_rate": 0.0004714514278428542, "loss": 3.0827369689941406, "step": 6685, "token_acc": 0.28942741818030504 }, { "epoch": 3.9193784813837587, "grad_norm": 0.1932032427962044, "learning_rate": 0.000471440182619404, "loss": 3.0541751384735107, "step": 6686, "token_acc": 0.29583533814488105 }, { "epoch": 3.9199648197009673, "grad_norm": 0.20142223281160146, "learning_rate": 0.00047142893531582156, "loss": 3.0695598125457764, "step": 6687, "token_acc": 0.29027564885297863 }, { "epoch": 3.9205511580181764, "grad_norm": 0.2168516087780529, "learning_rate": 0.0004714176859322127, "loss": 3.0429539680480957, "step": 6688, "token_acc": 0.29459460830825906 }, { "epoch": 3.9211374963353856, "grad_norm": 0.27909404390062775, "learning_rate": 0.00047140643446868304, "loss": 3.0664031505584717, "step": 6689, "token_acc": 0.29037023701099274 }, { "epoch": 3.9217238346525947, "grad_norm": 0.2264269048157196, "learning_rate": 0.00047139518092533824, "loss": 3.0987868309020996, "step": 6690, "token_acc": 0.28385851823877245 }, { "epoch": 3.9223101729698033, "grad_norm": 0.1750092583580957, "learning_rate": 0.00047138392530228404, "loss": 3.110069751739502, "step": 6691, "token_acc": 0.28680917373813397 }, { "epoch": 3.9228965112870124, "grad_norm": 0.2504495721367425, "learning_rate": 0.00047137266759962626, "loss": 3.111448287963867, "step": 6692, "token_acc": 0.28527862419322947 }, { "epoch": 3.9234828496042216, "grad_norm": 0.21087969825901873, "learning_rate": 0.0004713614078174705, "loss": 3.0733635425567627, "step": 6693, "token_acc": 0.28992635461949007 }, { "epoch": 3.9240691879214307, "grad_norm": 0.23317930476711, "learning_rate": 0.00047135014595592263, "loss": 3.0887603759765625, "step": 6694, "token_acc": 0.2877039509654398 }, { "epoch": 3.92465552623864, "grad_norm": 0.29169340309731334, "learning_rate": 0.00047133888201508837, "loss": 3.1063990592956543, "step": 6695, "token_acc": 0.2845426268012442 }, { "epoch": 3.925241864555849, "grad_norm": 0.19998608384548078, "learning_rate": 0.0004713276159950737, "loss": 3.0589139461517334, "step": 6696, "token_acc": 0.29228350170997286 }, { "epoch": 3.925828202873058, "grad_norm": 0.21339411901816419, "learning_rate": 0.0004713163478959842, "loss": 3.0778064727783203, "step": 6697, "token_acc": 0.289695921062147 }, { "epoch": 3.9264145411902667, "grad_norm": 0.2408294773973168, "learning_rate": 0.00047130507771792583, "loss": 3.0697922706604004, "step": 6698, "token_acc": 0.28958039007027137 }, { "epoch": 3.927000879507476, "grad_norm": 0.1901120536550186, "learning_rate": 0.00047129380546100455, "loss": 3.0792102813720703, "step": 6699, "token_acc": 0.2903775738999856 }, { "epoch": 3.927587217824685, "grad_norm": 0.20833930039569232, "learning_rate": 0.00047128253112532607, "loss": 3.0582709312438965, "step": 6700, "token_acc": 0.2941672634678125 }, { "epoch": 3.928173556141894, "grad_norm": 0.19728328258371358, "learning_rate": 0.0004712712547109965, "loss": 3.1104788780212402, "step": 6701, "token_acc": 0.2825135660142311 }, { "epoch": 3.9287598944591027, "grad_norm": 0.1864014634407815, "learning_rate": 0.00047125997621812155, "loss": 3.1384544372558594, "step": 6702, "token_acc": 0.2813310686017579 }, { "epoch": 3.929346232776312, "grad_norm": 0.19204618316889274, "learning_rate": 0.0004712486956468073, "loss": 3.1339359283447266, "step": 6703, "token_acc": 0.28313880447971135 }, { "epoch": 3.929932571093521, "grad_norm": 0.19061345588990544, "learning_rate": 0.0004712374129971598, "loss": 3.090369701385498, "step": 6704, "token_acc": 0.2881379738800427 }, { "epoch": 3.93051890941073, "grad_norm": 0.18025205903266708, "learning_rate": 0.0004712261282692848, "loss": 3.0653324127197266, "step": 6705, "token_acc": 0.29257056292103645 }, { "epoch": 3.931105247727939, "grad_norm": 0.18393953680834926, "learning_rate": 0.0004712148414632885, "loss": 3.0820493698120117, "step": 6706, "token_acc": 0.2885569581201635 }, { "epoch": 3.9316915860451482, "grad_norm": 0.17902241570905314, "learning_rate": 0.00047120355257927683, "loss": 3.0954947471618652, "step": 6707, "token_acc": 0.28560615682909823 }, { "epoch": 3.9322779243623573, "grad_norm": 0.2072126774081727, "learning_rate": 0.00047119226161735587, "loss": 3.1053380966186523, "step": 6708, "token_acc": 0.28607547595139043 }, { "epoch": 3.932864262679566, "grad_norm": 0.22218547408543435, "learning_rate": 0.0004711809685776316, "loss": 3.085787773132324, "step": 6709, "token_acc": 0.28791855027784613 }, { "epoch": 3.933450600996775, "grad_norm": 0.22864912477353647, "learning_rate": 0.0004711696734602103, "loss": 3.1162140369415283, "step": 6710, "token_acc": 0.2849529153529111 }, { "epoch": 3.9340369393139842, "grad_norm": 0.2284841786202647, "learning_rate": 0.0004711583762651979, "loss": 3.09869122505188, "step": 6711, "token_acc": 0.28728169667766984 }, { "epoch": 3.9346232776311933, "grad_norm": 0.21334651403748614, "learning_rate": 0.00047114707699270057, "loss": 3.0433740615844727, "step": 6712, "token_acc": 0.2946870030729585 }, { "epoch": 3.935209615948402, "grad_norm": 0.18712087450012704, "learning_rate": 0.0004711357756428244, "loss": 3.052543878555298, "step": 6713, "token_acc": 0.2936395098920748 }, { "epoch": 3.935795954265611, "grad_norm": 0.20664953464546496, "learning_rate": 0.0004711244722156756, "loss": 3.1054911613464355, "step": 6714, "token_acc": 0.286683716236779 }, { "epoch": 3.9363822925828202, "grad_norm": 0.27965961443222476, "learning_rate": 0.0004711131667113604, "loss": 3.066488742828369, "step": 6715, "token_acc": 0.290917580497182 }, { "epoch": 3.9369686309000294, "grad_norm": 0.3294615803960452, "learning_rate": 0.00047110185912998496, "loss": 3.098814010620117, "step": 6716, "token_acc": 0.2860231841067874 }, { "epoch": 3.9375549692172385, "grad_norm": 0.30108595466984267, "learning_rate": 0.0004710905494716555, "loss": 3.068279266357422, "step": 6717, "token_acc": 0.29092357680394354 }, { "epoch": 3.9381413075344476, "grad_norm": 0.21202038541135715, "learning_rate": 0.0004710792377364782, "loss": 3.126347780227661, "step": 6718, "token_acc": 0.28441802007287337 }, { "epoch": 3.9387276458516562, "grad_norm": 0.2818223415135372, "learning_rate": 0.0004710679239245594, "loss": 3.0310590267181396, "step": 6719, "token_acc": 0.2964558670247819 }, { "epoch": 3.9393139841688654, "grad_norm": 0.21207809280685008, "learning_rate": 0.0004710566080360053, "loss": 3.0929980278015137, "step": 6720, "token_acc": 0.2853044711816319 }, { "epoch": 3.9399003224860745, "grad_norm": 0.264754360033794, "learning_rate": 0.0004710452900709223, "loss": 3.1281919479370117, "step": 6721, "token_acc": 0.2829935386068905 }, { "epoch": 3.9404866608032836, "grad_norm": 0.2337898588152632, "learning_rate": 0.0004710339700294167, "loss": 3.1238808631896973, "step": 6722, "token_acc": 0.28449085426296733 }, { "epoch": 3.9410729991204922, "grad_norm": 0.2607217411675205, "learning_rate": 0.00047102264791159474, "loss": 3.078977108001709, "step": 6723, "token_acc": 0.29015102897598416 }, { "epoch": 3.9416593374377014, "grad_norm": 0.22530027258467475, "learning_rate": 0.0004710113237175628, "loss": 3.1104190349578857, "step": 6724, "token_acc": 0.2849929465203283 }, { "epoch": 3.9422456757549105, "grad_norm": 0.21371778336103323, "learning_rate": 0.00047099999744742733, "loss": 3.0787291526794434, "step": 6725, "token_acc": 0.2910668828515029 }, { "epoch": 3.9428320140721196, "grad_norm": 0.2018423944719916, "learning_rate": 0.0004709886691012947, "loss": 3.065816879272461, "step": 6726, "token_acc": 0.29119826014848194 }, { "epoch": 3.9434183523893287, "grad_norm": 0.16915372324712086, "learning_rate": 0.0004709773386792713, "loss": 3.109806537628174, "step": 6727, "token_acc": 0.2851704836183772 }, { "epoch": 3.944004690706538, "grad_norm": 0.18813686835216847, "learning_rate": 0.00047096600618146356, "loss": 3.0517992973327637, "step": 6728, "token_acc": 0.29324062791197764 }, { "epoch": 3.944591029023747, "grad_norm": 0.19255171692010578, "learning_rate": 0.000470954671607978, "loss": 3.0422275066375732, "step": 6729, "token_acc": 0.2946406188215409 }, { "epoch": 3.9451773673409556, "grad_norm": 0.20497771430921866, "learning_rate": 0.00047094333495892093, "loss": 3.0861008167266846, "step": 6730, "token_acc": 0.28956807012539065 }, { "epoch": 3.9457637056581647, "grad_norm": 0.1743050333160798, "learning_rate": 0.0004709319962343991, "loss": 3.0805163383483887, "step": 6731, "token_acc": 0.2893749725706644 }, { "epoch": 3.946350043975374, "grad_norm": 0.20666380378787028, "learning_rate": 0.0004709206554345188, "loss": 3.0975799560546875, "step": 6732, "token_acc": 0.28546363650054973 }, { "epoch": 3.946936382292583, "grad_norm": 0.217184219916497, "learning_rate": 0.0004709093125593866, "loss": 3.103559732437134, "step": 6733, "token_acc": 0.2869196325821773 }, { "epoch": 3.9475227206097916, "grad_norm": 0.18637974034769236, "learning_rate": 0.0004708979676091091, "loss": 3.063138008117676, "step": 6734, "token_acc": 0.2917454825552763 }, { "epoch": 3.9481090589270007, "grad_norm": 0.2253756441975982, "learning_rate": 0.0004708866205837929, "loss": 3.0463500022888184, "step": 6735, "token_acc": 0.2941732241669874 }, { "epoch": 3.94869539724421, "grad_norm": 0.18825481872750727, "learning_rate": 0.0004708752714835445, "loss": 3.06581974029541, "step": 6736, "token_acc": 0.2916749156069951 }, { "epoch": 3.949281735561419, "grad_norm": 0.20139931050227008, "learning_rate": 0.00047086392030847057, "loss": 3.0891823768615723, "step": 6737, "token_acc": 0.28980619062276103 }, { "epoch": 3.949868073878628, "grad_norm": 0.21386398880494314, "learning_rate": 0.0004708525670586778, "loss": 3.1262433528900146, "step": 6738, "token_acc": 0.2829819986005979 }, { "epoch": 3.950454412195837, "grad_norm": 0.22337405754738288, "learning_rate": 0.0004708412117342727, "loss": 3.0991740226745605, "step": 6739, "token_acc": 0.2864382899618747 }, { "epoch": 3.9510407505130463, "grad_norm": 0.23147787570889594, "learning_rate": 0.000470829854335362, "loss": 3.1200380325317383, "step": 6740, "token_acc": 0.28283799903555745 }, { "epoch": 3.951627088830255, "grad_norm": 0.22148111901866932, "learning_rate": 0.0004708184948620524, "loss": 3.125523805618286, "step": 6741, "token_acc": 0.2820963581649573 }, { "epoch": 3.952213427147464, "grad_norm": 0.21309266386302453, "learning_rate": 0.0004708071333144506, "loss": 3.0885539054870605, "step": 6742, "token_acc": 0.2885979125457683 }, { "epoch": 3.952799765464673, "grad_norm": 0.3642746592695076, "learning_rate": 0.00047079576969266337, "loss": 3.0962767601013184, "step": 6743, "token_acc": 0.28905832836008416 }, { "epoch": 3.9533861037818823, "grad_norm": 0.4193620614459352, "learning_rate": 0.00047078440399679736, "loss": 3.120298385620117, "step": 6744, "token_acc": 0.28218164521840233 }, { "epoch": 3.953972442099091, "grad_norm": 0.19240679293953977, "learning_rate": 0.0004707730362269593, "loss": 3.061475992202759, "step": 6745, "token_acc": 0.2909522862721706 }, { "epoch": 3.9545587804163, "grad_norm": 0.32427776247222884, "learning_rate": 0.0004707616663832562, "loss": 3.0943586826324463, "step": 6746, "token_acc": 0.28898758270642616 }, { "epoch": 3.955145118733509, "grad_norm": 0.24097182711340676, "learning_rate": 0.00047075029446579466, "loss": 3.089096784591675, "step": 6747, "token_acc": 0.2896286352988994 }, { "epoch": 3.9557314570507183, "grad_norm": 0.24937544750280952, "learning_rate": 0.00047073892047468156, "loss": 3.0960211753845215, "step": 6748, "token_acc": 0.28681687376503506 }, { "epoch": 3.9563177953679274, "grad_norm": 0.16521302171777708, "learning_rate": 0.00047072754441002373, "loss": 3.0952634811401367, "step": 6749, "token_acc": 0.28801745642686727 }, { "epoch": 3.9569041336851365, "grad_norm": 0.24804867504687367, "learning_rate": 0.0004707161662719281, "loss": 3.0993199348449707, "step": 6750, "token_acc": 0.28588505061618585 }, { "epoch": 3.9574904720023456, "grad_norm": 0.21366757986462223, "learning_rate": 0.00047070478606050153, "loss": 3.1269121170043945, "step": 6751, "token_acc": 0.28364530085751855 }, { "epoch": 3.9580768103195543, "grad_norm": 0.23719457887400647, "learning_rate": 0.0004706934037758509, "loss": 3.071544885635376, "step": 6752, "token_acc": 0.2888691991334231 }, { "epoch": 3.9586631486367634, "grad_norm": 0.18695313419638268, "learning_rate": 0.00047068201941808307, "loss": 3.0645318031311035, "step": 6753, "token_acc": 0.29317200246795094 }, { "epoch": 3.9592494869539725, "grad_norm": 0.20038386258425406, "learning_rate": 0.00047067063298730506, "loss": 3.0458388328552246, "step": 6754, "token_acc": 0.29502805390824094 }, { "epoch": 3.9598358252711816, "grad_norm": 0.22383552498805998, "learning_rate": 0.00047065924448362384, "loss": 3.119743824005127, "step": 6755, "token_acc": 0.28255059594370324 }, { "epoch": 3.9604221635883903, "grad_norm": 0.2151942809580209, "learning_rate": 0.00047064785390714636, "loss": 3.0881083011627197, "step": 6756, "token_acc": 0.2888687023967577 }, { "epoch": 3.9610085019055994, "grad_norm": 0.221555034772601, "learning_rate": 0.0004706364612579796, "loss": 3.0781450271606445, "step": 6757, "token_acc": 0.2892811857479881 }, { "epoch": 3.9615948402228085, "grad_norm": 0.194438632915114, "learning_rate": 0.0004706250665362306, "loss": 3.048779010772705, "step": 6758, "token_acc": 0.29320898150257885 }, { "epoch": 3.9621811785400176, "grad_norm": 0.23622624132996456, "learning_rate": 0.00047061366974200636, "loss": 3.067814826965332, "step": 6759, "token_acc": 0.2906316948230569 }, { "epoch": 3.9627675168572267, "grad_norm": 0.2005839072078433, "learning_rate": 0.0004706022708754141, "loss": 3.0776000022888184, "step": 6760, "token_acc": 0.28964207366956557 }, { "epoch": 3.963353855174436, "grad_norm": 0.25833832365958964, "learning_rate": 0.0004705908699365606, "loss": 3.0757744312286377, "step": 6761, "token_acc": 0.2906780718451912 }, { "epoch": 3.963940193491645, "grad_norm": 0.20596521905517404, "learning_rate": 0.0004705794669255532, "loss": 3.0538580417633057, "step": 6762, "token_acc": 0.2933118959273293 }, { "epoch": 3.9645265318088536, "grad_norm": 0.2781311027725708, "learning_rate": 0.00047056806184249893, "loss": 3.0851030349731445, "step": 6763, "token_acc": 0.289389573242414 }, { "epoch": 3.9651128701260627, "grad_norm": 0.26257372782398836, "learning_rate": 0.00047055665468750496, "loss": 3.0718703269958496, "step": 6764, "token_acc": 0.2888256227758007 }, { "epoch": 3.965699208443272, "grad_norm": 0.23733675173602187, "learning_rate": 0.0004705452454606785, "loss": 3.0409162044525146, "step": 6765, "token_acc": 0.29440663558655356 }, { "epoch": 3.966285546760481, "grad_norm": 0.22161716716514832, "learning_rate": 0.00047053383416212645, "loss": 3.015023708343506, "step": 6766, "token_acc": 0.2993078864517472 }, { "epoch": 3.9668718850776896, "grad_norm": 0.22152293597944567, "learning_rate": 0.0004705224207919564, "loss": 3.107854127883911, "step": 6767, "token_acc": 0.2845793659098043 }, { "epoch": 3.9674582233948987, "grad_norm": 0.24453977971760799, "learning_rate": 0.00047051100535027524, "loss": 3.119394302368164, "step": 6768, "token_acc": 0.28394418300453217 }, { "epoch": 3.968044561712108, "grad_norm": 0.21700064635332458, "learning_rate": 0.00047049958783719037, "loss": 3.1139888763427734, "step": 6769, "token_acc": 0.28529597459994127 }, { "epoch": 3.968630900029317, "grad_norm": 0.24974585073482308, "learning_rate": 0.00047048816825280906, "loss": 3.0812134742736816, "step": 6770, "token_acc": 0.2896397440969049 }, { "epoch": 3.969217238346526, "grad_norm": 0.22022902088480992, "learning_rate": 0.0004704767465972384, "loss": 3.0303401947021484, "step": 6771, "token_acc": 0.2948009923417107 }, { "epoch": 3.969803576663735, "grad_norm": 0.22068937344967937, "learning_rate": 0.00047046532287058583, "loss": 3.0402743816375732, "step": 6772, "token_acc": 0.2952332460224559 }, { "epoch": 3.970389914980944, "grad_norm": 0.22039579356326613, "learning_rate": 0.00047045389707295874, "loss": 3.080740213394165, "step": 6773, "token_acc": 0.28981163513705727 }, { "epoch": 3.970976253298153, "grad_norm": 0.2606676557694435, "learning_rate": 0.0004704424692044642, "loss": 3.03606915473938, "step": 6774, "token_acc": 0.29545551772471396 }, { "epoch": 3.971562591615362, "grad_norm": 0.21043388557794823, "learning_rate": 0.0004704310392652098, "loss": 3.0216474533081055, "step": 6775, "token_acc": 0.29878209977201997 }, { "epoch": 3.972148929932571, "grad_norm": 0.25791143071491207, "learning_rate": 0.00047041960725530287, "loss": 3.078902244567871, "step": 6776, "token_acc": 0.290062340518225 }, { "epoch": 3.97273526824978, "grad_norm": 0.19272324955183281, "learning_rate": 0.0004704081731748506, "loss": 3.126561403274536, "step": 6777, "token_acc": 0.28126656033584685 }, { "epoch": 3.973321606566989, "grad_norm": 0.29041820023243975, "learning_rate": 0.0004703967370239607, "loss": 3.0903172492980957, "step": 6778, "token_acc": 0.2903547974345865 }, { "epoch": 3.973907944884198, "grad_norm": 0.19181091816741253, "learning_rate": 0.0004703852988027404, "loss": 3.0639896392822266, "step": 6779, "token_acc": 0.29090201209321787 }, { "epoch": 3.974494283201407, "grad_norm": 0.23078965544665764, "learning_rate": 0.0004703738585112971, "loss": 3.1098828315734863, "step": 6780, "token_acc": 0.28441182772659096 }, { "epoch": 3.9750806215186163, "grad_norm": 0.17526521185359176, "learning_rate": 0.0004703624161497384, "loss": 3.049208641052246, "step": 6781, "token_acc": 0.2939239083220482 }, { "epoch": 3.9756669598358254, "grad_norm": 0.25784573234595515, "learning_rate": 0.0004703509717181718, "loss": 3.161747455596924, "step": 6782, "token_acc": 0.2798242044020569 }, { "epoch": 3.9762532981530345, "grad_norm": 0.19943207011031283, "learning_rate": 0.0004703395252167048, "loss": 3.075469493865967, "step": 6783, "token_acc": 0.28981551131825267 }, { "epoch": 3.976839636470243, "grad_norm": 0.23025895079809544, "learning_rate": 0.00047032807664544477, "loss": 3.0728116035461426, "step": 6784, "token_acc": 0.2889932158389606 }, { "epoch": 3.9774259747874523, "grad_norm": 0.2037055254242142, "learning_rate": 0.0004703166260044993, "loss": 3.1167564392089844, "step": 6785, "token_acc": 0.28381103058487617 }, { "epoch": 3.9780123131046614, "grad_norm": 0.23559765457279833, "learning_rate": 0.0004703051732939761, "loss": 3.0963549613952637, "step": 6786, "token_acc": 0.2870102681686771 }, { "epoch": 3.9785986514218705, "grad_norm": 0.21976918916749097, "learning_rate": 0.0004702937185139826, "loss": 3.068171977996826, "step": 6787, "token_acc": 0.28993141390446336 }, { "epoch": 3.979184989739079, "grad_norm": 0.2061312057577153, "learning_rate": 0.0004702822616646265, "loss": 3.087235927581787, "step": 6788, "token_acc": 0.28833241505484625 }, { "epoch": 3.9797713280562883, "grad_norm": 0.22580428526740257, "learning_rate": 0.0004702708027460154, "loss": 3.0525827407836914, "step": 6789, "token_acc": 0.29281732523030274 }, { "epoch": 3.9803576663734974, "grad_norm": 0.19661689762957427, "learning_rate": 0.00047025934175825695, "loss": 3.065704822540283, "step": 6790, "token_acc": 0.29177415572232646 }, { "epoch": 3.9809440046907065, "grad_norm": 0.19885967618202607, "learning_rate": 0.0004702478787014588, "loss": 3.0729615688323975, "step": 6791, "token_acc": 0.2898957642327846 }, { "epoch": 3.9815303430079156, "grad_norm": 0.21685628523747688, "learning_rate": 0.00047023641357572853, "loss": 3.087904453277588, "step": 6792, "token_acc": 0.2873452241358908 }, { "epoch": 3.9821166813251248, "grad_norm": 0.19689846946269074, "learning_rate": 0.000470224946381174, "loss": 3.068155288696289, "step": 6793, "token_acc": 0.2914459558946129 }, { "epoch": 3.982703019642334, "grad_norm": 0.18966152394711594, "learning_rate": 0.0004702134771179028, "loss": 3.107975959777832, "step": 6794, "token_acc": 0.2882968528385786 }, { "epoch": 3.9832893579595425, "grad_norm": 0.20914988185962483, "learning_rate": 0.0004702020057860228, "loss": 3.064915180206299, "step": 6795, "token_acc": 0.2908122308983434 }, { "epoch": 3.9838756962767516, "grad_norm": 0.1944886030313796, "learning_rate": 0.0004701905323856416, "loss": 3.0708730220794678, "step": 6796, "token_acc": 0.29118811358598123 }, { "epoch": 3.9844620345939608, "grad_norm": 0.2371724988431959, "learning_rate": 0.0004701790569168671, "loss": 3.069450855255127, "step": 6797, "token_acc": 0.2915478681309151 }, { "epoch": 3.98504837291117, "grad_norm": 0.22705857027517132, "learning_rate": 0.00047016757937980706, "loss": 3.084702491760254, "step": 6798, "token_acc": 0.29040560833043855 }, { "epoch": 3.9856347112283785, "grad_norm": 0.2573477318946264, "learning_rate": 0.00047015609977456925, "loss": 3.1077802181243896, "step": 6799, "token_acc": 0.2852639290370868 }, { "epoch": 3.9862210495455876, "grad_norm": 0.2162666094342554, "learning_rate": 0.0004701446181012615, "loss": 3.0677242279052734, "step": 6800, "token_acc": 0.2913358303465076 }, { "epoch": 3.9868073878627968, "grad_norm": 0.2195683330686813, "learning_rate": 0.00047013313435999185, "loss": 3.1398673057556152, "step": 6801, "token_acc": 0.28236704127812573 }, { "epoch": 3.987393726180006, "grad_norm": 0.1856475485866107, "learning_rate": 0.00047012164855086795, "loss": 3.1162350177764893, "step": 6802, "token_acc": 0.2846689500183677 }, { "epoch": 3.987980064497215, "grad_norm": 0.20089717677320684, "learning_rate": 0.0004701101606739978, "loss": 3.0914692878723145, "step": 6803, "token_acc": 0.2884379928890315 }, { "epoch": 3.988566402814424, "grad_norm": 0.2482854516135699, "learning_rate": 0.0004700986707294893, "loss": 3.0586884021759033, "step": 6804, "token_acc": 0.2918337332610883 }, { "epoch": 3.989152741131633, "grad_norm": 0.20052617053776917, "learning_rate": 0.0004700871787174504, "loss": 2.9862594604492188, "step": 6805, "token_acc": 0.30325846234855014 }, { "epoch": 3.989739079448842, "grad_norm": 0.20058594257541457, "learning_rate": 0.000470075684637989, "loss": 3.0808024406433105, "step": 6806, "token_acc": 0.2891893821921162 }, { "epoch": 3.990325417766051, "grad_norm": 0.16283201469828032, "learning_rate": 0.00047006418849121305, "loss": 3.0789926052093506, "step": 6807, "token_acc": 0.2884958781558867 }, { "epoch": 3.99091175608326, "grad_norm": 0.204195383943405, "learning_rate": 0.00047005269027723066, "loss": 3.13724422454834, "step": 6808, "token_acc": 0.28270051005225816 }, { "epoch": 3.991498094400469, "grad_norm": 0.18817203242199967, "learning_rate": 0.00047004118999614976, "loss": 3.068647861480713, "step": 6809, "token_acc": 0.2913964300063135 }, { "epoch": 3.992084432717678, "grad_norm": 0.23832667940108215, "learning_rate": 0.00047002968764807835, "loss": 3.067481279373169, "step": 6810, "token_acc": 0.2912233357989168 }, { "epoch": 3.992670771034887, "grad_norm": 0.26409117100894863, "learning_rate": 0.0004700181832331245, "loss": 3.0568323135375977, "step": 6811, "token_acc": 0.29265277608250206 }, { "epoch": 3.993257109352096, "grad_norm": 0.20437327768526597, "learning_rate": 0.00047000667675139643, "loss": 3.0584967136383057, "step": 6812, "token_acc": 0.2944079284149313 }, { "epoch": 3.993843447669305, "grad_norm": 0.20678632204099828, "learning_rate": 0.00046999516820300194, "loss": 3.0637078285217285, "step": 6813, "token_acc": 0.2916622487955375 }, { "epoch": 3.9944297859865143, "grad_norm": 0.22612083609515282, "learning_rate": 0.0004699836575880494, "loss": 3.064908742904663, "step": 6814, "token_acc": 0.2922555640035923 }, { "epoch": 3.9950161243037234, "grad_norm": 0.1843691557535468, "learning_rate": 0.0004699721449066468, "loss": 3.0699219703674316, "step": 6815, "token_acc": 0.2931559206413617 }, { "epoch": 3.9956024626209325, "grad_norm": 0.19022653115325808, "learning_rate": 0.0004699606301589022, "loss": 3.0397629737854004, "step": 6816, "token_acc": 0.2974636737121637 }, { "epoch": 3.996188800938141, "grad_norm": 0.21282574312559707, "learning_rate": 0.000469949113344924, "loss": 3.0314745903015137, "step": 6817, "token_acc": 0.29501121538596625 }, { "epoch": 3.9967751392553503, "grad_norm": 0.21333608177095004, "learning_rate": 0.0004699375944648203, "loss": 3.09308123588562, "step": 6818, "token_acc": 0.2859053517283609 }, { "epoch": 3.9973614775725594, "grad_norm": 0.19307933035407412, "learning_rate": 0.0004699260735186992, "loss": 3.114429473876953, "step": 6819, "token_acc": 0.28392725876489444 }, { "epoch": 3.9979478158897686, "grad_norm": 0.2531383379432577, "learning_rate": 0.000469914550506669, "loss": 3.096020221710205, "step": 6820, "token_acc": 0.2849527158987264 }, { "epoch": 3.998534154206977, "grad_norm": 0.22700210949037603, "learning_rate": 0.0004699030254288379, "loss": 3.0789248943328857, "step": 6821, "token_acc": 0.2893205798244749 }, { "epoch": 3.9991204925241863, "grad_norm": 0.2120301985441921, "learning_rate": 0.0004698914982853142, "loss": 3.077389717102051, "step": 6822, "token_acc": 0.2903556090918613 }, { "epoch": 3.9997068308413954, "grad_norm": 0.24120719983359898, "learning_rate": 0.0004698799690762063, "loss": 3.0784013271331787, "step": 6823, "token_acc": 0.29167256869966407 }, { "epoch": 4.0, "grad_norm": 0.2698352357714906, "learning_rate": 0.00046986843780162223, "loss": 3.1038906574249268, "step": 6824, "token_acc": 0.2831471489728196 }, { "epoch": 4.0, "eval_loss": 3.089488983154297, "eval_runtime": 6.5439, "eval_samples_per_second": 39.121, "eval_steps_per_second": 4.89, "eval_token_acc": 0.2882056610713456, "step": 6824 }, { "epoch": 4.000586338317209, "grad_norm": 0.22260690071440387, "learning_rate": 0.0004698569044616706, "loss": 3.0069327354431152, "step": 6825, "token_acc": 0.298171431771723 }, { "epoch": 4.001172676634418, "grad_norm": 0.2655844088085972, "learning_rate": 0.00046984536905645953, "loss": 3.030156135559082, "step": 6826, "token_acc": 0.2951415601891875 }, { "epoch": 4.001759014951627, "grad_norm": 0.268079784754666, "learning_rate": 0.00046983383158609747, "loss": 2.9917855262756348, "step": 6827, "token_acc": 0.3012787279564827 }, { "epoch": 4.0023453532688364, "grad_norm": 0.20488151570251312, "learning_rate": 0.0004698222920506928, "loss": 3.061344623565674, "step": 6828, "token_acc": 0.29035067229963035 }, { "epoch": 4.002931691586046, "grad_norm": 0.23190414435654447, "learning_rate": 0.0004698107504503539, "loss": 3.0224814414978027, "step": 6829, "token_acc": 0.2965082577122922 }, { "epoch": 4.003518029903254, "grad_norm": 0.2534718398324014, "learning_rate": 0.0004697992067851893, "loss": 2.957138776779175, "step": 6830, "token_acc": 0.3075330827729512 }, { "epoch": 4.004104368220463, "grad_norm": 0.18137690513984497, "learning_rate": 0.0004697876610553072, "loss": 2.981412649154663, "step": 6831, "token_acc": 0.30177488602664543 }, { "epoch": 4.004690706537672, "grad_norm": 0.22079347468450836, "learning_rate": 0.00046977611326081624, "loss": 3.004589080810547, "step": 6832, "token_acc": 0.29844672013202495 }, { "epoch": 4.005277044854881, "grad_norm": 0.1813443221535867, "learning_rate": 0.0004697645634018248, "loss": 2.9939393997192383, "step": 6833, "token_acc": 0.2994442928318364 }, { "epoch": 4.00586338317209, "grad_norm": 0.2529627652999093, "learning_rate": 0.0004697530114784415, "loss": 3.054389715194702, "step": 6834, "token_acc": 0.29264113875534936 }, { "epoch": 4.006449721489299, "grad_norm": 0.23235954691165187, "learning_rate": 0.0004697414574907748, "loss": 3.0710723400115967, "step": 6835, "token_acc": 0.2920870531901194 }, { "epoch": 4.0070360598065085, "grad_norm": 0.20611726338978273, "learning_rate": 0.0004697299014389331, "loss": 2.97102689743042, "step": 6836, "token_acc": 0.3029358014804776 }, { "epoch": 4.007622398123718, "grad_norm": 0.27159685179124016, "learning_rate": 0.0004697183433230252, "loss": 3.025338649749756, "step": 6837, "token_acc": 0.2946773093330347 }, { "epoch": 4.008208736440927, "grad_norm": 0.19514061563943455, "learning_rate": 0.00046970678314315944, "loss": 3.015861749649048, "step": 6838, "token_acc": 0.29786056615720413 }, { "epoch": 4.008795074758136, "grad_norm": 0.22763630656875633, "learning_rate": 0.0004696952208994446, "loss": 2.989206075668335, "step": 6839, "token_acc": 0.3004810393446307 }, { "epoch": 4.009381413075345, "grad_norm": 0.20991918224800166, "learning_rate": 0.0004696836565919891, "loss": 3.0400190353393555, "step": 6840, "token_acc": 0.2946971602264031 }, { "epoch": 4.009967751392553, "grad_norm": 0.2245715301180232, "learning_rate": 0.00046967209022090174, "loss": 3.0174708366394043, "step": 6841, "token_acc": 0.29670940401849466 }, { "epoch": 4.010554089709762, "grad_norm": 0.23340546889696623, "learning_rate": 0.00046966052178629104, "loss": 3.0225179195404053, "step": 6842, "token_acc": 0.296744903911174 }, { "epoch": 4.011140428026971, "grad_norm": 0.1753424505645774, "learning_rate": 0.0004696489512882658, "loss": 3.0634422302246094, "step": 6843, "token_acc": 0.2912659258365157 }, { "epoch": 4.0117267663441805, "grad_norm": 0.18240601105511994, "learning_rate": 0.0004696373787269346, "loss": 2.9633193016052246, "step": 6844, "token_acc": 0.304967215783787 }, { "epoch": 4.01231310466139, "grad_norm": 0.18507481245384436, "learning_rate": 0.0004696258041024062, "loss": 3.045401096343994, "step": 6845, "token_acc": 0.2939066720067367 }, { "epoch": 4.012899442978599, "grad_norm": 0.1792942214824011, "learning_rate": 0.00046961422741478935, "loss": 2.9911468029022217, "step": 6846, "token_acc": 0.30138896366083445 }, { "epoch": 4.013485781295808, "grad_norm": 0.20668944854740126, "learning_rate": 0.0004696026486641928, "loss": 3.0343358516693115, "step": 6847, "token_acc": 0.2941754755545524 }, { "epoch": 4.014072119613017, "grad_norm": 0.1994219588846641, "learning_rate": 0.00046959106785072514, "loss": 3.062152624130249, "step": 6848, "token_acc": 0.2924575678086985 }, { "epoch": 4.014658457930226, "grad_norm": 0.1785525985589352, "learning_rate": 0.00046957948497449543, "loss": 3.0140161514282227, "step": 6849, "token_acc": 0.2967031013048901 }, { "epoch": 4.015244796247435, "grad_norm": 0.18257138021147615, "learning_rate": 0.0004695679000356123, "loss": 3.0283353328704834, "step": 6850, "token_acc": 0.29580266924615367 }, { "epoch": 4.015831134564644, "grad_norm": 0.198966707388853, "learning_rate": 0.00046955631303418466, "loss": 3.016545295715332, "step": 6851, "token_acc": 0.29634337463181054 }, { "epoch": 4.0164174728818525, "grad_norm": 0.18327198870616615, "learning_rate": 0.0004695447239703212, "loss": 3.009713888168335, "step": 6852, "token_acc": 0.29964154593828946 }, { "epoch": 4.017003811199062, "grad_norm": 0.2168668193096582, "learning_rate": 0.000469533132844131, "loss": 3.051368474960327, "step": 6853, "token_acc": 0.2934052877563573 }, { "epoch": 4.017590149516271, "grad_norm": 0.24085387395127703, "learning_rate": 0.00046952153965572287, "loss": 3.037550210952759, "step": 6854, "token_acc": 0.29297129872932454 }, { "epoch": 4.01817648783348, "grad_norm": 0.18981365391191862, "learning_rate": 0.00046950994440520563, "loss": 3.0249505043029785, "step": 6855, "token_acc": 0.29575096425681285 }, { "epoch": 4.018762826150689, "grad_norm": 0.19621160798260576, "learning_rate": 0.00046949834709268825, "loss": 3.012082576751709, "step": 6856, "token_acc": 0.2975827049689125 }, { "epoch": 4.019349164467898, "grad_norm": 0.2565647922835809, "learning_rate": 0.00046948674771827973, "loss": 3.0411696434020996, "step": 6857, "token_acc": 0.2940476996627707 }, { "epoch": 4.019935502785107, "grad_norm": 0.21604388463244778, "learning_rate": 0.0004694751462820889, "loss": 3.024933338165283, "step": 6858, "token_acc": 0.2972631805119115 }, { "epoch": 4.020521841102316, "grad_norm": 0.18801112042431076, "learning_rate": 0.0004694635427842249, "loss": 3.0511531829833984, "step": 6859, "token_acc": 0.2917154490918698 }, { "epoch": 4.021108179419525, "grad_norm": 0.2210499019985458, "learning_rate": 0.0004694519372247965, "loss": 2.994570255279541, "step": 6860, "token_acc": 0.3013219154232064 }, { "epoch": 4.0216945177367345, "grad_norm": 0.16799811551388025, "learning_rate": 0.000469440329603913, "loss": 3.0238254070281982, "step": 6861, "token_acc": 0.29599403763741383 }, { "epoch": 4.022280856053943, "grad_norm": 0.19582023390415662, "learning_rate": 0.0004694287199216833, "loss": 3.0360264778137207, "step": 6862, "token_acc": 0.29391213439336034 }, { "epoch": 4.022867194371152, "grad_norm": 0.2260617593650311, "learning_rate": 0.0004694171081782164, "loss": 3.0060901641845703, "step": 6863, "token_acc": 0.2989718281996033 }, { "epoch": 4.023453532688361, "grad_norm": 0.21839909724678994, "learning_rate": 0.00046940549437362146, "loss": 3.000239849090576, "step": 6864, "token_acc": 0.2988024769691664 }, { "epoch": 4.02403987100557, "grad_norm": 0.23015586094481597, "learning_rate": 0.0004693938785080076, "loss": 3.0355401039123535, "step": 6865, "token_acc": 0.29435923529839353 }, { "epoch": 4.024626209322779, "grad_norm": 0.2102867049700502, "learning_rate": 0.0004693822605814838, "loss": 3.021564245223999, "step": 6866, "token_acc": 0.2970450480691062 }, { "epoch": 4.025212547639988, "grad_norm": 0.2960152409563197, "learning_rate": 0.00046937064059415936, "loss": 3.0535082817077637, "step": 6867, "token_acc": 0.2923429562631644 }, { "epoch": 4.025798885957197, "grad_norm": 0.2861481980028111, "learning_rate": 0.00046935901854614333, "loss": 3.020144462585449, "step": 6868, "token_acc": 0.29523312839874344 }, { "epoch": 4.0263852242744065, "grad_norm": 0.20496457856794262, "learning_rate": 0.00046934739443754486, "loss": 3.0126473903656006, "step": 6869, "token_acc": 0.2960968938425475 }, { "epoch": 4.026971562591616, "grad_norm": 0.22771350289531408, "learning_rate": 0.00046933576826847323, "loss": 2.9915900230407715, "step": 6870, "token_acc": 0.30009344188344467 }, { "epoch": 4.027557900908825, "grad_norm": 0.19861864730551765, "learning_rate": 0.0004693241400390377, "loss": 2.9985527992248535, "step": 6871, "token_acc": 0.2989354973629714 }, { "epoch": 4.028144239226034, "grad_norm": 0.3103334622836779, "learning_rate": 0.00046931250974934733, "loss": 3.0265254974365234, "step": 6872, "token_acc": 0.2956640482547322 }, { "epoch": 4.028730577543242, "grad_norm": 0.25773326840042843, "learning_rate": 0.0004693008773995114, "loss": 3.0148262977600098, "step": 6873, "token_acc": 0.2984690942184378 }, { "epoch": 4.029316915860451, "grad_norm": 0.2649035462786855, "learning_rate": 0.0004692892429896394, "loss": 3.0531787872314453, "step": 6874, "token_acc": 0.29167547952571105 }, { "epoch": 4.02990325417766, "grad_norm": 0.278222421407958, "learning_rate": 0.0004692776065198403, "loss": 3.005423069000244, "step": 6875, "token_acc": 0.29826530234133897 }, { "epoch": 4.030489592494869, "grad_norm": 0.24209249537953462, "learning_rate": 0.00046926596799022364, "loss": 3.0543293952941895, "step": 6876, "token_acc": 0.2919332278853318 }, { "epoch": 4.0310759308120785, "grad_norm": 0.2978605655946107, "learning_rate": 0.00046925432740089866, "loss": 2.9957046508789062, "step": 6877, "token_acc": 0.29955352697965193 }, { "epoch": 4.031662269129288, "grad_norm": 0.18059172446797572, "learning_rate": 0.0004692426847519747, "loss": 3.038783073425293, "step": 6878, "token_acc": 0.29602598444556594 }, { "epoch": 4.032248607446497, "grad_norm": 0.21756620978469665, "learning_rate": 0.0004692310400435612, "loss": 3.0213725566864014, "step": 6879, "token_acc": 0.29742703734387615 }, { "epoch": 4.032834945763706, "grad_norm": 0.21685630435933803, "learning_rate": 0.0004692193932757676, "loss": 3.037707567214966, "step": 6880, "token_acc": 0.29347050211276043 }, { "epoch": 4.033421284080915, "grad_norm": 0.21359526064619144, "learning_rate": 0.000469207744448703, "loss": 3.0247395038604736, "step": 6881, "token_acc": 0.2965832871782505 }, { "epoch": 4.034007622398124, "grad_norm": 0.22520771511111365, "learning_rate": 0.00046919609356247717, "loss": 3.0610897541046143, "step": 6882, "token_acc": 0.29090531007896603 }, { "epoch": 4.034593960715333, "grad_norm": 0.20290081837452523, "learning_rate": 0.00046918444061719934, "loss": 2.9987564086914062, "step": 6883, "token_acc": 0.29979073384058197 }, { "epoch": 4.035180299032541, "grad_norm": 0.2451934884490686, "learning_rate": 0.00046917278561297904, "loss": 3.043059825897217, "step": 6884, "token_acc": 0.292794214786145 }, { "epoch": 4.0357666373497505, "grad_norm": 0.22227876744965416, "learning_rate": 0.00046916112854992575, "loss": 2.9726884365081787, "step": 6885, "token_acc": 0.30394178741136824 }, { "epoch": 4.03635297566696, "grad_norm": 0.2576036925090188, "learning_rate": 0.000469149469428149, "loss": 3.020151376724243, "step": 6886, "token_acc": 0.29739871353971087 }, { "epoch": 4.036939313984169, "grad_norm": 0.1998107795688115, "learning_rate": 0.00046913780824775833, "loss": 3.0467567443847656, "step": 6887, "token_acc": 0.2910934332431466 }, { "epoch": 4.037525652301378, "grad_norm": 0.23337729140136795, "learning_rate": 0.0004691261450088632, "loss": 3.01007342338562, "step": 6888, "token_acc": 0.29989450085718056 }, { "epoch": 4.038111990618587, "grad_norm": 0.19617003535014987, "learning_rate": 0.00046911447971157317, "loss": 3.027754783630371, "step": 6889, "token_acc": 0.2948005332786381 }, { "epoch": 4.038698328935796, "grad_norm": 0.18972949392499389, "learning_rate": 0.00046910281235599795, "loss": 3.038025379180908, "step": 6890, "token_acc": 0.2946342430105396 }, { "epoch": 4.039284667253005, "grad_norm": 0.20987356005107077, "learning_rate": 0.00046909114294224705, "loss": 3.0281636714935303, "step": 6891, "token_acc": 0.29545131724047097 }, { "epoch": 4.039871005570214, "grad_norm": 0.20144081539719003, "learning_rate": 0.00046907947147043004, "loss": 3.0218405723571777, "step": 6892, "token_acc": 0.29522746489039203 }, { "epoch": 4.040457343887423, "grad_norm": 0.17852191284728103, "learning_rate": 0.0004690677979406566, "loss": 3.032956600189209, "step": 6893, "token_acc": 0.29439937922192044 }, { "epoch": 4.0410436822046325, "grad_norm": 0.206433891291475, "learning_rate": 0.0004690561223530364, "loss": 3.0238828659057617, "step": 6894, "token_acc": 0.2953048607508236 }, { "epoch": 4.041630020521841, "grad_norm": 0.22214700093232284, "learning_rate": 0.0004690444447076792, "loss": 3.0033318996429443, "step": 6895, "token_acc": 0.2993316463979356 }, { "epoch": 4.04221635883905, "grad_norm": 0.17651990440778284, "learning_rate": 0.0004690327650046945, "loss": 3.054837226867676, "step": 6896, "token_acc": 0.2911309698899148 }, { "epoch": 4.042802697156259, "grad_norm": 0.24616805696331384, "learning_rate": 0.0004690210832441922, "loss": 3.043307304382324, "step": 6897, "token_acc": 0.2925626425989864 }, { "epoch": 4.043389035473468, "grad_norm": 0.2441945725380238, "learning_rate": 0.000469009399426282, "loss": 3.0131282806396484, "step": 6898, "token_acc": 0.2974898018180066 }, { "epoch": 4.043975373790677, "grad_norm": 0.175712746485911, "learning_rate": 0.00046899771355107345, "loss": 2.9759538173675537, "step": 6899, "token_acc": 0.3021990449542427 }, { "epoch": 4.044561712107886, "grad_norm": 0.24009947878923604, "learning_rate": 0.00046898602561867657, "loss": 3.000570297241211, "step": 6900, "token_acc": 0.29952857427842466 }, { "epoch": 4.045148050425095, "grad_norm": 0.2324834753336778, "learning_rate": 0.0004689743356292011, "loss": 3.0600855350494385, "step": 6901, "token_acc": 0.29177262843092344 }, { "epoch": 4.0457343887423045, "grad_norm": 0.183741354758939, "learning_rate": 0.00046896264358275676, "loss": 2.99874210357666, "step": 6902, "token_acc": 0.2999199936650274 }, { "epoch": 4.046320727059514, "grad_norm": 0.26727244927978455, "learning_rate": 0.0004689509494794535, "loss": 3.0150675773620605, "step": 6903, "token_acc": 0.29761275062625375 }, { "epoch": 4.046907065376723, "grad_norm": 0.25032008672551587, "learning_rate": 0.0004689392533194011, "loss": 3.0060577392578125, "step": 6904, "token_acc": 0.29764915118869306 }, { "epoch": 4.047493403693931, "grad_norm": 0.18609127997940217, "learning_rate": 0.00046892755510270935, "loss": 2.997628927230835, "step": 6905, "token_acc": 0.2988568791614477 }, { "epoch": 4.04807974201114, "grad_norm": 0.2354162334673799, "learning_rate": 0.00046891585482948826, "loss": 3.01798677444458, "step": 6906, "token_acc": 0.2965549797217386 }, { "epoch": 4.048666080328349, "grad_norm": 0.24765067703019808, "learning_rate": 0.0004689041524998478, "loss": 3.042515277862549, "step": 6907, "token_acc": 0.29236058516881774 }, { "epoch": 4.049252418645558, "grad_norm": 0.19434951343111195, "learning_rate": 0.00046889244811389776, "loss": 3.006464958190918, "step": 6908, "token_acc": 0.29977633958313504 }, { "epoch": 4.049838756962767, "grad_norm": 0.21516852405378936, "learning_rate": 0.0004688807416717481, "loss": 3.0086557865142822, "step": 6909, "token_acc": 0.29720220116237567 }, { "epoch": 4.0504250952799765, "grad_norm": 0.1889444532997563, "learning_rate": 0.0004688690331735088, "loss": 3.012896776199341, "step": 6910, "token_acc": 0.298572178612132 }, { "epoch": 4.051011433597186, "grad_norm": 0.19430157426751662, "learning_rate": 0.00046885732261928994, "loss": 3.023897409439087, "step": 6911, "token_acc": 0.29463307222130825 }, { "epoch": 4.051597771914395, "grad_norm": 0.22192942669809293, "learning_rate": 0.0004688456100092015, "loss": 3.054649829864502, "step": 6912, "token_acc": 0.2913921472684714 }, { "epoch": 4.052184110231604, "grad_norm": 0.19648577572836648, "learning_rate": 0.00046883389534335327, "loss": 3.0402438640594482, "step": 6913, "token_acc": 0.295317558497618 }, { "epoch": 4.052770448548813, "grad_norm": 0.2226932974454573, "learning_rate": 0.00046882217862185557, "loss": 3.0250353813171387, "step": 6914, "token_acc": 0.29640651755874853 }, { "epoch": 4.053356786866022, "grad_norm": 0.2060785075528739, "learning_rate": 0.00046881045984481837, "loss": 3.006999969482422, "step": 6915, "token_acc": 0.29778659851071165 }, { "epoch": 4.05394312518323, "grad_norm": 0.19013061283197213, "learning_rate": 0.00046879873901235173, "loss": 2.949985980987549, "step": 6916, "token_acc": 0.30683229477546314 }, { "epoch": 4.054529463500439, "grad_norm": 0.22219645702940816, "learning_rate": 0.00046878701612456574, "loss": 3.033959150314331, "step": 6917, "token_acc": 0.29532168252842006 }, { "epoch": 4.0551158018176485, "grad_norm": 0.19965449878892974, "learning_rate": 0.0004687752911815706, "loss": 3.0350377559661865, "step": 6918, "token_acc": 0.293743314583287 }, { "epoch": 4.055702140134858, "grad_norm": 0.19549447919118917, "learning_rate": 0.00046876356418347644, "loss": 3.0276265144348145, "step": 6919, "token_acc": 0.29501917717719295 }, { "epoch": 4.056288478452067, "grad_norm": 0.18212262711844912, "learning_rate": 0.00046875183513039333, "loss": 3.014735221862793, "step": 6920, "token_acc": 0.29793146753711786 }, { "epoch": 4.056874816769276, "grad_norm": 0.18243033166319184, "learning_rate": 0.0004687401040224315, "loss": 3.0087013244628906, "step": 6921, "token_acc": 0.29906942969297123 }, { "epoch": 4.057461155086485, "grad_norm": 0.19626669876873382, "learning_rate": 0.0004687283708597012, "loss": 3.0438191890716553, "step": 6922, "token_acc": 0.2931826073536788 }, { "epoch": 4.058047493403694, "grad_norm": 0.22150554911158846, "learning_rate": 0.00046871663564231254, "loss": 3.0479063987731934, "step": 6923, "token_acc": 0.2920175835010887 }, { "epoch": 4.058633831720903, "grad_norm": 0.22735947314716726, "learning_rate": 0.00046870489837037583, "loss": 3.063234329223633, "step": 6924, "token_acc": 0.28998517458207657 }, { "epoch": 4.059220170038112, "grad_norm": 0.22102872460792763, "learning_rate": 0.0004686931590440013, "loss": 3.068086624145508, "step": 6925, "token_acc": 0.2878088784921083 }, { "epoch": 4.059806508355321, "grad_norm": 0.19411663310453908, "learning_rate": 0.00046868141766329927, "loss": 3.0074307918548584, "step": 6926, "token_acc": 0.29875983600295397 }, { "epoch": 4.06039284667253, "grad_norm": 0.17444384316267805, "learning_rate": 0.0004686696742283799, "loss": 2.984783172607422, "step": 6927, "token_acc": 0.3023880326092179 }, { "epoch": 4.060979184989739, "grad_norm": 0.19375440610173533, "learning_rate": 0.0004686579287393537, "loss": 3.0217232704162598, "step": 6928, "token_acc": 0.2973614188444067 }, { "epoch": 4.061565523306948, "grad_norm": 0.17997940963603093, "learning_rate": 0.0004686461811963309, "loss": 3.051995277404785, "step": 6929, "token_acc": 0.2927326467586518 }, { "epoch": 4.062151861624157, "grad_norm": 0.2288387475781029, "learning_rate": 0.00046863443159942184, "loss": 2.970278263092041, "step": 6930, "token_acc": 0.30419211444258354 }, { "epoch": 4.062738199941366, "grad_norm": 0.2366988918868131, "learning_rate": 0.0004686226799487369, "loss": 3.0362985134124756, "step": 6931, "token_acc": 0.29453910708291375 }, { "epoch": 4.063324538258575, "grad_norm": 0.18709339768117442, "learning_rate": 0.0004686109262443865, "loss": 3.0148675441741943, "step": 6932, "token_acc": 0.2977506943243436 }, { "epoch": 4.063910876575784, "grad_norm": 0.19991857977372324, "learning_rate": 0.00046859917048648093, "loss": 2.981264114379883, "step": 6933, "token_acc": 0.300840048358559 }, { "epoch": 4.064497214892993, "grad_norm": 0.21963186365571524, "learning_rate": 0.00046858741267513085, "loss": 3.0716636180877686, "step": 6934, "token_acc": 0.28713313762203063 }, { "epoch": 4.0650835532102025, "grad_norm": 0.2070139372725275, "learning_rate": 0.0004685756528104465, "loss": 2.9902968406677246, "step": 6935, "token_acc": 0.2996819271682306 }, { "epoch": 4.065669891527412, "grad_norm": 0.20291388398468088, "learning_rate": 0.00046856389089253847, "loss": 3.016145706176758, "step": 6936, "token_acc": 0.29670733619119183 }, { "epoch": 4.066256229844621, "grad_norm": 0.2087869225600144, "learning_rate": 0.00046855212692151714, "loss": 3.0176455974578857, "step": 6937, "token_acc": 0.29703672375761964 }, { "epoch": 4.066842568161829, "grad_norm": 0.2625364593450508, "learning_rate": 0.0004685403608974932, "loss": 3.0330357551574707, "step": 6938, "token_acc": 0.29709014992177546 }, { "epoch": 4.067428906479038, "grad_norm": 0.24001909845134076, "learning_rate": 0.000468528592820577, "loss": 3.039304733276367, "step": 6939, "token_acc": 0.29373897683558337 }, { "epoch": 4.068015244796247, "grad_norm": 0.18300065820668468, "learning_rate": 0.0004685168226908791, "loss": 3.045700788497925, "step": 6940, "token_acc": 0.2937364389721068 }, { "epoch": 4.068601583113456, "grad_norm": 0.2226301329618281, "learning_rate": 0.0004685050505085101, "loss": 3.0371899604797363, "step": 6941, "token_acc": 0.2920033768357562 }, { "epoch": 4.069187921430665, "grad_norm": 0.24995116450718977, "learning_rate": 0.0004684932762735806, "loss": 3.035311698913574, "step": 6942, "token_acc": 0.29509077037182757 }, { "epoch": 4.0697742597478745, "grad_norm": 0.21497200172938793, "learning_rate": 0.0004684814999862013, "loss": 3.020979404449463, "step": 6943, "token_acc": 0.29604287568821225 }, { "epoch": 4.070360598065084, "grad_norm": 0.19988610615377478, "learning_rate": 0.0004684697216464826, "loss": 2.9862046241760254, "step": 6944, "token_acc": 0.3006142949677721 }, { "epoch": 4.070946936382293, "grad_norm": 0.22694268163892065, "learning_rate": 0.0004684579412545353, "loss": 2.987281322479248, "step": 6945, "token_acc": 0.30181648812296225 }, { "epoch": 4.071533274699502, "grad_norm": 0.18285552289015322, "learning_rate": 0.00046844615881047, "loss": 2.980762243270874, "step": 6946, "token_acc": 0.3022050025639565 }, { "epoch": 4.072119613016711, "grad_norm": 0.2054839152647431, "learning_rate": 0.0004684343743143974, "loss": 3.023253917694092, "step": 6947, "token_acc": 0.2975228989944751 }, { "epoch": 4.07270595133392, "grad_norm": 0.1736171316412354, "learning_rate": 0.0004684225877664282, "loss": 3.0151586532592773, "step": 6948, "token_acc": 0.29745865161076057 }, { "epoch": 4.073292289651128, "grad_norm": 0.22389806306314175, "learning_rate": 0.0004684107991666731, "loss": 3.0390310287475586, "step": 6949, "token_acc": 0.2945940274800726 }, { "epoch": 4.073878627968337, "grad_norm": 0.21184294745580443, "learning_rate": 0.00046839900851524286, "loss": 2.989704132080078, "step": 6950, "token_acc": 0.30116297356587474 }, { "epoch": 4.0744649662855466, "grad_norm": 0.20746140767417867, "learning_rate": 0.00046838721581224824, "loss": 3.040753126144409, "step": 6951, "token_acc": 0.29387476718868866 }, { "epoch": 4.075051304602756, "grad_norm": 0.29710964299337256, "learning_rate": 0.0004683754210578, "loss": 3.0222530364990234, "step": 6952, "token_acc": 0.2965594844159549 }, { "epoch": 4.075637642919965, "grad_norm": 0.18860422709293817, "learning_rate": 0.00046836362425200893, "loss": 3.063882827758789, "step": 6953, "token_acc": 0.29041648997473846 }, { "epoch": 4.076223981237174, "grad_norm": 0.276509277095217, "learning_rate": 0.0004683518253949859, "loss": 2.996743679046631, "step": 6954, "token_acc": 0.3003923220727244 }, { "epoch": 4.076810319554383, "grad_norm": 0.26764628636433185, "learning_rate": 0.00046834002448684173, "loss": 3.0060930252075195, "step": 6955, "token_acc": 0.29875557158683946 }, { "epoch": 4.077396657871592, "grad_norm": 0.1932403849426315, "learning_rate": 0.00046832822152768715, "loss": 2.9837794303894043, "step": 6956, "token_acc": 0.30224323337039616 }, { "epoch": 4.077982996188801, "grad_norm": 0.23859170421072165, "learning_rate": 0.00046831641651763324, "loss": 3.035407781600952, "step": 6957, "token_acc": 0.2947849489134109 }, { "epoch": 4.07856933450601, "grad_norm": 0.2052574764075219, "learning_rate": 0.00046830460945679065, "loss": 3.0555951595306396, "step": 6958, "token_acc": 0.29304932392079996 }, { "epoch": 4.0791556728232194, "grad_norm": 0.2280025103484422, "learning_rate": 0.00046829280034527056, "loss": 3.0079140663146973, "step": 6959, "token_acc": 0.2987068084633035 }, { "epoch": 4.079742011140428, "grad_norm": 0.2635089767326323, "learning_rate": 0.00046828098918318367, "loss": 3.005866050720215, "step": 6960, "token_acc": 0.299281689315988 }, { "epoch": 4.080328349457637, "grad_norm": 0.19025951773023753, "learning_rate": 0.00046826917597064105, "loss": 3.0000319480895996, "step": 6961, "token_acc": 0.3006623697418174 }, { "epoch": 4.080914687774846, "grad_norm": 0.2016138841141072, "learning_rate": 0.0004682573607077537, "loss": 3.0290088653564453, "step": 6962, "token_acc": 0.29619968493963134 }, { "epoch": 4.081501026092055, "grad_norm": 0.20047344519557195, "learning_rate": 0.00046824554339463246, "loss": 3.019904136657715, "step": 6963, "token_acc": 0.29873826306209683 }, { "epoch": 4.082087364409264, "grad_norm": 0.1820617028158984, "learning_rate": 0.0004682337240313884, "loss": 3.007856845855713, "step": 6964, "token_acc": 0.2977153723639424 }, { "epoch": 4.082673702726473, "grad_norm": 0.19729080801125376, "learning_rate": 0.0004682219026181327, "loss": 3.0237984657287598, "step": 6965, "token_acc": 0.29659095075235814 }, { "epoch": 4.083260041043682, "grad_norm": 0.18015351188053685, "learning_rate": 0.00046821007915497624, "loss": 2.9836111068725586, "step": 6966, "token_acc": 0.3020407948608047 }, { "epoch": 4.0838463793608915, "grad_norm": 0.21627990643049952, "learning_rate": 0.0004681982536420301, "loss": 3.01456356048584, "step": 6967, "token_acc": 0.2977896329492328 }, { "epoch": 4.084432717678101, "grad_norm": 0.18436026984896042, "learning_rate": 0.0004681864260794054, "loss": 3.0086987018585205, "step": 6968, "token_acc": 0.29730919068635697 }, { "epoch": 4.08501905599531, "grad_norm": 0.1944022629982658, "learning_rate": 0.00046817459646721326, "loss": 3.0141165256500244, "step": 6969, "token_acc": 0.2971052221941911 }, { "epoch": 4.085605394312518, "grad_norm": 0.1918470188300405, "learning_rate": 0.00046816276480556474, "loss": 3.0507915019989014, "step": 6970, "token_acc": 0.29311558603364973 }, { "epoch": 4.086191732629727, "grad_norm": 0.1740556453643377, "learning_rate": 0.0004681509310945711, "loss": 3.0208933353424072, "step": 6971, "token_acc": 0.29482196426242335 }, { "epoch": 4.086778070946936, "grad_norm": 0.19032179316072878, "learning_rate": 0.00046813909533434335, "loss": 3.01246976852417, "step": 6972, "token_acc": 0.2965107650488066 }, { "epoch": 4.087364409264145, "grad_norm": 0.21728345570591184, "learning_rate": 0.00046812725752499274, "loss": 3.0037271976470947, "step": 6973, "token_acc": 0.2987636518999172 }, { "epoch": 4.087950747581354, "grad_norm": 0.2524870985994271, "learning_rate": 0.0004681154176666305, "loss": 3.028642177581787, "step": 6974, "token_acc": 0.2961405392507755 }, { "epoch": 4.0885370858985635, "grad_norm": 0.18524157869353852, "learning_rate": 0.00046810357575936785, "loss": 3.0291829109191895, "step": 6975, "token_acc": 0.2956931013254993 }, { "epoch": 4.089123424215773, "grad_norm": 0.17659972104074667, "learning_rate": 0.000468091731803316, "loss": 3.018859624862671, "step": 6976, "token_acc": 0.29561614632574973 }, { "epoch": 4.089709762532982, "grad_norm": 0.22539728764593853, "learning_rate": 0.00046807988579858616, "loss": 2.992833137512207, "step": 6977, "token_acc": 0.2999649081763949 }, { "epoch": 4.090296100850191, "grad_norm": 0.30569418253105196, "learning_rate": 0.00046806803774528973, "loss": 2.9887354373931885, "step": 6978, "token_acc": 0.3009159266741334 }, { "epoch": 4.0908824391674, "grad_norm": 0.2621945734935609, "learning_rate": 0.00046805618764353787, "loss": 3.0711047649383545, "step": 6979, "token_acc": 0.2881126315707986 }, { "epoch": 4.091468777484609, "grad_norm": 0.20531612116435735, "learning_rate": 0.000468044335493442, "loss": 3.019559860229492, "step": 6980, "token_acc": 0.29795473704688413 }, { "epoch": 4.092055115801817, "grad_norm": 0.22166480049157664, "learning_rate": 0.0004680324812951134, "loss": 3.047178030014038, "step": 6981, "token_acc": 0.2938977152407935 }, { "epoch": 4.092641454119026, "grad_norm": 0.2121211297742218, "learning_rate": 0.00046802062504866347, "loss": 3.0059547424316406, "step": 6982, "token_acc": 0.2987444083228393 }, { "epoch": 4.0932277924362355, "grad_norm": 0.18462009581558061, "learning_rate": 0.00046800876675420354, "loss": 3.023655414581299, "step": 6983, "token_acc": 0.2969322815833917 }, { "epoch": 4.093814130753445, "grad_norm": 0.21173537085481642, "learning_rate": 0.000467996906411845, "loss": 3.017996072769165, "step": 6984, "token_acc": 0.2972103843806563 }, { "epoch": 4.094400469070654, "grad_norm": 0.20464311722094766, "learning_rate": 0.0004679850440216993, "loss": 3.0208888053894043, "step": 6985, "token_acc": 0.2974608540494931 }, { "epoch": 4.094986807387863, "grad_norm": 0.1949518577052013, "learning_rate": 0.0004679731795838779, "loss": 3.0158894062042236, "step": 6986, "token_acc": 0.29754929260761226 }, { "epoch": 4.095573145705072, "grad_norm": 0.2683485014340852, "learning_rate": 0.00046796131309849204, "loss": 2.9732635021209717, "step": 6987, "token_acc": 0.30163195490339517 }, { "epoch": 4.096159484022281, "grad_norm": 0.19544464452482588, "learning_rate": 0.00046794944456565344, "loss": 3.0257205963134766, "step": 6988, "token_acc": 0.2946038875513207 }, { "epoch": 4.09674582233949, "grad_norm": 0.23640717720282306, "learning_rate": 0.00046793757398547355, "loss": 3.0117897987365723, "step": 6989, "token_acc": 0.2983151154517819 }, { "epoch": 4.097332160656699, "grad_norm": 0.2707466030490996, "learning_rate": 0.0004679257013580638, "loss": 3.0323681831359863, "step": 6990, "token_acc": 0.29341487952858564 }, { "epoch": 4.097918498973908, "grad_norm": 0.1836026728924073, "learning_rate": 0.0004679138266835357, "loss": 2.995007038116455, "step": 6991, "token_acc": 0.3000697303927806 }, { "epoch": 4.098504837291117, "grad_norm": 0.2678182527150261, "learning_rate": 0.0004679019499620009, "loss": 3.0316834449768066, "step": 6992, "token_acc": 0.29499220623026834 }, { "epoch": 4.099091175608326, "grad_norm": 0.2301636007508659, "learning_rate": 0.00046789007119357084, "loss": 3.0489447116851807, "step": 6993, "token_acc": 0.29207601971565733 }, { "epoch": 4.099677513925535, "grad_norm": 0.21162966333927336, "learning_rate": 0.0004678781903783572, "loss": 3.0323922634124756, "step": 6994, "token_acc": 0.29403135457397767 }, { "epoch": 4.100263852242744, "grad_norm": 0.3333828792077415, "learning_rate": 0.0004678663075164716, "loss": 3.0274620056152344, "step": 6995, "token_acc": 0.29581163590056564 }, { "epoch": 4.100850190559953, "grad_norm": 0.1681058846455195, "learning_rate": 0.0004678544226080256, "loss": 3.010754108428955, "step": 6996, "token_acc": 0.2970315127586459 }, { "epoch": 4.101436528877162, "grad_norm": 0.267796528364808, "learning_rate": 0.00046784253565313084, "loss": 3.020566940307617, "step": 6997, "token_acc": 0.29677590623714145 }, { "epoch": 4.102022867194371, "grad_norm": 0.23824032227667175, "learning_rate": 0.000467830646651899, "loss": 3.040004253387451, "step": 6998, "token_acc": 0.29354891818114226 }, { "epoch": 4.10260920551158, "grad_norm": 0.23235772502746654, "learning_rate": 0.00046781875560444175, "loss": 3.039081573486328, "step": 6999, "token_acc": 0.2932035022768879 }, { "epoch": 4.1031955438287895, "grad_norm": 0.22219244939826613, "learning_rate": 0.00046780686251087086, "loss": 3.031935691833496, "step": 7000, "token_acc": 0.29485961799394633 }, { "epoch": 4.103781882145999, "grad_norm": 0.18284440256275608, "learning_rate": 0.0004677949673712979, "loss": 3.0427985191345215, "step": 7001, "token_acc": 0.29369891272108317 }, { "epoch": 4.104368220463208, "grad_norm": 0.21760512164415677, "learning_rate": 0.0004677830701858348, "loss": 3.030557155609131, "step": 7002, "token_acc": 0.2954305546258297 }, { "epoch": 4.104954558780416, "grad_norm": 0.1733596256474521, "learning_rate": 0.0004677711709545932, "loss": 3.024519443511963, "step": 7003, "token_acc": 0.2940926970405417 }, { "epoch": 4.105540897097625, "grad_norm": 0.25388201387907494, "learning_rate": 0.0004677592696776849, "loss": 3.040799379348755, "step": 7004, "token_acc": 0.2931021595925974 }, { "epoch": 4.106127235414834, "grad_norm": 0.2078727164356633, "learning_rate": 0.0004677473663552217, "loss": 3.069350242614746, "step": 7005, "token_acc": 0.2919334588006818 }, { "epoch": 4.106713573732043, "grad_norm": 0.20615525699195483, "learning_rate": 0.0004677354609873153, "loss": 3.0088462829589844, "step": 7006, "token_acc": 0.2999498756967656 }, { "epoch": 4.107299912049252, "grad_norm": 0.21538929123990208, "learning_rate": 0.0004677235535740778, "loss": 3.0167691707611084, "step": 7007, "token_acc": 0.29699274894030075 }, { "epoch": 4.1078862503664615, "grad_norm": 0.2331754784166338, "learning_rate": 0.00046771164411562086, "loss": 3.0611116886138916, "step": 7008, "token_acc": 0.29120035903216074 }, { "epoch": 4.108472588683671, "grad_norm": 0.245061151178424, "learning_rate": 0.00046769973261205633, "loss": 2.997422218322754, "step": 7009, "token_acc": 0.29964422325504875 }, { "epoch": 4.10905892700088, "grad_norm": 0.21695938857743266, "learning_rate": 0.0004676878190634963, "loss": 3.0566868782043457, "step": 7010, "token_acc": 0.2918127681372863 }, { "epoch": 4.109645265318089, "grad_norm": 0.23389780342873942, "learning_rate": 0.0004676759034700524, "loss": 3.025515079498291, "step": 7011, "token_acc": 0.2969554855078035 }, { "epoch": 4.110231603635298, "grad_norm": 0.21530993723750178, "learning_rate": 0.0004676639858318368, "loss": 3.0238678455352783, "step": 7012, "token_acc": 0.2953092622668832 }, { "epoch": 4.110817941952506, "grad_norm": 0.2092232079236262, "learning_rate": 0.0004676520661489613, "loss": 2.995426654815674, "step": 7013, "token_acc": 0.30052392801218386 }, { "epoch": 4.111404280269715, "grad_norm": 0.22172985277176535, "learning_rate": 0.00046764014442153795, "loss": 3.0704421997070312, "step": 7014, "token_acc": 0.290688261338855 }, { "epoch": 4.111990618586924, "grad_norm": 0.1970558360186922, "learning_rate": 0.00046762822064967875, "loss": 3.048707962036133, "step": 7015, "token_acc": 0.29292835890713476 }, { "epoch": 4.1125769569041335, "grad_norm": 0.2358926141520634, "learning_rate": 0.00046761629483349563, "loss": 3.0217857360839844, "step": 7016, "token_acc": 0.29722266626101573 }, { "epoch": 4.113163295221343, "grad_norm": 0.2029791189883375, "learning_rate": 0.0004676043669731007, "loss": 3.0441980361938477, "step": 7017, "token_acc": 0.29347729864557354 }, { "epoch": 4.113749633538552, "grad_norm": 0.19396491898229826, "learning_rate": 0.00046759243706860594, "loss": 3.020205020904541, "step": 7018, "token_acc": 0.2940790661610851 }, { "epoch": 4.114335971855761, "grad_norm": 0.21385889856847917, "learning_rate": 0.00046758050512012346, "loss": 3.0264432430267334, "step": 7019, "token_acc": 0.2973115362123375 }, { "epoch": 4.11492231017297, "grad_norm": 0.21052951369067843, "learning_rate": 0.00046756857112776527, "loss": 3.0004782676696777, "step": 7020, "token_acc": 0.2981847379604314 }, { "epoch": 4.115508648490179, "grad_norm": 0.2276907459739103, "learning_rate": 0.0004675566350916436, "loss": 3.0024402141571045, "step": 7021, "token_acc": 0.3000336590692036 }, { "epoch": 4.116094986807388, "grad_norm": 0.17915423417343723, "learning_rate": 0.0004675446970118705, "loss": 3.0221309661865234, "step": 7022, "token_acc": 0.29533178398225673 }, { "epoch": 4.116681325124597, "grad_norm": 0.2214559681794065, "learning_rate": 0.0004675327568885581, "loss": 2.99198579788208, "step": 7023, "token_acc": 0.30182483857600695 }, { "epoch": 4.1172676634418055, "grad_norm": 0.1704442329849183, "learning_rate": 0.0004675208147218186, "loss": 3.045954465866089, "step": 7024, "token_acc": 0.2944355650576526 }, { "epoch": 4.117854001759015, "grad_norm": 0.20964180464879437, "learning_rate": 0.0004675088705117642, "loss": 2.953301429748535, "step": 7025, "token_acc": 0.3070579103117392 }, { "epoch": 4.118440340076224, "grad_norm": 0.2237840178751719, "learning_rate": 0.00046749692425850694, "loss": 3.055589199066162, "step": 7026, "token_acc": 0.29000910147223524 }, { "epoch": 4.119026678393433, "grad_norm": 0.20040454860254014, "learning_rate": 0.00046748497596215923, "loss": 3.0536410808563232, "step": 7027, "token_acc": 0.29176031449787554 }, { "epoch": 4.119613016710642, "grad_norm": 0.22960995696203798, "learning_rate": 0.0004674730256228332, "loss": 3.030405044555664, "step": 7028, "token_acc": 0.295961574259788 }, { "epoch": 4.120199355027851, "grad_norm": 0.24602932714665643, "learning_rate": 0.0004674610732406411, "loss": 3.0607151985168457, "step": 7029, "token_acc": 0.2915940105653545 }, { "epoch": 4.12078569334506, "grad_norm": 0.21155961842110704, "learning_rate": 0.0004674491188156954, "loss": 2.997239828109741, "step": 7030, "token_acc": 0.29983294129190347 }, { "epoch": 4.121372031662269, "grad_norm": 0.18861416969373043, "learning_rate": 0.0004674371623481081, "loss": 3.010213851928711, "step": 7031, "token_acc": 0.2983006104603201 }, { "epoch": 4.121958369979478, "grad_norm": 0.21770415666674248, "learning_rate": 0.00046742520383799183, "loss": 2.9993700981140137, "step": 7032, "token_acc": 0.29977478067303914 }, { "epoch": 4.1225447082966875, "grad_norm": 0.1922978996459676, "learning_rate": 0.00046741324328545853, "loss": 2.988560676574707, "step": 7033, "token_acc": 0.30240451702234267 }, { "epoch": 4.123131046613897, "grad_norm": 0.2106380183464224, "learning_rate": 0.0004674012806906209, "loss": 3.0431597232818604, "step": 7034, "token_acc": 0.29275645190565003 }, { "epoch": 4.123717384931105, "grad_norm": 0.2521084749734736, "learning_rate": 0.0004673893160535912, "loss": 3.027672290802002, "step": 7035, "token_acc": 0.2934456837460867 }, { "epoch": 4.124303723248314, "grad_norm": 0.3203196909258401, "learning_rate": 0.00046737734937448176, "loss": 3.071023941040039, "step": 7036, "token_acc": 0.28911407835364134 }, { "epoch": 4.124890061565523, "grad_norm": 0.22570587478439844, "learning_rate": 0.00046736538065340514, "loss": 3.0172128677368164, "step": 7037, "token_acc": 0.2966379727850771 }, { "epoch": 4.125476399882732, "grad_norm": 0.24113098085033072, "learning_rate": 0.0004673534098904736, "loss": 3.0400915145874023, "step": 7038, "token_acc": 0.2960601091014103 }, { "epoch": 4.126062738199941, "grad_norm": 0.26401919596547946, "learning_rate": 0.0004673414370857997, "loss": 3.007387161254883, "step": 7039, "token_acc": 0.29966184856119876 }, { "epoch": 4.12664907651715, "grad_norm": 0.18716402660398995, "learning_rate": 0.0004673294622394958, "loss": 3.003145217895508, "step": 7040, "token_acc": 0.2997829803163932 }, { "epoch": 4.1272354148343595, "grad_norm": 0.2716082962372313, "learning_rate": 0.0004673174853516745, "loss": 3.008039712905884, "step": 7041, "token_acc": 0.2982904955524663 }, { "epoch": 4.127821753151569, "grad_norm": 0.18188170870095105, "learning_rate": 0.00046730550642244825, "loss": 3.0684940814971924, "step": 7042, "token_acc": 0.2893988849151192 }, { "epoch": 4.128408091468778, "grad_norm": 0.2125056665408206, "learning_rate": 0.0004672935254519296, "loss": 3.0241904258728027, "step": 7043, "token_acc": 0.29768451519536904 }, { "epoch": 4.128994429785987, "grad_norm": 0.18690014248234924, "learning_rate": 0.000467281542440231, "loss": 3.008909225463867, "step": 7044, "token_acc": 0.2992523205675567 }, { "epoch": 4.129580768103196, "grad_norm": 0.21416336044173756, "learning_rate": 0.00046726955738746526, "loss": 3.0233187675476074, "step": 7045, "token_acc": 0.2950428836288544 }, { "epoch": 4.130167106420404, "grad_norm": 0.1846573960326613, "learning_rate": 0.00046725757029374474, "loss": 2.9919557571411133, "step": 7046, "token_acc": 0.30108723905343776 }, { "epoch": 4.130753444737613, "grad_norm": 0.20571309395340406, "learning_rate": 0.000467245581159182, "loss": 3.003404378890991, "step": 7047, "token_acc": 0.29898791080082937 }, { "epoch": 4.131339783054822, "grad_norm": 0.18782420717819068, "learning_rate": 0.0004672335899838899, "loss": 3.0275511741638184, "step": 7048, "token_acc": 0.29568912852931023 }, { "epoch": 4.1319261213720315, "grad_norm": 0.2154965341010058, "learning_rate": 0.00046722159676798093, "loss": 3.103482246398926, "step": 7049, "token_acc": 0.28561526489936306 }, { "epoch": 4.132512459689241, "grad_norm": 0.18755705767215644, "learning_rate": 0.0004672096015115678, "loss": 3.064627170562744, "step": 7050, "token_acc": 0.2891123361867217 }, { "epoch": 4.13309879800645, "grad_norm": 0.21327065598708966, "learning_rate": 0.00046719760421476305, "loss": 3.0556278228759766, "step": 7051, "token_acc": 0.29301282518779365 }, { "epoch": 4.133685136323659, "grad_norm": 0.19109782160511382, "learning_rate": 0.0004671856048776795, "loss": 3.037001609802246, "step": 7052, "token_acc": 0.292851874168883 }, { "epoch": 4.134271474640868, "grad_norm": 0.19425716655822833, "learning_rate": 0.00046717360350042995, "loss": 3.078153371810913, "step": 7053, "token_acc": 0.2885069093360297 }, { "epoch": 4.134857812958077, "grad_norm": 0.19570132400152884, "learning_rate": 0.0004671616000831269, "loss": 3.007575750350952, "step": 7054, "token_acc": 0.3003465111767369 }, { "epoch": 4.135444151275286, "grad_norm": 0.18609806584699723, "learning_rate": 0.00046714959462588335, "loss": 3.0401804447174072, "step": 7055, "token_acc": 0.2939394256804529 }, { "epoch": 4.136030489592494, "grad_norm": 0.1965627896488159, "learning_rate": 0.00046713758712881194, "loss": 3.090794086456299, "step": 7056, "token_acc": 0.2864947534795053 }, { "epoch": 4.1366168279097035, "grad_norm": 0.1876064957362089, "learning_rate": 0.00046712557759202557, "loss": 2.988605499267578, "step": 7057, "token_acc": 0.3021640287459731 }, { "epoch": 4.137203166226913, "grad_norm": 0.19365874979699654, "learning_rate": 0.0004671135660156369, "loss": 3.0327749252319336, "step": 7058, "token_acc": 0.2958143847057601 }, { "epoch": 4.137789504544122, "grad_norm": 0.17517389753847912, "learning_rate": 0.00046710155239975884, "loss": 3.0815582275390625, "step": 7059, "token_acc": 0.28933563457413514 }, { "epoch": 4.138375842861331, "grad_norm": 0.20282444417773865, "learning_rate": 0.00046708953674450427, "loss": 3.0498476028442383, "step": 7060, "token_acc": 0.29249855181867984 }, { "epoch": 4.13896218117854, "grad_norm": 0.19830425263450888, "learning_rate": 0.000467077519049986, "loss": 2.9706664085388184, "step": 7061, "token_acc": 0.3044392066985272 }, { "epoch": 4.139548519495749, "grad_norm": 0.17767338353855427, "learning_rate": 0.000467065499316317, "loss": 3.0146965980529785, "step": 7062, "token_acc": 0.2995778556351987 }, { "epoch": 4.140134857812958, "grad_norm": 0.20777925721691495, "learning_rate": 0.00046705347754361006, "loss": 3.0099711418151855, "step": 7063, "token_acc": 0.29751095883508843 }, { "epoch": 4.140721196130167, "grad_norm": 0.22985953041035598, "learning_rate": 0.0004670414537319783, "loss": 3.0828890800476074, "step": 7064, "token_acc": 0.2888711591282717 }, { "epoch": 4.141307534447376, "grad_norm": 0.2043645571169984, "learning_rate": 0.00046702942788153445, "loss": 3.066344976425171, "step": 7065, "token_acc": 0.29120096442598004 }, { "epoch": 4.1418938727645855, "grad_norm": 0.2160954783105623, "learning_rate": 0.00046701739999239155, "loss": 3.0320119857788086, "step": 7066, "token_acc": 0.2943595892196215 }, { "epoch": 4.142480211081795, "grad_norm": 0.2503227641533591, "learning_rate": 0.0004670053700646627, "loss": 3.0036163330078125, "step": 7067, "token_acc": 0.2991095165578298 }, { "epoch": 4.143066549399003, "grad_norm": 0.23345882267355128, "learning_rate": 0.00046699333809846076, "loss": 3.035076379776001, "step": 7068, "token_acc": 0.2954892258412869 }, { "epoch": 4.143652887716212, "grad_norm": 0.19076298634248695, "learning_rate": 0.0004669813040938988, "loss": 3.04569673538208, "step": 7069, "token_acc": 0.29291245929452975 }, { "epoch": 4.144239226033421, "grad_norm": 0.1773726042855761, "learning_rate": 0.00046696926805108997, "loss": 3.0345423221588135, "step": 7070, "token_acc": 0.2948005656510018 }, { "epoch": 4.14482556435063, "grad_norm": 0.21569106611411484, "learning_rate": 0.00046695722997014725, "loss": 3.0541744232177734, "step": 7071, "token_acc": 0.2906861043619114 }, { "epoch": 4.145411902667839, "grad_norm": 0.20188609011027933, "learning_rate": 0.00046694518985118357, "loss": 3.021017551422119, "step": 7072, "token_acc": 0.29739049564647213 }, { "epoch": 4.145998240985048, "grad_norm": 0.23569023334218667, "learning_rate": 0.00046693314769431223, "loss": 3.00738525390625, "step": 7073, "token_acc": 0.29773470565602156 }, { "epoch": 4.1465845793022575, "grad_norm": 0.1960733928332109, "learning_rate": 0.00046692110349964627, "loss": 3.0366311073303223, "step": 7074, "token_acc": 0.2959329238114647 }, { "epoch": 4.147170917619467, "grad_norm": 0.18832683100035275, "learning_rate": 0.00046690905726729887, "loss": 3.0316362380981445, "step": 7075, "token_acc": 0.2950518953956778 }, { "epoch": 4.147757255936676, "grad_norm": 0.2179836225519626, "learning_rate": 0.0004668970089973832, "loss": 3.041081190109253, "step": 7076, "token_acc": 0.2932194132334582 }, { "epoch": 4.148343594253885, "grad_norm": 0.22847330709256103, "learning_rate": 0.00046688495869001234, "loss": 3.0492234230041504, "step": 7077, "token_acc": 0.2919259295491465 }, { "epoch": 4.148929932571093, "grad_norm": 0.1904728149121686, "learning_rate": 0.00046687290634529955, "loss": 3.019866466522217, "step": 7078, "token_acc": 0.2959847791732513 }, { "epoch": 4.149516270888302, "grad_norm": 0.27838198988043333, "learning_rate": 0.0004668608519633581, "loss": 3.0496110916137695, "step": 7079, "token_acc": 0.2933138531013415 }, { "epoch": 4.150102609205511, "grad_norm": 0.2767449097879659, "learning_rate": 0.00046684879554430113, "loss": 3.046294689178467, "step": 7080, "token_acc": 0.29320437770425045 }, { "epoch": 4.15068894752272, "grad_norm": 0.19460546435211695, "learning_rate": 0.00046683673708824194, "loss": 3.066836357116699, "step": 7081, "token_acc": 0.29037366385479796 }, { "epoch": 4.1512752858399296, "grad_norm": 0.29151963843918316, "learning_rate": 0.0004668246765952938, "loss": 3.0040342807769775, "step": 7082, "token_acc": 0.30145942575398404 }, { "epoch": 4.151861624157139, "grad_norm": 0.1929812410587892, "learning_rate": 0.00046681261406557003, "loss": 3.041063070297241, "step": 7083, "token_acc": 0.29300945325202726 }, { "epoch": 4.152447962474348, "grad_norm": 0.2536166281757521, "learning_rate": 0.0004668005494991838, "loss": 3.023494243621826, "step": 7084, "token_acc": 0.2951332657688249 }, { "epoch": 4.153034300791557, "grad_norm": 0.21373170274499917, "learning_rate": 0.00046678848289624864, "loss": 3.047337055206299, "step": 7085, "token_acc": 0.29454262446589813 }, { "epoch": 4.153620639108766, "grad_norm": 0.23353646972111555, "learning_rate": 0.00046677641425687784, "loss": 3.0630502700805664, "step": 7086, "token_acc": 0.29203935254736096 }, { "epoch": 4.154206977425975, "grad_norm": 0.22203336151340522, "learning_rate": 0.0004667643435811847, "loss": 2.994072675704956, "step": 7087, "token_acc": 0.3005077729144656 }, { "epoch": 4.154793315743184, "grad_norm": 0.20928959904605512, "learning_rate": 0.00046675227086928264, "loss": 3.023728847503662, "step": 7088, "token_acc": 0.29455682188212307 }, { "epoch": 4.1553796540603924, "grad_norm": 0.24338924103312093, "learning_rate": 0.00046674019612128506, "loss": 2.979199171066284, "step": 7089, "token_acc": 0.30235288530541604 }, { "epoch": 4.155965992377602, "grad_norm": 0.1736921816887968, "learning_rate": 0.0004667281193373054, "loss": 3.018723487854004, "step": 7090, "token_acc": 0.29572209611741046 }, { "epoch": 4.156552330694811, "grad_norm": 0.22540200906335275, "learning_rate": 0.0004667160405174571, "loss": 3.010500431060791, "step": 7091, "token_acc": 0.29890123550203757 }, { "epoch": 4.15713866901202, "grad_norm": 0.19587881689285558, "learning_rate": 0.0004667039596618536, "loss": 3.0391292572021484, "step": 7092, "token_acc": 0.2947037821894736 }, { "epoch": 4.157725007329229, "grad_norm": 0.21982108721514407, "learning_rate": 0.0004666918767706085, "loss": 3.0136666297912598, "step": 7093, "token_acc": 0.29763448975260015 }, { "epoch": 4.158311345646438, "grad_norm": 0.20567943770917999, "learning_rate": 0.0004666797918438352, "loss": 3.0618886947631836, "step": 7094, "token_acc": 0.29082116268928365 }, { "epoch": 4.158897683963647, "grad_norm": 0.19781248613019312, "learning_rate": 0.00046666770488164723, "loss": 3.0552635192871094, "step": 7095, "token_acc": 0.29241186048371903 }, { "epoch": 4.159484022280856, "grad_norm": 0.19881244603717121, "learning_rate": 0.0004666556158841581, "loss": 3.0654823780059814, "step": 7096, "token_acc": 0.29076996264521604 }, { "epoch": 4.160070360598065, "grad_norm": 0.17761953760147184, "learning_rate": 0.00046664352485148143, "loss": 3.0315709114074707, "step": 7097, "token_acc": 0.2944346580662697 }, { "epoch": 4.1606566989152745, "grad_norm": 0.1978985990562107, "learning_rate": 0.0004666314317837307, "loss": 3.014829635620117, "step": 7098, "token_acc": 0.29787647538965834 }, { "epoch": 4.161243037232484, "grad_norm": 0.1674233731702359, "learning_rate": 0.00046661933668101964, "loss": 3.0356192588806152, "step": 7099, "token_acc": 0.29375117559440594 }, { "epoch": 4.161829375549692, "grad_norm": 0.20094298637790062, "learning_rate": 0.00046660723954346185, "loss": 3.0140483379364014, "step": 7100, "token_acc": 0.2983582694337694 }, { "epoch": 4.162415713866901, "grad_norm": 0.19347567926127904, "learning_rate": 0.0004665951403711709, "loss": 3.027526378631592, "step": 7101, "token_acc": 0.2953698009011814 }, { "epoch": 4.16300205218411, "grad_norm": 0.18566759674894454, "learning_rate": 0.00046658303916426045, "loss": 3.0216879844665527, "step": 7102, "token_acc": 0.29696816612704463 }, { "epoch": 4.163588390501319, "grad_norm": 0.17922073572211084, "learning_rate": 0.0004665709359228442, "loss": 3.0508852005004883, "step": 7103, "token_acc": 0.29356801446509706 }, { "epoch": 4.164174728818528, "grad_norm": 0.21629321281797556, "learning_rate": 0.00046655883064703586, "loss": 3.0434796810150146, "step": 7104, "token_acc": 0.2948495562527062 }, { "epoch": 4.164761067135737, "grad_norm": 0.2015907229433968, "learning_rate": 0.00046654672333694907, "loss": 3.0592262744903564, "step": 7105, "token_acc": 0.2897117433006819 }, { "epoch": 4.1653474054529465, "grad_norm": 0.1931371653354883, "learning_rate": 0.0004665346139926977, "loss": 3.0275039672851562, "step": 7106, "token_acc": 0.29688584577746463 }, { "epoch": 4.165933743770156, "grad_norm": 0.17821961288112476, "learning_rate": 0.00046652250261439534, "loss": 3.042079210281372, "step": 7107, "token_acc": 0.2939778858865489 }, { "epoch": 4.166520082087365, "grad_norm": 0.20033317855731325, "learning_rate": 0.0004665103892021558, "loss": 3.016655921936035, "step": 7108, "token_acc": 0.29708529414793383 }, { "epoch": 4.167106420404574, "grad_norm": 0.18525657417228694, "learning_rate": 0.00046649827375609297, "loss": 3.030959367752075, "step": 7109, "token_acc": 0.29468318558720774 }, { "epoch": 4.167692758721783, "grad_norm": 0.18083164013408412, "learning_rate": 0.0004664861562763206, "loss": 3.0290822982788086, "step": 7110, "token_acc": 0.2948144123519374 }, { "epoch": 4.168279097038991, "grad_norm": 0.17839237629758914, "learning_rate": 0.0004664740367629525, "loss": 3.0278515815734863, "step": 7111, "token_acc": 0.2947525082488638 }, { "epoch": 4.1688654353562, "grad_norm": 0.19663337642508003, "learning_rate": 0.0004664619152161026, "loss": 3.042304039001465, "step": 7112, "token_acc": 0.29429628803576846 }, { "epoch": 4.169451773673409, "grad_norm": 0.21056685268497685, "learning_rate": 0.00046644979163588463, "loss": 3.0030055046081543, "step": 7113, "token_acc": 0.2994610327748917 }, { "epoch": 4.1700381119906185, "grad_norm": 0.26487086465418436, "learning_rate": 0.00046643766602241256, "loss": 3.047046661376953, "step": 7114, "token_acc": 0.29298914799351833 }, { "epoch": 4.170624450307828, "grad_norm": 0.32481344596130146, "learning_rate": 0.00046642553837580023, "loss": 3.0168404579162598, "step": 7115, "token_acc": 0.2981899785073117 }, { "epoch": 4.171210788625037, "grad_norm": 0.272552402031005, "learning_rate": 0.00046641340869616154, "loss": 3.038266181945801, "step": 7116, "token_acc": 0.2937408621261285 }, { "epoch": 4.171797126942246, "grad_norm": 0.17470484100002326, "learning_rate": 0.0004664012769836106, "loss": 3.0391368865966797, "step": 7117, "token_acc": 0.293547087939927 }, { "epoch": 4.172383465259455, "grad_norm": 0.21957628140690602, "learning_rate": 0.00046638914323826125, "loss": 3.051262378692627, "step": 7118, "token_acc": 0.29323716209746914 }, { "epoch": 4.172969803576664, "grad_norm": 0.18030448739028657, "learning_rate": 0.0004663770074602275, "loss": 3.036210775375366, "step": 7119, "token_acc": 0.29414135185425594 }, { "epoch": 4.173556141893873, "grad_norm": 0.19974349358637666, "learning_rate": 0.00046636486964962335, "loss": 3.0618629455566406, "step": 7120, "token_acc": 0.2913911763328877 }, { "epoch": 4.174142480211081, "grad_norm": 0.18861560344242462, "learning_rate": 0.00046635272980656274, "loss": 3.043172836303711, "step": 7121, "token_acc": 0.29310756205503175 }, { "epoch": 4.1747288185282905, "grad_norm": 0.20889792893229253, "learning_rate": 0.00046634058793115974, "loss": 3.0008678436279297, "step": 7122, "token_acc": 0.300017460147864 }, { "epoch": 4.1753151568455, "grad_norm": 0.22265419460742056, "learning_rate": 0.0004663284440235285, "loss": 3.0406363010406494, "step": 7123, "token_acc": 0.2936238716725119 }, { "epoch": 4.175901495162709, "grad_norm": 0.1951362379442764, "learning_rate": 0.00046631629808378307, "loss": 3.0189366340637207, "step": 7124, "token_acc": 0.29693681535455996 }, { "epoch": 4.176487833479918, "grad_norm": 0.18581102207241978, "learning_rate": 0.00046630415011203745, "loss": 3.053821325302124, "step": 7125, "token_acc": 0.29320057474543654 }, { "epoch": 4.177074171797127, "grad_norm": 0.22939984417511636, "learning_rate": 0.0004662920001084059, "loss": 3.0178940296173096, "step": 7126, "token_acc": 0.2960817797643874 }, { "epoch": 4.177660510114336, "grad_norm": 0.18712370128417088, "learning_rate": 0.0004662798480730024, "loss": 3.011500597000122, "step": 7127, "token_acc": 0.29905122108818927 }, { "epoch": 4.178246848431545, "grad_norm": 0.2380529736075332, "learning_rate": 0.0004662676940059412, "loss": 3.0373952388763428, "step": 7128, "token_acc": 0.29279643973225794 }, { "epoch": 4.178833186748754, "grad_norm": 0.18541249519328332, "learning_rate": 0.00046625553790733635, "loss": 2.9727001190185547, "step": 7129, "token_acc": 0.30440173428434775 }, { "epoch": 4.179419525065963, "grad_norm": 0.20667337262809457, "learning_rate": 0.0004662433797773022, "loss": 3.0099425315856934, "step": 7130, "token_acc": 0.29811988236495895 }, { "epoch": 4.1800058633831725, "grad_norm": 0.2039504039482547, "learning_rate": 0.0004662312196159528, "loss": 3.020404815673828, "step": 7131, "token_acc": 0.29621003161538323 }, { "epoch": 4.180592201700381, "grad_norm": 0.2057135749111697, "learning_rate": 0.0004662190574234026, "loss": 3.0423197746276855, "step": 7132, "token_acc": 0.29315194392683824 }, { "epoch": 4.18117854001759, "grad_norm": 0.23412507896396903, "learning_rate": 0.00046620689319976566, "loss": 3.0398218631744385, "step": 7133, "token_acc": 0.29487629861911135 }, { "epoch": 4.181764878334799, "grad_norm": 0.17660227602515383, "learning_rate": 0.0004661947269451563, "loss": 2.978708505630493, "step": 7134, "token_acc": 0.30259389807749415 }, { "epoch": 4.182351216652008, "grad_norm": 0.23714571563875728, "learning_rate": 0.0004661825586596888, "loss": 3.0459704399108887, "step": 7135, "token_acc": 0.29141898107729874 }, { "epoch": 4.182937554969217, "grad_norm": 0.23386796260139422, "learning_rate": 0.00046617038834347746, "loss": 2.98483943939209, "step": 7136, "token_acc": 0.30154882762721946 }, { "epoch": 4.183523893286426, "grad_norm": 0.16988005683655433, "learning_rate": 0.00046615821599663664, "loss": 3.0512070655822754, "step": 7137, "token_acc": 0.29314000773369797 }, { "epoch": 4.184110231603635, "grad_norm": 0.21942275141055242, "learning_rate": 0.00046614604161928063, "loss": 3.0374364852905273, "step": 7138, "token_acc": 0.2942677130894467 }, { "epoch": 4.1846965699208445, "grad_norm": 0.20198881199602095, "learning_rate": 0.0004661338652115239, "loss": 3.001643657684326, "step": 7139, "token_acc": 0.2995079069323014 }, { "epoch": 4.185282908238054, "grad_norm": 0.20345284515402953, "learning_rate": 0.00046612168677348065, "loss": 3.0409388542175293, "step": 7140, "token_acc": 0.29268401764738383 }, { "epoch": 4.185869246555263, "grad_norm": 0.19391012433348753, "learning_rate": 0.00046610950630526543, "loss": 3.0369417667388916, "step": 7141, "token_acc": 0.2938946158167732 }, { "epoch": 4.186455584872472, "grad_norm": 0.19226466492808938, "learning_rate": 0.0004660973238069926, "loss": 3.012075901031494, "step": 7142, "token_acc": 0.2986068955583221 }, { "epoch": 4.18704192318968, "grad_norm": 0.2528403017866318, "learning_rate": 0.0004660851392787766, "loss": 3.0370397567749023, "step": 7143, "token_acc": 0.2939726342537161 }, { "epoch": 4.187628261506889, "grad_norm": 0.23609561243943936, "learning_rate": 0.0004660729527207319, "loss": 3.0025415420532227, "step": 7144, "token_acc": 0.2994409318643597 }, { "epoch": 4.188214599824098, "grad_norm": 0.1965381717174718, "learning_rate": 0.000466060764132973, "loss": 3.0507707595825195, "step": 7145, "token_acc": 0.2920774872856449 }, { "epoch": 4.188800938141307, "grad_norm": 0.22691902109601858, "learning_rate": 0.00046604857351561433, "loss": 3.0100255012512207, "step": 7146, "token_acc": 0.298947072266356 }, { "epoch": 4.1893872764585165, "grad_norm": 0.20072408231028657, "learning_rate": 0.0004660363808687704, "loss": 3.0749642848968506, "step": 7147, "token_acc": 0.2882328750431551 }, { "epoch": 4.189973614775726, "grad_norm": 0.21423720638904545, "learning_rate": 0.0004660241861925559, "loss": 3.0191421508789062, "step": 7148, "token_acc": 0.2981584586832364 }, { "epoch": 4.190559953092935, "grad_norm": 0.2076223938319146, "learning_rate": 0.00046601198948708516, "loss": 3.026064395904541, "step": 7149, "token_acc": 0.2944992306148391 }, { "epoch": 4.191146291410144, "grad_norm": 0.1902893680957885, "learning_rate": 0.0004659997907524729, "loss": 3.045395851135254, "step": 7150, "token_acc": 0.2932133960553693 }, { "epoch": 4.191732629727353, "grad_norm": 0.23110492311367747, "learning_rate": 0.00046598758998883373, "loss": 3.0631494522094727, "step": 7151, "token_acc": 0.29036563768486934 }, { "epoch": 4.192318968044562, "grad_norm": 0.19852537666000913, "learning_rate": 0.00046597538719628207, "loss": 2.9920687675476074, "step": 7152, "token_acc": 0.3016063965486845 }, { "epoch": 4.192905306361771, "grad_norm": 0.18116172925316323, "learning_rate": 0.00046596318237493277, "loss": 3.076284170150757, "step": 7153, "token_acc": 0.2881908571821139 }, { "epoch": 4.193491644678979, "grad_norm": 0.21801417753415978, "learning_rate": 0.0004659509755249004, "loss": 3.077207088470459, "step": 7154, "token_acc": 0.29055617767823816 }, { "epoch": 4.1940779829961885, "grad_norm": 0.21016189565742255, "learning_rate": 0.00046593876664629955, "loss": 3.0335421562194824, "step": 7155, "token_acc": 0.29657758771870657 }, { "epoch": 4.194664321313398, "grad_norm": 0.16630249379307907, "learning_rate": 0.000465926555739245, "loss": 3.034965753555298, "step": 7156, "token_acc": 0.29499131599647965 }, { "epoch": 4.195250659630607, "grad_norm": 0.21604587693350585, "learning_rate": 0.00046591434280385144, "loss": 3.0059666633605957, "step": 7157, "token_acc": 0.29904675608047765 }, { "epoch": 4.195836997947816, "grad_norm": 0.17651302496647003, "learning_rate": 0.00046590212784023354, "loss": 2.9971914291381836, "step": 7158, "token_acc": 0.299662286082898 }, { "epoch": 4.196423336265025, "grad_norm": 0.17693190856646454, "learning_rate": 0.0004658899108485061, "loss": 3.041050434112549, "step": 7159, "token_acc": 0.2941525293281957 }, { "epoch": 4.197009674582234, "grad_norm": 0.20259167037250458, "learning_rate": 0.00046587769182878383, "loss": 3.0409457683563232, "step": 7160, "token_acc": 0.2942173379859894 }, { "epoch": 4.197596012899443, "grad_norm": 0.16577071297280604, "learning_rate": 0.0004658654707811816, "loss": 3.0118017196655273, "step": 7161, "token_acc": 0.2980252185975467 }, { "epoch": 4.198182351216652, "grad_norm": 0.18552614042436216, "learning_rate": 0.0004658532477058141, "loss": 3.0386929512023926, "step": 7162, "token_acc": 0.2937376260646567 }, { "epoch": 4.198768689533861, "grad_norm": 0.22671153390412097, "learning_rate": 0.00046584102260279625, "loss": 3.0002856254577637, "step": 7163, "token_acc": 0.29869861489386507 }, { "epoch": 4.19935502785107, "grad_norm": 0.2824981426324045, "learning_rate": 0.00046582879547224276, "loss": 3.0433058738708496, "step": 7164, "token_acc": 0.29393711606049444 }, { "epoch": 4.199941366168279, "grad_norm": 0.411495209009468, "learning_rate": 0.00046581656631426873, "loss": 3.017425060272217, "step": 7165, "token_acc": 0.2975307670104175 }, { "epoch": 4.200527704485488, "grad_norm": 0.3118122567759677, "learning_rate": 0.00046580433512898874, "loss": 2.9966278076171875, "step": 7166, "token_acc": 0.30098733577314973 }, { "epoch": 4.201114042802697, "grad_norm": 0.17435651759247894, "learning_rate": 0.0004657921019165179, "loss": 2.9982504844665527, "step": 7167, "token_acc": 0.2999541551410631 }, { "epoch": 4.201700381119906, "grad_norm": 0.2626497240931547, "learning_rate": 0.0004657798666769709, "loss": 3.087343454360962, "step": 7168, "token_acc": 0.287779965234467 }, { "epoch": 4.202286719437115, "grad_norm": 0.19462075163709555, "learning_rate": 0.00046576762941046295, "loss": 2.9727554321289062, "step": 7169, "token_acc": 0.30430436423746243 }, { "epoch": 4.202873057754324, "grad_norm": 0.20068935494331192, "learning_rate": 0.00046575539011710885, "loss": 3.0082504749298096, "step": 7170, "token_acc": 0.29929280460834234 }, { "epoch": 4.203459396071533, "grad_norm": 0.1701033985192429, "learning_rate": 0.0004657431487970236, "loss": 3.035154104232788, "step": 7171, "token_acc": 0.29428575944032365 }, { "epoch": 4.2040457343887425, "grad_norm": 0.21203657352301916, "learning_rate": 0.00046573090545032217, "loss": 3.006300926208496, "step": 7172, "token_acc": 0.30026322187807225 }, { "epoch": 4.204632072705952, "grad_norm": 0.21393399496336932, "learning_rate": 0.0004657186600771196, "loss": 3.055222511291504, "step": 7173, "token_acc": 0.29272289773045224 }, { "epoch": 4.205218411023161, "grad_norm": 0.21800821552289154, "learning_rate": 0.0004657064126775309, "loss": 3.0469555854797363, "step": 7174, "token_acc": 0.292700488206281 }, { "epoch": 4.205804749340369, "grad_norm": 0.19224082996588798, "learning_rate": 0.00046569416325167114, "loss": 3.017199754714966, "step": 7175, "token_acc": 0.2973582216346439 }, { "epoch": 4.206391087657578, "grad_norm": 0.21649697547673905, "learning_rate": 0.0004656819117996553, "loss": 3.0250484943389893, "step": 7176, "token_acc": 0.2947412513639443 }, { "epoch": 4.206977425974787, "grad_norm": 0.2293386470284171, "learning_rate": 0.00046566965832159856, "loss": 3.0057473182678223, "step": 7177, "token_acc": 0.2987094255120537 }, { "epoch": 4.207563764291996, "grad_norm": 0.1840261651221997, "learning_rate": 0.00046565740281761603, "loss": 3.0113186836242676, "step": 7178, "token_acc": 0.29763357312332095 }, { "epoch": 4.208150102609205, "grad_norm": 0.2523318353567239, "learning_rate": 0.0004656451452878228, "loss": 3.0586607456207275, "step": 7179, "token_acc": 0.2908959859237039 }, { "epoch": 4.2087364409264145, "grad_norm": 0.2092421207723749, "learning_rate": 0.000465632885732334, "loss": 3.036681652069092, "step": 7180, "token_acc": 0.2953302538317887 }, { "epoch": 4.209322779243624, "grad_norm": 0.2213120261401445, "learning_rate": 0.00046562062415126483, "loss": 3.007185459136963, "step": 7181, "token_acc": 0.2978972121780217 }, { "epoch": 4.209909117560833, "grad_norm": 0.19155800810470536, "learning_rate": 0.0004656083605447304, "loss": 3.0147643089294434, "step": 7182, "token_acc": 0.2986389237781517 }, { "epoch": 4.210495455878042, "grad_norm": 0.21149699446447984, "learning_rate": 0.000465596094912846, "loss": 3.034579277038574, "step": 7183, "token_acc": 0.2950187123914462 }, { "epoch": 4.211081794195251, "grad_norm": 0.27381916078877117, "learning_rate": 0.0004655838272557268, "loss": 3.0246386528015137, "step": 7184, "token_acc": 0.2962732011751261 }, { "epoch": 4.21166813251246, "grad_norm": 0.19809543772936736, "learning_rate": 0.000465571557573488, "loss": 3.0541319847106934, "step": 7185, "token_acc": 0.29117324274888945 }, { "epoch": 4.212254470829668, "grad_norm": 0.22581517657389624, "learning_rate": 0.00046555928586624495, "loss": 3.05717396736145, "step": 7186, "token_acc": 0.29128791852043256 }, { "epoch": 4.212840809146877, "grad_norm": 0.217388709016104, "learning_rate": 0.00046554701213411285, "loss": 3.045132637023926, "step": 7187, "token_acc": 0.29182146728347225 }, { "epoch": 4.2134271474640865, "grad_norm": 0.18840122464008327, "learning_rate": 0.00046553473637720713, "loss": 3.102694511413574, "step": 7188, "token_acc": 0.2832544304327789 }, { "epoch": 4.214013485781296, "grad_norm": 0.24554499698267465, "learning_rate": 0.0004655224585956429, "loss": 3.002741813659668, "step": 7189, "token_acc": 0.2994943103236521 }, { "epoch": 4.214599824098505, "grad_norm": 0.19128149306691974, "learning_rate": 0.0004655101787895356, "loss": 3.063619613647461, "step": 7190, "token_acc": 0.2886796035392853 }, { "epoch": 4.215186162415714, "grad_norm": 0.26583750266094613, "learning_rate": 0.00046549789695900056, "loss": 2.9953603744506836, "step": 7191, "token_acc": 0.3001553314472275 }, { "epoch": 4.215772500732923, "grad_norm": 0.20798661582231734, "learning_rate": 0.0004654856131041532, "loss": 3.0, "step": 7192, "token_acc": 0.299153776136946 }, { "epoch": 4.216358839050132, "grad_norm": 0.22306383487478446, "learning_rate": 0.0004654733272251088, "loss": 3.0378551483154297, "step": 7193, "token_acc": 0.2944513283683211 }, { "epoch": 4.216945177367341, "grad_norm": 0.1912486582995136, "learning_rate": 0.0004654610393219829, "loss": 2.9882826805114746, "step": 7194, "token_acc": 0.30115435321962813 }, { "epoch": 4.21753151568455, "grad_norm": 0.2684242131620467, "learning_rate": 0.0004654487493948909, "loss": 3.072085380554199, "step": 7195, "token_acc": 0.289867864147827 }, { "epoch": 4.218117854001759, "grad_norm": 0.21315851180081472, "learning_rate": 0.00046543645744394823, "loss": 3.0188755989074707, "step": 7196, "token_acc": 0.2970566837111133 }, { "epoch": 4.218704192318968, "grad_norm": 0.2336857103000578, "learning_rate": 0.00046542416346927033, "loss": 3.0823090076446533, "step": 7197, "token_acc": 0.2874185242867106 }, { "epoch": 4.219290530636177, "grad_norm": 0.23008886321897284, "learning_rate": 0.00046541186747097265, "loss": 3.012960910797119, "step": 7198, "token_acc": 0.2974197192980646 }, { "epoch": 4.219876868953386, "grad_norm": 0.21535649589772046, "learning_rate": 0.00046539956944917084, "loss": 3.0952091217041016, "step": 7199, "token_acc": 0.2888128943322199 }, { "epoch": 4.220463207270595, "grad_norm": 0.18874393717942592, "learning_rate": 0.00046538726940398024, "loss": 3.0290207862854004, "step": 7200, "token_acc": 0.2953556783277116 }, { "epoch": 4.221049545587804, "grad_norm": 0.20581388503280557, "learning_rate": 0.0004653749673355165, "loss": 3.0229454040527344, "step": 7201, "token_acc": 0.29560466356369797 }, { "epoch": 4.221635883905013, "grad_norm": 0.2056077547446293, "learning_rate": 0.0004653626632438952, "loss": 3.067230224609375, "step": 7202, "token_acc": 0.2909809757618472 }, { "epoch": 4.222222222222222, "grad_norm": 0.21441276898880496, "learning_rate": 0.00046535035712923185, "loss": 3.0767927169799805, "step": 7203, "token_acc": 0.2883587184939103 }, { "epoch": 4.222808560539431, "grad_norm": 0.204499742479287, "learning_rate": 0.0004653380489916421, "loss": 3.006409168243408, "step": 7204, "token_acc": 0.29920727224116034 }, { "epoch": 4.2233948988566405, "grad_norm": 0.16815923724321757, "learning_rate": 0.0004653257388312415, "loss": 3.044400215148926, "step": 7205, "token_acc": 0.29545357762612984 }, { "epoch": 4.22398123717385, "grad_norm": 0.18524566030904668, "learning_rate": 0.0004653134266481458, "loss": 3.0381197929382324, "step": 7206, "token_acc": 0.2931384324448647 }, { "epoch": 4.224567575491059, "grad_norm": 0.17668464946370718, "learning_rate": 0.0004653011124424706, "loss": 3.013251543045044, "step": 7207, "token_acc": 0.2979389715371891 }, { "epoch": 4.225153913808267, "grad_norm": 0.18257376746229081, "learning_rate": 0.0004652887962143315, "loss": 3.023289203643799, "step": 7208, "token_acc": 0.29631769831433763 }, { "epoch": 4.225740252125476, "grad_norm": 0.1788219354723936, "learning_rate": 0.0004652764779638444, "loss": 3.045978307723999, "step": 7209, "token_acc": 0.29395063780502845 }, { "epoch": 4.226326590442685, "grad_norm": 0.1823665469096321, "learning_rate": 0.0004652641576911247, "loss": 3.07065486907959, "step": 7210, "token_acc": 0.2900328454368022 }, { "epoch": 4.226912928759894, "grad_norm": 0.20739412835154172, "learning_rate": 0.00046525183539628846, "loss": 3.0098862648010254, "step": 7211, "token_acc": 0.2977733136797068 }, { "epoch": 4.227499267077103, "grad_norm": 0.23144851817703418, "learning_rate": 0.0004652395110794512, "loss": 3.062488555908203, "step": 7212, "token_acc": 0.29075703128245606 }, { "epoch": 4.2280856053943126, "grad_norm": 0.19903605197128096, "learning_rate": 0.0004652271847407288, "loss": 3.0978357791900635, "step": 7213, "token_acc": 0.28609121174645175 }, { "epoch": 4.228671943711522, "grad_norm": 0.17601060419982678, "learning_rate": 0.0004652148563802371, "loss": 3.0160069465637207, "step": 7214, "token_acc": 0.29667864250820647 }, { "epoch": 4.229258282028731, "grad_norm": 0.22273856148661494, "learning_rate": 0.00046520252599809166, "loss": 3.066481590270996, "step": 7215, "token_acc": 0.29091505247428845 }, { "epoch": 4.22984462034594, "grad_norm": 0.165558317848903, "learning_rate": 0.0004651901935944086, "loss": 3.001032590866089, "step": 7216, "token_acc": 0.2988250310841701 }, { "epoch": 4.230430958663149, "grad_norm": 0.17574694925998402, "learning_rate": 0.0004651778591693036, "loss": 3.060455799102783, "step": 7217, "token_acc": 0.2919625692278312 }, { "epoch": 4.231017296980358, "grad_norm": 0.20723927960812277, "learning_rate": 0.0004651655227228926, "loss": 3.0199244022369385, "step": 7218, "token_acc": 0.2973019356583423 }, { "epoch": 4.231603635297566, "grad_norm": 0.19253394581645475, "learning_rate": 0.0004651531842552914, "loss": 3.007469892501831, "step": 7219, "token_acc": 0.29877539274020704 }, { "epoch": 4.2321899736147754, "grad_norm": 0.19161325732394313, "learning_rate": 0.00046514084376661605, "loss": 3.0038843154907227, "step": 7220, "token_acc": 0.2987262775589544 }, { "epoch": 4.232776311931985, "grad_norm": 0.22692899923408688, "learning_rate": 0.00046512850125698225, "loss": 3.0106732845306396, "step": 7221, "token_acc": 0.2992220305498503 }, { "epoch": 4.233362650249194, "grad_norm": 0.2115732935318443, "learning_rate": 0.0004651161567265062, "loss": 3.077622413635254, "step": 7222, "token_acc": 0.288922710322971 }, { "epoch": 4.233948988566403, "grad_norm": 0.2019073345427456, "learning_rate": 0.0004651038101753036, "loss": 3.0127577781677246, "step": 7223, "token_acc": 0.29833541318555806 }, { "epoch": 4.234535326883612, "grad_norm": 0.22052979661675895, "learning_rate": 0.00046509146160349067, "loss": 3.059021472930908, "step": 7224, "token_acc": 0.29122310496211345 }, { "epoch": 4.235121665200821, "grad_norm": 0.2146427398563426, "learning_rate": 0.0004650791110111833, "loss": 3.079977512359619, "step": 7225, "token_acc": 0.2885198773409265 }, { "epoch": 4.23570800351803, "grad_norm": 0.23425564957326006, "learning_rate": 0.0004650667583984974, "loss": 3.056211233139038, "step": 7226, "token_acc": 0.28999268765697694 }, { "epoch": 4.236294341835239, "grad_norm": 0.2735447098311677, "learning_rate": 0.0004650544037655492, "loss": 3.0535433292388916, "step": 7227, "token_acc": 0.2917613560765321 }, { "epoch": 4.236880680152448, "grad_norm": 0.21842977066231503, "learning_rate": 0.00046504204711245455, "loss": 3.0150058269500732, "step": 7228, "token_acc": 0.29675714810572495 }, { "epoch": 4.237467018469657, "grad_norm": 0.19820564950678218, "learning_rate": 0.0004650296884393298, "loss": 3.0259552001953125, "step": 7229, "token_acc": 0.2947137685913417 }, { "epoch": 4.238053356786866, "grad_norm": 0.21132207864794167, "learning_rate": 0.0004650173277462908, "loss": 3.0346803665161133, "step": 7230, "token_acc": 0.29386813297310743 }, { "epoch": 4.238639695104075, "grad_norm": 0.20254279478224177, "learning_rate": 0.0004650049650334538, "loss": 3.0663952827453613, "step": 7231, "token_acc": 0.29137044791724714 }, { "epoch": 4.239226033421284, "grad_norm": 0.2551141040057678, "learning_rate": 0.00046499260030093484, "loss": 3.0588698387145996, "step": 7232, "token_acc": 0.291092350103377 }, { "epoch": 4.239812371738493, "grad_norm": 0.25184660175377055, "learning_rate": 0.00046498023354885, "loss": 3.086754322052002, "step": 7233, "token_acc": 0.28723888281356014 }, { "epoch": 4.240398710055702, "grad_norm": 0.23249709531231724, "learning_rate": 0.00046496786477731567, "loss": 3.0265302658081055, "step": 7234, "token_acc": 0.29619402312022197 }, { "epoch": 4.240985048372911, "grad_norm": 0.21436659470740724, "learning_rate": 0.0004649554939864479, "loss": 3.075906276702881, "step": 7235, "token_acc": 0.28844102621680795 }, { "epoch": 4.24157138669012, "grad_norm": 0.1997818984105397, "learning_rate": 0.00046494312117636294, "loss": 3.0163071155548096, "step": 7236, "token_acc": 0.2964620237315317 }, { "epoch": 4.2421577250073295, "grad_norm": 0.2194907664669766, "learning_rate": 0.00046493074634717705, "loss": 3.060027599334717, "step": 7237, "token_acc": 0.29129932222981136 }, { "epoch": 4.242744063324539, "grad_norm": 0.17815121209963833, "learning_rate": 0.0004649183694990063, "loss": 3.0438966751098633, "step": 7238, "token_acc": 0.2943165939789135 }, { "epoch": 4.243330401641748, "grad_norm": 0.22647371250225418, "learning_rate": 0.00046490599063196713, "loss": 3.0117921829223633, "step": 7239, "token_acc": 0.29971508595711804 }, { "epoch": 4.243916739958956, "grad_norm": 0.20886039102782136, "learning_rate": 0.0004648936097461758, "loss": 3.072829484939575, "step": 7240, "token_acc": 0.28895028818054636 }, { "epoch": 4.244503078276165, "grad_norm": 0.19316258779186285, "learning_rate": 0.0004648812268417485, "loss": 3.003109931945801, "step": 7241, "token_acc": 0.29721121374464204 }, { "epoch": 4.245089416593374, "grad_norm": 0.19067020562495265, "learning_rate": 0.00046486884191880167, "loss": 3.0332727432250977, "step": 7242, "token_acc": 0.29396787837249727 }, { "epoch": 4.245675754910583, "grad_norm": 0.17649926805330626, "learning_rate": 0.00046485645497745164, "loss": 2.9963645935058594, "step": 7243, "token_acc": 0.30061032212631106 }, { "epoch": 4.246262093227792, "grad_norm": 0.18658002375145616, "learning_rate": 0.0004648440660178147, "loss": 3.020477771759033, "step": 7244, "token_acc": 0.29699250056657805 }, { "epoch": 4.2468484315450015, "grad_norm": 0.20006045232623287, "learning_rate": 0.00046483167504000726, "loss": 3.0562543869018555, "step": 7245, "token_acc": 0.2918381580863474 }, { "epoch": 4.247434769862211, "grad_norm": 0.1834211176446755, "learning_rate": 0.0004648192820441458, "loss": 3.000281810760498, "step": 7246, "token_acc": 0.29908064703828696 }, { "epoch": 4.24802110817942, "grad_norm": 0.1838794358666694, "learning_rate": 0.0004648068870303466, "loss": 3.0101988315582275, "step": 7247, "token_acc": 0.29780088068996063 }, { "epoch": 4.248607446496629, "grad_norm": 0.21910430028999436, "learning_rate": 0.0004647944899987261, "loss": 3.032127857208252, "step": 7248, "token_acc": 0.2949095220201015 }, { "epoch": 4.249193784813838, "grad_norm": 0.2604695523252541, "learning_rate": 0.0004647820909494009, "loss": 3.0543711185455322, "step": 7249, "token_acc": 0.29206657339550235 }, { "epoch": 4.249780123131047, "grad_norm": 0.26027715891762565, "learning_rate": 0.0004647696898824873, "loss": 3.0225749015808105, "step": 7250, "token_acc": 0.29730030762702875 }, { "epoch": 4.250366461448255, "grad_norm": 0.31807102803223836, "learning_rate": 0.0004647572867981019, "loss": 3.0529260635375977, "step": 7251, "token_acc": 0.29128068097010057 }, { "epoch": 4.250952799765464, "grad_norm": 0.2326010368280377, "learning_rate": 0.0004647448816963612, "loss": 3.0328445434570312, "step": 7252, "token_acc": 0.2962837510887448 }, { "epoch": 4.2515391380826735, "grad_norm": 0.18287497552776938, "learning_rate": 0.00046473247457738166, "loss": 3.0036303997039795, "step": 7253, "token_acc": 0.3008589236653279 }, { "epoch": 4.252125476399883, "grad_norm": 0.18327713223996892, "learning_rate": 0.00046472006544128, "loss": 3.0194225311279297, "step": 7254, "token_acc": 0.2944594335936988 }, { "epoch": 4.252711814717092, "grad_norm": 0.19120016504664566, "learning_rate": 0.00046470765428817255, "loss": 3.0305886268615723, "step": 7255, "token_acc": 0.29578205477171404 }, { "epoch": 4.253298153034301, "grad_norm": 0.1741665719739343, "learning_rate": 0.000464695241118176, "loss": 3.0565361976623535, "step": 7256, "token_acc": 0.29156514157317576 }, { "epoch": 4.25388449135151, "grad_norm": 0.20598949173902928, "learning_rate": 0.000464682825931407, "loss": 3.0029425621032715, "step": 7257, "token_acc": 0.2985429229943485 }, { "epoch": 4.254470829668719, "grad_norm": 0.308150823920553, "learning_rate": 0.00046467040872798216, "loss": 3.017829418182373, "step": 7258, "token_acc": 0.2955879402026688 }, { "epoch": 4.255057167985928, "grad_norm": 0.28887097728298866, "learning_rate": 0.00046465798950801805, "loss": 2.993651866912842, "step": 7259, "token_acc": 0.30148115790382635 }, { "epoch": 4.255643506303137, "grad_norm": 0.18842538286960578, "learning_rate": 0.0004646455682716314, "loss": 3.062232494354248, "step": 7260, "token_acc": 0.2922357083537459 }, { "epoch": 4.256229844620346, "grad_norm": 0.2779440427377432, "learning_rate": 0.00046463314501893896, "loss": 3.022719383239746, "step": 7261, "token_acc": 0.29545203983599416 }, { "epoch": 4.256816182937555, "grad_norm": 0.22232304462280986, "learning_rate": 0.0004646207197500572, "loss": 3.01255202293396, "step": 7262, "token_acc": 0.2969424489293757 }, { "epoch": 4.257402521254764, "grad_norm": 0.2330451779201088, "learning_rate": 0.00046460829246510306, "loss": 2.9859397411346436, "step": 7263, "token_acc": 0.30138488098088384 }, { "epoch": 4.257988859571973, "grad_norm": 0.2645558763049161, "learning_rate": 0.0004645958631641932, "loss": 3.0558505058288574, "step": 7264, "token_acc": 0.2907449990580086 }, { "epoch": 4.258575197889182, "grad_norm": 0.19444701069906487, "learning_rate": 0.0004645834318474443, "loss": 3.103001117706299, "step": 7265, "token_acc": 0.284434247324346 }, { "epoch": 4.259161536206391, "grad_norm": 0.21392301788358983, "learning_rate": 0.0004645709985149734, "loss": 3.0365567207336426, "step": 7266, "token_acc": 0.2956237338605355 }, { "epoch": 4.2597478745236, "grad_norm": 0.16570621180879094, "learning_rate": 0.0004645585631668969, "loss": 3.029478073120117, "step": 7267, "token_acc": 0.29352355663338525 }, { "epoch": 4.260334212840809, "grad_norm": 0.19392138266077966, "learning_rate": 0.000464546125803332, "loss": 3.0014162063598633, "step": 7268, "token_acc": 0.30090012419045276 }, { "epoch": 4.260920551158018, "grad_norm": 0.22477921693453845, "learning_rate": 0.00046453368642439524, "loss": 3.0667591094970703, "step": 7269, "token_acc": 0.29108717831237363 }, { "epoch": 4.2615068894752275, "grad_norm": 0.22039136420494712, "learning_rate": 0.00046452124503020367, "loss": 2.995938301086426, "step": 7270, "token_acc": 0.3007941206925391 }, { "epoch": 4.262093227792437, "grad_norm": 0.19878033778551069, "learning_rate": 0.00046450880162087393, "loss": 3.042675495147705, "step": 7271, "token_acc": 0.29312096440559915 }, { "epoch": 4.262679566109645, "grad_norm": 0.2805607964746652, "learning_rate": 0.00046449635619652313, "loss": 3.011254072189331, "step": 7272, "token_acc": 0.2982654186460963 }, { "epoch": 4.263265904426854, "grad_norm": 0.18583625651976154, "learning_rate": 0.00046448390875726813, "loss": 3.0606212615966797, "step": 7273, "token_acc": 0.29005361078519915 }, { "epoch": 4.263852242744063, "grad_norm": 0.22311917847378054, "learning_rate": 0.0004644714593032258, "loss": 3.053183078765869, "step": 7274, "token_acc": 0.29212003465041636 }, { "epoch": 4.264438581061272, "grad_norm": 0.19137704478020862, "learning_rate": 0.0004644590078345131, "loss": 3.0386698246002197, "step": 7275, "token_acc": 0.29410758955932526 }, { "epoch": 4.265024919378481, "grad_norm": 0.2652698207684537, "learning_rate": 0.00046444655435124707, "loss": 3.031611919403076, "step": 7276, "token_acc": 0.29370177980977263 }, { "epoch": 4.26561125769569, "grad_norm": 0.1910222691437405, "learning_rate": 0.0004644340988535446, "loss": 3.0476956367492676, "step": 7277, "token_acc": 0.2931088137559998 }, { "epoch": 4.2661975960128995, "grad_norm": 0.2018811217732455, "learning_rate": 0.0004644216413415227, "loss": 3.0440738201141357, "step": 7278, "token_acc": 0.2950478908692701 }, { "epoch": 4.266783934330109, "grad_norm": 0.18914238463092745, "learning_rate": 0.00046440918181529843, "loss": 3.030407667160034, "step": 7279, "token_acc": 0.29616783733446894 }, { "epoch": 4.267370272647318, "grad_norm": 0.19186678296370016, "learning_rate": 0.0004643967202749888, "loss": 2.9866089820861816, "step": 7280, "token_acc": 0.30093894693820483 }, { "epoch": 4.267956610964527, "grad_norm": 0.18825563741892448, "learning_rate": 0.00046438425672071096, "loss": 3.0327978134155273, "step": 7281, "token_acc": 0.2959708997305935 }, { "epoch": 4.268542949281736, "grad_norm": 0.19522793126427482, "learning_rate": 0.00046437179115258175, "loss": 3.0397255420684814, "step": 7282, "token_acc": 0.29447283225506143 }, { "epoch": 4.269129287598945, "grad_norm": 0.20642936480255422, "learning_rate": 0.00046435932357071863, "loss": 3.0638389587402344, "step": 7283, "token_acc": 0.2904871857685672 }, { "epoch": 4.269715625916153, "grad_norm": 0.19911454429565026, "learning_rate": 0.0004643468539752384, "loss": 3.03776478767395, "step": 7284, "token_acc": 0.293763730177161 }, { "epoch": 4.270301964233362, "grad_norm": 0.20976080990143434, "learning_rate": 0.00046433438236625834, "loss": 3.0086519718170166, "step": 7285, "token_acc": 0.29792241026791016 }, { "epoch": 4.2708883025505715, "grad_norm": 0.17352337568920734, "learning_rate": 0.0004643219087438956, "loss": 2.997100591659546, "step": 7286, "token_acc": 0.3018106177823775 }, { "epoch": 4.271474640867781, "grad_norm": 0.17357494714334998, "learning_rate": 0.0004643094331082673, "loss": 3.0867109298706055, "step": 7287, "token_acc": 0.2877382832658295 }, { "epoch": 4.27206097918499, "grad_norm": 0.19556261316209386, "learning_rate": 0.0004642969554594907, "loss": 3.0226526260375977, "step": 7288, "token_acc": 0.2971607747068783 }, { "epoch": 4.272647317502199, "grad_norm": 0.18168954928689307, "learning_rate": 0.0004642844757976829, "loss": 3.010441780090332, "step": 7289, "token_acc": 0.2985246062433212 }, { "epoch": 4.273233655819408, "grad_norm": 0.17607367496516332, "learning_rate": 0.0004642719941229613, "loss": 3.0098719596862793, "step": 7290, "token_acc": 0.29916682266837286 }, { "epoch": 4.273819994136617, "grad_norm": 0.1849178085101402, "learning_rate": 0.00046425951043544295, "loss": 3.0792784690856934, "step": 7291, "token_acc": 0.2887901663082132 }, { "epoch": 4.274406332453826, "grad_norm": 0.17569223705055148, "learning_rate": 0.00046424702473524525, "loss": 3.052720069885254, "step": 7292, "token_acc": 0.29230357587542455 }, { "epoch": 4.274992670771035, "grad_norm": 0.22752647269424886, "learning_rate": 0.00046423453702248545, "loss": 3.0107295513153076, "step": 7293, "token_acc": 0.298837892370955 }, { "epoch": 4.2755790090882435, "grad_norm": 0.2717712867275703, "learning_rate": 0.0004642220472972809, "loss": 3.056264877319336, "step": 7294, "token_acc": 0.2919780932772723 }, { "epoch": 4.276165347405453, "grad_norm": 0.26113764586484167, "learning_rate": 0.00046420955555974885, "loss": 3.042703628540039, "step": 7295, "token_acc": 0.2931754118285966 }, { "epoch": 4.276751685722662, "grad_norm": 0.1753930968064935, "learning_rate": 0.0004641970618100067, "loss": 3.0342659950256348, "step": 7296, "token_acc": 0.2958851986183953 }, { "epoch": 4.277338024039871, "grad_norm": 0.2384081498373174, "learning_rate": 0.00046418456604817174, "loss": 3.0522491931915283, "step": 7297, "token_acc": 0.29252802371862563 }, { "epoch": 4.27792436235708, "grad_norm": 0.2041932751150039, "learning_rate": 0.0004641720682743614, "loss": 3.047947883605957, "step": 7298, "token_acc": 0.29237264756960407 }, { "epoch": 4.278510700674289, "grad_norm": 0.18322360249718328, "learning_rate": 0.00046415956848869314, "loss": 3.0624027252197266, "step": 7299, "token_acc": 0.2902337048380333 }, { "epoch": 4.279097038991498, "grad_norm": 0.253297719892055, "learning_rate": 0.0004641470666912843, "loss": 3.070003032684326, "step": 7300, "token_acc": 0.2896063627633273 }, { "epoch": 4.279683377308707, "grad_norm": 0.17833470294833959, "learning_rate": 0.0004641345628822523, "loss": 3.0455422401428223, "step": 7301, "token_acc": 0.2916853773846705 }, { "epoch": 4.280269715625916, "grad_norm": 0.2805479688463854, "learning_rate": 0.00046412205706171475, "loss": 3.0642621517181396, "step": 7302, "token_acc": 0.29094388122367676 }, { "epoch": 4.2808560539431255, "grad_norm": 0.24582430513601233, "learning_rate": 0.00046410954922978886, "loss": 3.051823616027832, "step": 7303, "token_acc": 0.2926300925851639 }, { "epoch": 4.281442392260335, "grad_norm": 0.24180166479787638, "learning_rate": 0.0004640970393865923, "loss": 3.0173614025115967, "step": 7304, "token_acc": 0.2968326510536482 }, { "epoch": 4.282028730577543, "grad_norm": 0.23451020379998191, "learning_rate": 0.00046408452753224263, "loss": 3.057274580001831, "step": 7305, "token_acc": 0.2923452419674082 }, { "epoch": 4.282615068894752, "grad_norm": 0.1912025874681136, "learning_rate": 0.00046407201366685725, "loss": 3.0389208793640137, "step": 7306, "token_acc": 0.29443759111808904 }, { "epoch": 4.283201407211961, "grad_norm": 0.19723245996826194, "learning_rate": 0.0004640594977905538, "loss": 2.961979389190674, "step": 7307, "token_acc": 0.3041204429362024 }, { "epoch": 4.28378774552917, "grad_norm": 0.18488005292196993, "learning_rate": 0.00046404697990344975, "loss": 3.015009880065918, "step": 7308, "token_acc": 0.2982548721543547 }, { "epoch": 4.284374083846379, "grad_norm": 0.17611423045426525, "learning_rate": 0.00046403446000566273, "loss": 3.046461582183838, "step": 7309, "token_acc": 0.29312512229182247 }, { "epoch": 4.284960422163588, "grad_norm": 0.20805243030340356, "learning_rate": 0.0004640219380973105, "loss": 3.0603318214416504, "step": 7310, "token_acc": 0.2900340411821718 }, { "epoch": 4.2855467604807975, "grad_norm": 0.17134574390579776, "learning_rate": 0.0004640094141785105, "loss": 3.0567173957824707, "step": 7311, "token_acc": 0.2927464796094379 }, { "epoch": 4.286133098798007, "grad_norm": 0.1993838388091723, "learning_rate": 0.00046399688824938046, "loss": 3.025550127029419, "step": 7312, "token_acc": 0.2964498219609892 }, { "epoch": 4.286719437115216, "grad_norm": 0.17916713057511413, "learning_rate": 0.000463984360310038, "loss": 3.0443339347839355, "step": 7313, "token_acc": 0.2928862964476596 }, { "epoch": 4.287305775432425, "grad_norm": 0.19834512359026277, "learning_rate": 0.00046397183036060073, "loss": 2.9818224906921387, "step": 7314, "token_acc": 0.302160169987504 }, { "epoch": 4.287892113749633, "grad_norm": 0.20869768346536827, "learning_rate": 0.0004639592984011866, "loss": 3.0265040397644043, "step": 7315, "token_acc": 0.2967787499509554 }, { "epoch": 4.288478452066842, "grad_norm": 0.20297974577995187, "learning_rate": 0.0004639467644319131, "loss": 3.04345440864563, "step": 7316, "token_acc": 0.2930626048937289 }, { "epoch": 4.289064790384051, "grad_norm": 0.19962836702406517, "learning_rate": 0.00046393422845289803, "loss": 3.066373348236084, "step": 7317, "token_acc": 0.29207596767467764 }, { "epoch": 4.28965112870126, "grad_norm": 0.16984820983701024, "learning_rate": 0.0004639216904642592, "loss": 3.055436134338379, "step": 7318, "token_acc": 0.2916192038206241 }, { "epoch": 4.2902374670184695, "grad_norm": 0.1768482049248973, "learning_rate": 0.0004639091504661143, "loss": 3.0117740631103516, "step": 7319, "token_acc": 0.29911648043707256 }, { "epoch": 4.290823805335679, "grad_norm": 0.17060784803186016, "learning_rate": 0.0004638966084585812, "loss": 3.0297725200653076, "step": 7320, "token_acc": 0.2950180001718021 }, { "epoch": 4.291410143652888, "grad_norm": 0.1726061282150735, "learning_rate": 0.0004638840644417777, "loss": 3.042325019836426, "step": 7321, "token_acc": 0.2941343895386727 }, { "epoch": 4.291996481970097, "grad_norm": 0.19375187834065757, "learning_rate": 0.00046387151841582164, "loss": 3.041559934616089, "step": 7322, "token_acc": 0.29206977095634146 }, { "epoch": 4.292582820287306, "grad_norm": 0.19113190939270544, "learning_rate": 0.00046385897038083085, "loss": 3.053023338317871, "step": 7323, "token_acc": 0.2910645939385639 }, { "epoch": 4.293169158604515, "grad_norm": 0.15489284377181867, "learning_rate": 0.00046384642033692327, "loss": 3.041614532470703, "step": 7324, "token_acc": 0.2926240639640475 }, { "epoch": 4.293755496921724, "grad_norm": 0.21746515007652106, "learning_rate": 0.00046383386828421664, "loss": 3.053802728652954, "step": 7325, "token_acc": 0.2934623613512772 }, { "epoch": 4.294341835238933, "grad_norm": 0.22551872754465307, "learning_rate": 0.000463821314222829, "loss": 3.0038957595825195, "step": 7326, "token_acc": 0.29875068727472665 }, { "epoch": 4.2949281735561415, "grad_norm": 0.1941942717225257, "learning_rate": 0.0004638087581528783, "loss": 3.0205600261688232, "step": 7327, "token_acc": 0.2966617359406579 }, { "epoch": 4.295514511873351, "grad_norm": 0.19776246537358266, "learning_rate": 0.0004637962000744823, "loss": 3.0114293098449707, "step": 7328, "token_acc": 0.29730630850306794 }, { "epoch": 4.29610085019056, "grad_norm": 0.20152552678980784, "learning_rate": 0.0004637836399877592, "loss": 3.028628349304199, "step": 7329, "token_acc": 0.2949789348324391 }, { "epoch": 4.296687188507769, "grad_norm": 0.19760176639140292, "learning_rate": 0.0004637710778928268, "loss": 3.0333213806152344, "step": 7330, "token_acc": 0.295072634941916 }, { "epoch": 4.297273526824978, "grad_norm": 0.19297214435437715, "learning_rate": 0.0004637585137898033, "loss": 3.0465164184570312, "step": 7331, "token_acc": 0.2923753420214185 }, { "epoch": 4.297859865142187, "grad_norm": 0.25139937937946333, "learning_rate": 0.0004637459476788065, "loss": 3.0764904022216797, "step": 7332, "token_acc": 0.2906442415196889 }, { "epoch": 4.298446203459396, "grad_norm": 0.2593749680843242, "learning_rate": 0.0004637333795599545, "loss": 3.058875799179077, "step": 7333, "token_acc": 0.2911292875715568 }, { "epoch": 4.299032541776605, "grad_norm": 0.23887168492587066, "learning_rate": 0.00046372080943336553, "loss": 3.045907497406006, "step": 7334, "token_acc": 0.2941029604721456 }, { "epoch": 4.299618880093814, "grad_norm": 0.21006450653436493, "learning_rate": 0.0004637082372991575, "loss": 3.013392925262451, "step": 7335, "token_acc": 0.2981420176046398 }, { "epoch": 4.3002052184110235, "grad_norm": 0.18718388217301946, "learning_rate": 0.0004636956631574486, "loss": 3.0517544746398926, "step": 7336, "token_acc": 0.2921343732476933 }, { "epoch": 4.300791556728232, "grad_norm": 0.20754944183084517, "learning_rate": 0.00046368308700835684, "loss": 3.035308361053467, "step": 7337, "token_acc": 0.29397371514050646 }, { "epoch": 4.301377895045441, "grad_norm": 0.19462959098141244, "learning_rate": 0.0004636705088520005, "loss": 3.0191650390625, "step": 7338, "token_acc": 0.29612204011168186 }, { "epoch": 4.30196423336265, "grad_norm": 0.17733369515346875, "learning_rate": 0.00046365792868849755, "loss": 3.062168836593628, "step": 7339, "token_acc": 0.29056886934528336 }, { "epoch": 4.302550571679859, "grad_norm": 0.18462095112039786, "learning_rate": 0.0004636453465179663, "loss": 3.032287120819092, "step": 7340, "token_acc": 0.2946809536593694 }, { "epoch": 4.303136909997068, "grad_norm": 0.1982254727375965, "learning_rate": 0.0004636327623405249, "loss": 3.0676932334899902, "step": 7341, "token_acc": 0.2918679242807855 }, { "epoch": 4.303723248314277, "grad_norm": 0.19974278706109944, "learning_rate": 0.0004636201761562916, "loss": 3.004348039627075, "step": 7342, "token_acc": 0.29733898958735056 }, { "epoch": 4.304309586631486, "grad_norm": 0.2191078782981199, "learning_rate": 0.0004636075879653846, "loss": 3.075151205062866, "step": 7343, "token_acc": 0.2892454777208615 }, { "epoch": 4.3048959249486956, "grad_norm": 0.19359164903270398, "learning_rate": 0.00046359499776792216, "loss": 3.0351898670196533, "step": 7344, "token_acc": 0.2935231848825711 }, { "epoch": 4.305482263265905, "grad_norm": 0.1864356484532815, "learning_rate": 0.00046358240556402256, "loss": 3.046383857727051, "step": 7345, "token_acc": 0.29286576642219303 }, { "epoch": 4.306068601583114, "grad_norm": 0.19228299001564558, "learning_rate": 0.000463569811353804, "loss": 3.0079708099365234, "step": 7346, "token_acc": 0.30021297809144487 }, { "epoch": 4.306654939900323, "grad_norm": 0.19630039623658913, "learning_rate": 0.0004635572151373849, "loss": 3.0555596351623535, "step": 7347, "token_acc": 0.2900031040493252 }, { "epoch": 4.307241278217531, "grad_norm": 0.22452998725573733, "learning_rate": 0.00046354461691488347, "loss": 2.9841198921203613, "step": 7348, "token_acc": 0.3009481720806281 }, { "epoch": 4.30782761653474, "grad_norm": 0.29677900505742066, "learning_rate": 0.0004635320166864182, "loss": 3.0319552421569824, "step": 7349, "token_acc": 0.2944271361015214 }, { "epoch": 4.308413954851949, "grad_norm": 0.3466598447656047, "learning_rate": 0.0004635194144521073, "loss": 3.052605628967285, "step": 7350, "token_acc": 0.2914837736162137 }, { "epoch": 4.3090002931691584, "grad_norm": 0.230542325421348, "learning_rate": 0.0004635068102120693, "loss": 3.0765819549560547, "step": 7351, "token_acc": 0.2886258562680028 }, { "epoch": 4.309586631486368, "grad_norm": 0.22935980257355698, "learning_rate": 0.0004634942039664225, "loss": 3.0282375812530518, "step": 7352, "token_acc": 0.29517580751582795 }, { "epoch": 4.310172969803577, "grad_norm": 0.2844022599781738, "learning_rate": 0.00046348159571528533, "loss": 3.007565975189209, "step": 7353, "token_acc": 0.2993737269407518 }, { "epoch": 4.310759308120786, "grad_norm": 0.1703867614221709, "learning_rate": 0.00046346898545877624, "loss": 3.01338267326355, "step": 7354, "token_acc": 0.2973343916832808 }, { "epoch": 4.311345646437995, "grad_norm": 0.2621886210881118, "learning_rate": 0.00046345637319701366, "loss": 3.0332469940185547, "step": 7355, "token_acc": 0.294285767163453 }, { "epoch": 4.311931984755204, "grad_norm": 0.22373239822929716, "learning_rate": 0.00046344375893011614, "loss": 3.008401393890381, "step": 7356, "token_acc": 0.2988404858627301 }, { "epoch": 4.312518323072413, "grad_norm": 0.2872764483225045, "learning_rate": 0.0004634311426582021, "loss": 3.005619764328003, "step": 7357, "token_acc": 0.2985731498850093 }, { "epoch": 4.313104661389621, "grad_norm": 0.19039720073001115, "learning_rate": 0.00046341852438139004, "loss": 3.0463571548461914, "step": 7358, "token_acc": 0.2945624043034647 }, { "epoch": 4.3136909997068305, "grad_norm": 0.2408970839291239, "learning_rate": 0.00046340590409979857, "loss": 3.0570287704467773, "step": 7359, "token_acc": 0.28995665263136117 }, { "epoch": 4.31427733802404, "grad_norm": 0.24425054183646672, "learning_rate": 0.00046339328181354617, "loss": 3.0411548614501953, "step": 7360, "token_acc": 0.2918149558437384 }, { "epoch": 4.314863676341249, "grad_norm": 0.19640617824775172, "learning_rate": 0.00046338065752275147, "loss": 3.05983304977417, "step": 7361, "token_acc": 0.29158017677089654 }, { "epoch": 4.315450014658458, "grad_norm": 0.21490540395242239, "learning_rate": 0.000463368031227533, "loss": 3.0167531967163086, "step": 7362, "token_acc": 0.2981231046635522 }, { "epoch": 4.316036352975667, "grad_norm": 0.25830799264201687, "learning_rate": 0.00046335540292800936, "loss": 3.0580382347106934, "step": 7363, "token_acc": 0.2912083976195724 }, { "epoch": 4.316622691292876, "grad_norm": 0.21051107765455762, "learning_rate": 0.0004633427726242993, "loss": 3.011385917663574, "step": 7364, "token_acc": 0.29773547050246135 }, { "epoch": 4.317209029610085, "grad_norm": 0.23722867947038911, "learning_rate": 0.00046333014031652134, "loss": 3.0127954483032227, "step": 7365, "token_acc": 0.2972987145100692 }, { "epoch": 4.317795367927294, "grad_norm": 0.17873039234554378, "learning_rate": 0.00046331750600479415, "loss": 3.034630537033081, "step": 7366, "token_acc": 0.2970629736085707 }, { "epoch": 4.318381706244503, "grad_norm": 0.24504705505039803, "learning_rate": 0.0004633048696892365, "loss": 3.0660440921783447, "step": 7367, "token_acc": 0.29079199646035103 }, { "epoch": 4.3189680445617125, "grad_norm": 0.18589682580352346, "learning_rate": 0.00046329223136996694, "loss": 3.082886219024658, "step": 7368, "token_acc": 0.28787691125507753 }, { "epoch": 4.319554382878922, "grad_norm": 0.20957307702994904, "learning_rate": 0.00046327959104710436, "loss": 3.0548925399780273, "step": 7369, "token_acc": 0.2927629613271152 }, { "epoch": 4.32014072119613, "grad_norm": 0.18438467602863795, "learning_rate": 0.00046326694872076736, "loss": 3.0427393913269043, "step": 7370, "token_acc": 0.2932563290421583 }, { "epoch": 4.320727059513339, "grad_norm": 0.23334771636014218, "learning_rate": 0.0004632543043910748, "loss": 3.053088665008545, "step": 7371, "token_acc": 0.2922609655229314 }, { "epoch": 4.321313397830548, "grad_norm": 0.17994176066788198, "learning_rate": 0.0004632416580581454, "loss": 3.0342416763305664, "step": 7372, "token_acc": 0.2960781853565298 }, { "epoch": 4.321899736147757, "grad_norm": 0.2136935670673401, "learning_rate": 0.00046322900972209797, "loss": 3.0433990955352783, "step": 7373, "token_acc": 0.29527102516719 }, { "epoch": 4.322486074464966, "grad_norm": 0.19449680227748337, "learning_rate": 0.00046321635938305133, "loss": 2.990283489227295, "step": 7374, "token_acc": 0.30067372075721255 }, { "epoch": 4.323072412782175, "grad_norm": 0.1959037126312654, "learning_rate": 0.0004632037070411243, "loss": 3.0249247550964355, "step": 7375, "token_acc": 0.29758399767781796 }, { "epoch": 4.3236587510993845, "grad_norm": 0.18869596898559512, "learning_rate": 0.00046319105269643576, "loss": 3.0392751693725586, "step": 7376, "token_acc": 0.2941637705049543 }, { "epoch": 4.324245089416594, "grad_norm": 0.18300549749946282, "learning_rate": 0.00046317839634910454, "loss": 2.992873191833496, "step": 7377, "token_acc": 0.30137012556746307 }, { "epoch": 4.324831427733803, "grad_norm": 0.1891110642875182, "learning_rate": 0.0004631657379992495, "loss": 3.002100944519043, "step": 7378, "token_acc": 0.2999768011754071 }, { "epoch": 4.325417766051012, "grad_norm": 0.21096247846318414, "learning_rate": 0.00046315307764698964, "loss": 3.0342583656311035, "step": 7379, "token_acc": 0.2948329995477744 }, { "epoch": 4.32600410436822, "grad_norm": 0.175912505571332, "learning_rate": 0.0004631404152924439, "loss": 3.012756824493408, "step": 7380, "token_acc": 0.2983351757456652 }, { "epoch": 4.326590442685429, "grad_norm": 0.1978897737387396, "learning_rate": 0.0004631277509357311, "loss": 3.0481491088867188, "step": 7381, "token_acc": 0.29259827502623686 }, { "epoch": 4.327176781002638, "grad_norm": 0.1823141016796918, "learning_rate": 0.00046311508457697026, "loss": 3.0035839080810547, "step": 7382, "token_acc": 0.29976809754864403 }, { "epoch": 4.327763119319847, "grad_norm": 0.20311035923879286, "learning_rate": 0.00046310241621628037, "loss": 3.037753105163574, "step": 7383, "token_acc": 0.2949219308251765 }, { "epoch": 4.3283494576370565, "grad_norm": 0.1717274841777956, "learning_rate": 0.0004630897458537805, "loss": 3.05173659324646, "step": 7384, "token_acc": 0.2918921656768769 }, { "epoch": 4.328935795954266, "grad_norm": 0.20656958757902574, "learning_rate": 0.00046307707348958963, "loss": 3.0078141689300537, "step": 7385, "token_acc": 0.2998708157801897 }, { "epoch": 4.329522134271475, "grad_norm": 0.19851052682711015, "learning_rate": 0.0004630643991238267, "loss": 3.0546536445617676, "step": 7386, "token_acc": 0.29180702400142156 }, { "epoch": 4.330108472588684, "grad_norm": 0.20104521123418023, "learning_rate": 0.0004630517227566108, "loss": 3.027625560760498, "step": 7387, "token_acc": 0.2946310267609235 }, { "epoch": 4.330694810905893, "grad_norm": 0.22656090493990086, "learning_rate": 0.0004630390443880612, "loss": 2.994175434112549, "step": 7388, "token_acc": 0.30018326663425215 }, { "epoch": 4.331281149223102, "grad_norm": 0.21753634825306367, "learning_rate": 0.0004630263640182968, "loss": 3.0565760135650635, "step": 7389, "token_acc": 0.292736252673965 }, { "epoch": 4.331867487540311, "grad_norm": 0.22480229454816908, "learning_rate": 0.00046301368164743676, "loss": 3.0724668502807617, "step": 7390, "token_acc": 0.2897222995527881 }, { "epoch": 4.33245382585752, "grad_norm": 0.20059961649897526, "learning_rate": 0.0004630009972756002, "loss": 3.069899320602417, "step": 7391, "token_acc": 0.2890455707722978 }, { "epoch": 4.3330401641747285, "grad_norm": 0.21102671325181863, "learning_rate": 0.0004629883109029063, "loss": 3.0591177940368652, "step": 7392, "token_acc": 0.2909188363896243 }, { "epoch": 4.333626502491938, "grad_norm": 0.2619960559431775, "learning_rate": 0.0004629756225294742, "loss": 3.007251262664795, "step": 7393, "token_acc": 0.30001754488566584 }, { "epoch": 4.334212840809147, "grad_norm": 0.2717205633323569, "learning_rate": 0.0004629629321554232, "loss": 3.0278992652893066, "step": 7394, "token_acc": 0.2954074799093344 }, { "epoch": 4.334799179126356, "grad_norm": 0.22727725778816382, "learning_rate": 0.0004629502397808723, "loss": 3.0420618057250977, "step": 7395, "token_acc": 0.2942933704860016 }, { "epoch": 4.335385517443565, "grad_norm": 0.1982573513698482, "learning_rate": 0.0004629375454059409, "loss": 3.023836135864258, "step": 7396, "token_acc": 0.2976283561815654 }, { "epoch": 4.335971855760774, "grad_norm": 0.2042734423495468, "learning_rate": 0.0004629248490307483, "loss": 3.055373191833496, "step": 7397, "token_acc": 0.2931242015987779 }, { "epoch": 4.336558194077983, "grad_norm": 0.23162282968783332, "learning_rate": 0.00046291215065541355, "loss": 3.0937328338623047, "step": 7398, "token_acc": 0.287022644204512 }, { "epoch": 4.337144532395192, "grad_norm": 0.1975061969905795, "learning_rate": 0.0004628994502800561, "loss": 3.0817885398864746, "step": 7399, "token_acc": 0.28821430236800166 }, { "epoch": 4.337730870712401, "grad_norm": 0.19797168246825958, "learning_rate": 0.00046288674790479514, "loss": 3.0889339447021484, "step": 7400, "token_acc": 0.2879575763484893 }, { "epoch": 4.3383172090296105, "grad_norm": 0.266195047381109, "learning_rate": 0.0004628740435297501, "loss": 3.0350232124328613, "step": 7401, "token_acc": 0.2952281801399562 }, { "epoch": 4.338903547346819, "grad_norm": 0.24735116569942564, "learning_rate": 0.0004628613371550402, "loss": 3.0221309661865234, "step": 7402, "token_acc": 0.2957843290861582 }, { "epoch": 4.339489885664028, "grad_norm": 0.17618476439583544, "learning_rate": 0.000462848628780785, "loss": 3.0492477416992188, "step": 7403, "token_acc": 0.2930530102402284 }, { "epoch": 4.340076223981237, "grad_norm": 0.2135099098245777, "learning_rate": 0.00046283591840710366, "loss": 3.036322593688965, "step": 7404, "token_acc": 0.2939802869233151 }, { "epoch": 4.340662562298446, "grad_norm": 0.19537418433865283, "learning_rate": 0.00046282320603411567, "loss": 3.020264148712158, "step": 7405, "token_acc": 0.29657603222557904 }, { "epoch": 4.341248900615655, "grad_norm": 0.18830810334279963, "learning_rate": 0.0004628104916619405, "loss": 3.103588581085205, "step": 7406, "token_acc": 0.2872384531719452 }, { "epoch": 4.341835238932864, "grad_norm": 0.20806976663294371, "learning_rate": 0.00046279777529069745, "loss": 3.0320184230804443, "step": 7407, "token_acc": 0.294376162560944 }, { "epoch": 4.342421577250073, "grad_norm": 0.19028785166870088, "learning_rate": 0.0004627850569205061, "loss": 3.0100531578063965, "step": 7408, "token_acc": 0.2992730782289737 }, { "epoch": 4.3430079155672825, "grad_norm": 0.19171124445599927, "learning_rate": 0.00046277233655148587, "loss": 3.0491299629211426, "step": 7409, "token_acc": 0.29164931945556444 }, { "epoch": 4.343594253884492, "grad_norm": 0.2702892482098735, "learning_rate": 0.00046275961418375624, "loss": 3.0463979244232178, "step": 7410, "token_acc": 0.292536013175835 }, { "epoch": 4.344180592201701, "grad_norm": 0.23284447504153385, "learning_rate": 0.00046274688981743674, "loss": 2.990690231323242, "step": 7411, "token_acc": 0.30113254328458566 }, { "epoch": 4.34476693051891, "grad_norm": 0.18645159730963004, "learning_rate": 0.00046273416345264684, "loss": 3.044921398162842, "step": 7412, "token_acc": 0.2930963528927609 }, { "epoch": 4.345353268836118, "grad_norm": 0.20587907028872307, "learning_rate": 0.0004627214350895063, "loss": 3.034092903137207, "step": 7413, "token_acc": 0.29246645308651487 }, { "epoch": 4.345939607153327, "grad_norm": 0.21073741733558052, "learning_rate": 0.0004627087047281343, "loss": 3.0388731956481934, "step": 7414, "token_acc": 0.29396236386433583 }, { "epoch": 4.346525945470536, "grad_norm": 0.18052979202304362, "learning_rate": 0.0004626959723686508, "loss": 3.0058445930480957, "step": 7415, "token_acc": 0.29808360510240467 }, { "epoch": 4.347112283787745, "grad_norm": 0.17053592469449644, "learning_rate": 0.0004626832380111752, "loss": 3.02262282371521, "step": 7416, "token_acc": 0.2960323123057195 }, { "epoch": 4.3476986221049545, "grad_norm": 0.16604459574535513, "learning_rate": 0.00046267050165582716, "loss": 2.993769884109497, "step": 7417, "token_acc": 0.3013771186440678 }, { "epoch": 4.348284960422164, "grad_norm": 0.1826125976318458, "learning_rate": 0.00046265776330272636, "loss": 2.9973368644714355, "step": 7418, "token_acc": 0.3001957862917323 }, { "epoch": 4.348871298739373, "grad_norm": 0.18444829239719415, "learning_rate": 0.0004626450229519924, "loss": 3.031925916671753, "step": 7419, "token_acc": 0.2959684655987117 }, { "epoch": 4.349457637056582, "grad_norm": 0.16675363910608254, "learning_rate": 0.00046263228060374503, "loss": 3.0233850479125977, "step": 7420, "token_acc": 0.29627331979968796 }, { "epoch": 4.350043975373791, "grad_norm": 0.1659424896027177, "learning_rate": 0.0004626195362581039, "loss": 3.0460872650146484, "step": 7421, "token_acc": 0.2947687975007161 }, { "epoch": 4.350630313691, "grad_norm": 0.15873325434938698, "learning_rate": 0.0004626067899151887, "loss": 3.0167508125305176, "step": 7422, "token_acc": 0.2968006566913095 }, { "epoch": 4.351216652008208, "grad_norm": 0.1681809321458993, "learning_rate": 0.00046259404157511925, "loss": 3.0244288444519043, "step": 7423, "token_acc": 0.2971358285011422 }, { "epoch": 4.351802990325417, "grad_norm": 0.18082993369166306, "learning_rate": 0.00046258129123801525, "loss": 3.069662570953369, "step": 7424, "token_acc": 0.28891627922280066 }, { "epoch": 4.3523893286426265, "grad_norm": 0.18903947782295186, "learning_rate": 0.0004625685389039964, "loss": 3.0051651000976562, "step": 7425, "token_acc": 0.29768696913729503 }, { "epoch": 4.352975666959836, "grad_norm": 0.25331623215662735, "learning_rate": 0.0004625557845731827, "loss": 3.0654406547546387, "step": 7426, "token_acc": 0.2917292079524388 }, { "epoch": 4.353562005277045, "grad_norm": 0.3447159936341741, "learning_rate": 0.0004625430282456937, "loss": 3.027750015258789, "step": 7427, "token_acc": 0.29675155461130187 }, { "epoch": 4.354148343594254, "grad_norm": 0.2704918855141654, "learning_rate": 0.0004625302699216495, "loss": 3.0281574726104736, "step": 7428, "token_acc": 0.297872684929067 }, { "epoch": 4.354734681911463, "grad_norm": 0.18803840869986135, "learning_rate": 0.00046251750960116966, "loss": 3.0601987838745117, "step": 7429, "token_acc": 0.29000721807065216 }, { "epoch": 4.355321020228672, "grad_norm": 0.2304811079669045, "learning_rate": 0.0004625047472843742, "loss": 2.998924970626831, "step": 7430, "token_acc": 0.3007969128420545 }, { "epoch": 4.355907358545881, "grad_norm": 0.17825665131839397, "learning_rate": 0.00046249198297138304, "loss": 3.0382819175720215, "step": 7431, "token_acc": 0.2959405689358233 }, { "epoch": 4.35649369686309, "grad_norm": 0.19885623527954244, "learning_rate": 0.0004624792166623161, "loss": 3.0109152793884277, "step": 7432, "token_acc": 0.2982725635401621 }, { "epoch": 4.357080035180299, "grad_norm": 0.20673945253928655, "learning_rate": 0.0004624664483572931, "loss": 3.0245442390441895, "step": 7433, "token_acc": 0.2964558277060774 }, { "epoch": 4.3576663734975085, "grad_norm": 0.188909268992054, "learning_rate": 0.0004624536780564342, "loss": 2.9925849437713623, "step": 7434, "token_acc": 0.3013480222113429 }, { "epoch": 4.358252711814717, "grad_norm": 0.2062115653746405, "learning_rate": 0.0004624409057598593, "loss": 3.0509238243103027, "step": 7435, "token_acc": 0.29182644001066294 }, { "epoch": 4.358839050131926, "grad_norm": 0.21702497087612563, "learning_rate": 0.00046242813146768827, "loss": 3.063058376312256, "step": 7436, "token_acc": 0.29087926938250613 }, { "epoch": 4.359425388449135, "grad_norm": 0.23843733211278406, "learning_rate": 0.0004624153551800413, "loss": 3.0219860076904297, "step": 7437, "token_acc": 0.2968000831147243 }, { "epoch": 4.360011726766344, "grad_norm": 0.18915393427127203, "learning_rate": 0.0004624025768970382, "loss": 2.9877147674560547, "step": 7438, "token_acc": 0.3002915679384508 }, { "epoch": 4.360598065083553, "grad_norm": 0.2041851067242998, "learning_rate": 0.0004623897966187992, "loss": 3.0817365646362305, "step": 7439, "token_acc": 0.2885972888732372 }, { "epoch": 4.361184403400762, "grad_norm": 0.244161759423216, "learning_rate": 0.0004623770143454442, "loss": 3.002307653427124, "step": 7440, "token_acc": 0.29759288747346074 }, { "epoch": 4.361770741717971, "grad_norm": 0.20037149822585196, "learning_rate": 0.00046236423007709333, "loss": 3.0436453819274902, "step": 7441, "token_acc": 0.2933716033432103 }, { "epoch": 4.3623570800351805, "grad_norm": 0.19635094778165368, "learning_rate": 0.00046235144381386674, "loss": 3.0295345783233643, "step": 7442, "token_acc": 0.2978717274470158 }, { "epoch": 4.36294341835239, "grad_norm": 0.19633615469518834, "learning_rate": 0.0004623386555558844, "loss": 3.0125646591186523, "step": 7443, "token_acc": 0.29774658486365585 }, { "epoch": 4.363529756669599, "grad_norm": 0.17908632697662138, "learning_rate": 0.00046232586530326657, "loss": 3.0864644050598145, "step": 7444, "token_acc": 0.2889055066745159 }, { "epoch": 4.364116094986807, "grad_norm": 0.19821601778191889, "learning_rate": 0.00046231307305613336, "loss": 3.0216212272644043, "step": 7445, "token_acc": 0.2968090271913528 }, { "epoch": 4.364702433304016, "grad_norm": 0.19244222399770558, "learning_rate": 0.0004623002788146049, "loss": 3.0228304862976074, "step": 7446, "token_acc": 0.29487746145838617 }, { "epoch": 4.365288771621225, "grad_norm": 0.1850902811456137, "learning_rate": 0.0004622874825788014, "loss": 3.024705410003662, "step": 7447, "token_acc": 0.29581152526202814 }, { "epoch": 4.365875109938434, "grad_norm": 0.21606500966413003, "learning_rate": 0.00046227468434884304, "loss": 3.0333304405212402, "step": 7448, "token_acc": 0.2944822680621888 }, { "epoch": 4.366461448255643, "grad_norm": 0.2377340161700647, "learning_rate": 0.00046226188412485015, "loss": 3.014246940612793, "step": 7449, "token_acc": 0.2954652280739237 }, { "epoch": 4.3670477865728525, "grad_norm": 0.2259790114221647, "learning_rate": 0.0004622490819069428, "loss": 3.0706403255462646, "step": 7450, "token_acc": 0.28907210824767204 }, { "epoch": 4.367634124890062, "grad_norm": 0.1952300272455978, "learning_rate": 0.00046223627769524135, "loss": 3.0077767372131348, "step": 7451, "token_acc": 0.2980014872413731 }, { "epoch": 4.368220463207271, "grad_norm": 0.20735602647882279, "learning_rate": 0.0004622234714898661, "loss": 3.0574991703033447, "step": 7452, "token_acc": 0.291812599751729 }, { "epoch": 4.36880680152448, "grad_norm": 0.24411749543127234, "learning_rate": 0.0004622106632909373, "loss": 3.023402214050293, "step": 7453, "token_acc": 0.29814502087996964 }, { "epoch": 4.369393139841689, "grad_norm": 0.22436214858677092, "learning_rate": 0.0004621978530985752, "loss": 2.9573724269866943, "step": 7454, "token_acc": 0.3053596688845811 }, { "epoch": 4.369979478158898, "grad_norm": 0.17339411378137617, "learning_rate": 0.00046218504091290023, "loss": 3.0339279174804688, "step": 7455, "token_acc": 0.2939892686429952 }, { "epoch": 4.370565816476106, "grad_norm": 0.28667152284942843, "learning_rate": 0.0004621722267340328, "loss": 3.0635414123535156, "step": 7456, "token_acc": 0.2904077179884861 }, { "epoch": 4.371152154793315, "grad_norm": 0.26881350239032703, "learning_rate": 0.0004621594105620932, "loss": 3.018627882003784, "step": 7457, "token_acc": 0.2958344190894539 }, { "epoch": 4.3717384931105245, "grad_norm": 0.17669170233468612, "learning_rate": 0.0004621465923972018, "loss": 3.04150390625, "step": 7458, "token_acc": 0.29517203672832726 }, { "epoch": 4.372324831427734, "grad_norm": 0.22400859644971893, "learning_rate": 0.00046213377223947906, "loss": 3.0622315406799316, "step": 7459, "token_acc": 0.2910539455457555 }, { "epoch": 4.372911169744943, "grad_norm": 0.2302023986970875, "learning_rate": 0.0004621209500890453, "loss": 3.0642857551574707, "step": 7460, "token_acc": 0.29130354114348284 }, { "epoch": 4.373497508062152, "grad_norm": 0.20886895596562155, "learning_rate": 0.00046210812594602116, "loss": 3.043332099914551, "step": 7461, "token_acc": 0.29435710131915893 }, { "epoch": 4.374083846379361, "grad_norm": 0.17952397745575238, "learning_rate": 0.00046209529981052687, "loss": 3.0028300285339355, "step": 7462, "token_acc": 0.29915414463779044 }, { "epoch": 4.37467018469657, "grad_norm": 0.20580491871342954, "learning_rate": 0.00046208247168268314, "loss": 3.048549175262451, "step": 7463, "token_acc": 0.29231760632786635 }, { "epoch": 4.375256523013779, "grad_norm": 0.20089238126024245, "learning_rate": 0.00046206964156261034, "loss": 3.042198896408081, "step": 7464, "token_acc": 0.29449159353200244 }, { "epoch": 4.375842861330988, "grad_norm": 0.2441780075199423, "learning_rate": 0.00046205680945042907, "loss": 3.0827395915985107, "step": 7465, "token_acc": 0.28691553534548925 }, { "epoch": 4.3764291996481965, "grad_norm": 0.22016799191874425, "learning_rate": 0.00046204397534625974, "loss": 2.9968390464782715, "step": 7466, "token_acc": 0.2996711288638453 }, { "epoch": 4.377015537965406, "grad_norm": 0.20222940892897134, "learning_rate": 0.0004620311392502231, "loss": 3.054934024810791, "step": 7467, "token_acc": 0.29077670028212205 }, { "epoch": 4.377601876282615, "grad_norm": 0.2717866536463735, "learning_rate": 0.0004620183011624395, "loss": 3.0090277194976807, "step": 7468, "token_acc": 0.2979135837505828 }, { "epoch": 4.378188214599824, "grad_norm": 0.247079871140854, "learning_rate": 0.00046200546108302975, "loss": 3.018577814102173, "step": 7469, "token_acc": 0.29660258376622395 }, { "epoch": 4.378774552917033, "grad_norm": 0.22053513633342253, "learning_rate": 0.0004619926190121143, "loss": 3.0542314052581787, "step": 7470, "token_acc": 0.2919831179549325 }, { "epoch": 4.379360891234242, "grad_norm": 0.25853138927987507, "learning_rate": 0.0004619797749498139, "loss": 3.0643656253814697, "step": 7471, "token_acc": 0.29062110695853355 }, { "epoch": 4.379947229551451, "grad_norm": 0.19452490485520127, "learning_rate": 0.00046196692889624927, "loss": 3.0672216415405273, "step": 7472, "token_acc": 0.2914582418062375 }, { "epoch": 4.38053356786866, "grad_norm": 0.2704189465219825, "learning_rate": 0.0004619540808515408, "loss": 3.043999195098877, "step": 7473, "token_acc": 0.29353675406401675 }, { "epoch": 4.381119906185869, "grad_norm": 0.23342420395324937, "learning_rate": 0.0004619412308158094, "loss": 3.0202512741088867, "step": 7474, "token_acc": 0.2971954674220963 }, { "epoch": 4.3817062445030786, "grad_norm": 0.22168594754530893, "learning_rate": 0.0004619283787891758, "loss": 3.03303861618042, "step": 7475, "token_acc": 0.29470362644077797 }, { "epoch": 4.382292582820288, "grad_norm": 0.24261362989054872, "learning_rate": 0.0004619155247717606, "loss": 3.0300254821777344, "step": 7476, "token_acc": 0.29696056423361633 }, { "epoch": 4.382878921137497, "grad_norm": 0.19319289097640557, "learning_rate": 0.0004619026687636846, "loss": 3.0648913383483887, "step": 7477, "token_acc": 0.29075852580892664 }, { "epoch": 4.383465259454705, "grad_norm": 0.26043536195155187, "learning_rate": 0.0004618898107650686, "loss": 3.029869556427002, "step": 7478, "token_acc": 0.29468627576905787 }, { "epoch": 4.384051597771914, "grad_norm": 0.1862942816372656, "learning_rate": 0.0004618769507760333, "loss": 3.05059814453125, "step": 7479, "token_acc": 0.29489964446397043 }, { "epoch": 4.384637936089123, "grad_norm": 0.24064883855313304, "learning_rate": 0.00046186408879669963, "loss": 3.0370357036590576, "step": 7480, "token_acc": 0.2941743837327585 }, { "epoch": 4.385224274406332, "grad_norm": 0.1964551663450361, "learning_rate": 0.00046185122482718827, "loss": 3.0265300273895264, "step": 7481, "token_acc": 0.29669174971188395 }, { "epoch": 4.3858106127235414, "grad_norm": 0.1938626907250114, "learning_rate": 0.00046183835886762014, "loss": 3.0124495029449463, "step": 7482, "token_acc": 0.29618153533085756 }, { "epoch": 4.386396951040751, "grad_norm": 0.18284241318335548, "learning_rate": 0.00046182549091811607, "loss": 3.065272092819214, "step": 7483, "token_acc": 0.29051542173364137 }, { "epoch": 4.38698328935796, "grad_norm": 0.20076509633656564, "learning_rate": 0.000461812620978797, "loss": 3.042503833770752, "step": 7484, "token_acc": 0.29422638550650887 }, { "epoch": 4.387569627675169, "grad_norm": 0.1906941828565981, "learning_rate": 0.0004617997490497838, "loss": 3.0366454124450684, "step": 7485, "token_acc": 0.2940044810654047 }, { "epoch": 4.388155965992378, "grad_norm": 0.18685972866946116, "learning_rate": 0.0004617868751311973, "loss": 3.038287401199341, "step": 7486, "token_acc": 0.29333488056467216 }, { "epoch": 4.388742304309587, "grad_norm": 0.2183809187724572, "learning_rate": 0.0004617739992231585, "loss": 3.0009188652038574, "step": 7487, "token_acc": 0.29914816182288095 }, { "epoch": 4.389328642626795, "grad_norm": 0.17034879116128313, "learning_rate": 0.0004617611213257883, "loss": 3.029564619064331, "step": 7488, "token_acc": 0.2942860367524086 }, { "epoch": 4.389914980944004, "grad_norm": 0.1900525296614871, "learning_rate": 0.0004617482414392078, "loss": 3.0664420127868652, "step": 7489, "token_acc": 0.2895762292700927 }, { "epoch": 4.3905013192612135, "grad_norm": 0.1926831749226504, "learning_rate": 0.0004617353595635379, "loss": 3.0137276649475098, "step": 7490, "token_acc": 0.29782754920233934 }, { "epoch": 4.391087657578423, "grad_norm": 0.1909624701739409, "learning_rate": 0.00046172247569889956, "loss": 3.0185084342956543, "step": 7491, "token_acc": 0.2971992022815574 }, { "epoch": 4.391673995895632, "grad_norm": 0.15874226550319134, "learning_rate": 0.00046170958984541387, "loss": 3.038285732269287, "step": 7492, "token_acc": 0.2934004112618496 }, { "epoch": 4.392260334212841, "grad_norm": 0.2085482013322463, "learning_rate": 0.000461696702003202, "loss": 3.0913572311401367, "step": 7493, "token_acc": 0.2875825117420556 }, { "epoch": 4.39284667253005, "grad_norm": 0.28067362998581946, "learning_rate": 0.00046168381217238474, "loss": 3.044562816619873, "step": 7494, "token_acc": 0.2949803147068606 }, { "epoch": 4.393433010847259, "grad_norm": 0.3662581477477971, "learning_rate": 0.0004616709203530833, "loss": 3.055656909942627, "step": 7495, "token_acc": 0.2920879898332176 }, { "epoch": 4.394019349164468, "grad_norm": 0.24937378478764022, "learning_rate": 0.0004616580265454189, "loss": 3.0151169300079346, "step": 7496, "token_acc": 0.2976364085218582 }, { "epoch": 4.394605687481677, "grad_norm": 0.20363295757192482, "learning_rate": 0.0004616451307495124, "loss": 3.104248046875, "step": 7497, "token_acc": 0.2856949570827064 }, { "epoch": 4.395192025798886, "grad_norm": 0.21018321343781718, "learning_rate": 0.00046163223296548524, "loss": 3.020953893661499, "step": 7498, "token_acc": 0.2963507737954072 }, { "epoch": 4.395778364116095, "grad_norm": 0.17920563235808587, "learning_rate": 0.00046161933319345836, "loss": 3.02058744430542, "step": 7499, "token_acc": 0.2973521252610471 }, { "epoch": 4.396364702433304, "grad_norm": 0.22142518460442173, "learning_rate": 0.000461606431433553, "loss": 3.038877487182617, "step": 7500, "token_acc": 0.2945777851551761 }, { "epoch": 4.396951040750513, "grad_norm": 0.2019659188045671, "learning_rate": 0.00046159352768589037, "loss": 3.028102159500122, "step": 7501, "token_acc": 0.29525295177644495 }, { "epoch": 4.397537379067722, "grad_norm": 0.1917446154424647, "learning_rate": 0.0004615806219505917, "loss": 3.0374135971069336, "step": 7502, "token_acc": 0.2940361388905192 }, { "epoch": 4.398123717384931, "grad_norm": 0.19446824834025309, "learning_rate": 0.0004615677142277782, "loss": 3.0670928955078125, "step": 7503, "token_acc": 0.2897940074906367 }, { "epoch": 4.39871005570214, "grad_norm": 0.23448785003383812, "learning_rate": 0.00046155480451757103, "loss": 3.0418567657470703, "step": 7504, "token_acc": 0.29472074244014984 }, { "epoch": 4.399296394019349, "grad_norm": 0.1604750626715864, "learning_rate": 0.0004615418928200916, "loss": 3.0699028968811035, "step": 7505, "token_acc": 0.2888524976746014 }, { "epoch": 4.399882732336558, "grad_norm": 0.19260752813030613, "learning_rate": 0.00046152897913546114, "loss": 3.001244306564331, "step": 7506, "token_acc": 0.30016563190682416 }, { "epoch": 4.4004690706537675, "grad_norm": 0.1809449615076826, "learning_rate": 0.000461516063463801, "loss": 3.053567409515381, "step": 7507, "token_acc": 0.2920464276774304 }, { "epoch": 4.401055408970977, "grad_norm": 0.19050727131520465, "learning_rate": 0.00046150314580523246, "loss": 3.034363269805908, "step": 7508, "token_acc": 0.29392466780111276 }, { "epoch": 4.401641747288186, "grad_norm": 0.1681378730789792, "learning_rate": 0.0004614902261598768, "loss": 3.0069069862365723, "step": 7509, "token_acc": 0.29926307894743864 }, { "epoch": 4.402228085605394, "grad_norm": 0.17451969246407623, "learning_rate": 0.00046147730452785553, "loss": 3.067537307739258, "step": 7510, "token_acc": 0.28939893922592885 }, { "epoch": 4.402814423922603, "grad_norm": 0.17058777414392742, "learning_rate": 0.00046146438090928987, "loss": 3.073115348815918, "step": 7511, "token_acc": 0.29044533049411153 }, { "epoch": 4.403400762239812, "grad_norm": 0.19937195508768987, "learning_rate": 0.0004614514553043014, "loss": 3.08248233795166, "step": 7512, "token_acc": 0.2880395563276761 }, { "epoch": 4.403987100557021, "grad_norm": 0.19833060788006351, "learning_rate": 0.00046143852771301136, "loss": 3.064807653427124, "step": 7513, "token_acc": 0.2905400454312303 }, { "epoch": 4.40457343887423, "grad_norm": 0.2180654112977944, "learning_rate": 0.0004614255981355413, "loss": 3.053335666656494, "step": 7514, "token_acc": 0.29108790210148505 }, { "epoch": 4.4051597771914395, "grad_norm": 0.19577594974991983, "learning_rate": 0.00046141266657201267, "loss": 3.0100479125976562, "step": 7515, "token_acc": 0.2977492844995645 }, { "epoch": 4.405746115508649, "grad_norm": 0.17864755186611248, "learning_rate": 0.0004613997330225469, "loss": 2.9719505310058594, "step": 7516, "token_acc": 0.304961007482348 }, { "epoch": 4.406332453825858, "grad_norm": 0.2387633630615382, "learning_rate": 0.00046138679748726545, "loss": 3.031125545501709, "step": 7517, "token_acc": 0.29523237441073924 }, { "epoch": 4.406918792143067, "grad_norm": 0.22450527761595424, "learning_rate": 0.00046137385996628997, "loss": 3.058399200439453, "step": 7518, "token_acc": 0.2922976709762941 }, { "epoch": 4.407505130460276, "grad_norm": 0.2112571747983414, "learning_rate": 0.00046136092045974186, "loss": 3.049314260482788, "step": 7519, "token_acc": 0.2924011073802083 }, { "epoch": 4.408091468777485, "grad_norm": 0.2267099030405212, "learning_rate": 0.0004613479789677427, "loss": 3.032458782196045, "step": 7520, "token_acc": 0.29258290861922803 }, { "epoch": 4.408677807094693, "grad_norm": 0.27874556392461547, "learning_rate": 0.0004613350354904141, "loss": 3.0328762531280518, "step": 7521, "token_acc": 0.29732554985926024 }, { "epoch": 4.409264145411902, "grad_norm": 0.2134244192194194, "learning_rate": 0.00046132209002787763, "loss": 3.0926437377929688, "step": 7522, "token_acc": 0.28702195514087664 }, { "epoch": 4.4098504837291115, "grad_norm": 0.19880226126512426, "learning_rate": 0.00046130914258025486, "loss": 3.0537564754486084, "step": 7523, "token_acc": 0.2922183694028015 }, { "epoch": 4.410436822046321, "grad_norm": 0.2622580314408094, "learning_rate": 0.00046129619314766744, "loss": 3.05915904045105, "step": 7524, "token_acc": 0.2914480197384957 }, { "epoch": 4.41102316036353, "grad_norm": 0.2122384969223726, "learning_rate": 0.00046128324173023706, "loss": 3.0373120307922363, "step": 7525, "token_acc": 0.29413602391233556 }, { "epoch": 4.411609498680739, "grad_norm": 0.2127315936843975, "learning_rate": 0.00046127028832808526, "loss": 3.050812005996704, "step": 7526, "token_acc": 0.2913184541720087 }, { "epoch": 4.412195836997948, "grad_norm": 0.23332393311396762, "learning_rate": 0.0004612573329413338, "loss": 3.041382312774658, "step": 7527, "token_acc": 0.29259515607638475 }, { "epoch": 4.412782175315157, "grad_norm": 0.21529283590460704, "learning_rate": 0.0004612443755701044, "loss": 2.9850082397460938, "step": 7528, "token_acc": 0.30377380936644943 }, { "epoch": 4.413368513632366, "grad_norm": 0.2041303729429446, "learning_rate": 0.00046123141621451873, "loss": 3.0363340377807617, "step": 7529, "token_acc": 0.2955727775760036 }, { "epoch": 4.413954851949575, "grad_norm": 0.22530750428748536, "learning_rate": 0.0004612184548746986, "loss": 3.0665838718414307, "step": 7530, "token_acc": 0.2913468192706947 }, { "epoch": 4.4145411902667835, "grad_norm": 0.22001839042793542, "learning_rate": 0.00046120549155076565, "loss": 3.032942771911621, "step": 7531, "token_acc": 0.29565338036323663 }, { "epoch": 4.415127528583993, "grad_norm": 0.2583822756365117, "learning_rate": 0.0004611925262428417, "loss": 3.0183916091918945, "step": 7532, "token_acc": 0.2971070735112197 }, { "epoch": 4.415713866901202, "grad_norm": 0.21185490751462283, "learning_rate": 0.00046117955895104857, "loss": 3.003661870956421, "step": 7533, "token_acc": 0.2977303174954151 }, { "epoch": 4.416300205218411, "grad_norm": 0.2663105007930926, "learning_rate": 0.00046116658967550807, "loss": 3.045348882675171, "step": 7534, "token_acc": 0.29202843221372965 }, { "epoch": 4.41688654353562, "grad_norm": 0.26219437019213276, "learning_rate": 0.00046115361841634196, "loss": 3.0178146362304688, "step": 7535, "token_acc": 0.29475757219704823 }, { "epoch": 4.417472881852829, "grad_norm": 0.17467774884495788, "learning_rate": 0.00046114064517367216, "loss": 3.0526294708251953, "step": 7536, "token_acc": 0.29370285382363176 }, { "epoch": 4.418059220170038, "grad_norm": 0.3011361147810642, "learning_rate": 0.0004611276699476206, "loss": 3.0759549140930176, "step": 7537, "token_acc": 0.28947607608907355 }, { "epoch": 4.418645558487247, "grad_norm": 0.2462495028347038, "learning_rate": 0.000461114692738309, "loss": 3.059208869934082, "step": 7538, "token_acc": 0.29187650193292236 }, { "epoch": 4.419231896804456, "grad_norm": 0.20878714913971583, "learning_rate": 0.0004611017135458593, "loss": 3.0137548446655273, "step": 7539, "token_acc": 0.29972064094454987 }, { "epoch": 4.4198182351216655, "grad_norm": 0.2123034709715419, "learning_rate": 0.0004610887323703935, "loss": 3.0560672283172607, "step": 7540, "token_acc": 0.2914231008369039 }, { "epoch": 4.420404573438875, "grad_norm": 0.259187211035135, "learning_rate": 0.0004610757492120335, "loss": 3.0791683197021484, "step": 7541, "token_acc": 0.2891130396177751 }, { "epoch": 4.420990911756084, "grad_norm": 0.20754974764083525, "learning_rate": 0.0004610627640709013, "loss": 3.024458885192871, "step": 7542, "token_acc": 0.2980797769258972 }, { "epoch": 4.421577250073292, "grad_norm": 0.19050257960433375, "learning_rate": 0.0004610497769471188, "loss": 3.071842670440674, "step": 7543, "token_acc": 0.2898069893899546 }, { "epoch": 4.422163588390501, "grad_norm": 0.21999382050881797, "learning_rate": 0.00046103678784080806, "loss": 3.0235085487365723, "step": 7544, "token_acc": 0.29515990636592465 }, { "epoch": 4.42274992670771, "grad_norm": 0.19931819454019808, "learning_rate": 0.00046102379675209103, "loss": 3.0318026542663574, "step": 7545, "token_acc": 0.29505515501999846 }, { "epoch": 4.423336265024919, "grad_norm": 0.17324184639266896, "learning_rate": 0.00046101080368108986, "loss": 3.0414838790893555, "step": 7546, "token_acc": 0.2943136634426207 }, { "epoch": 4.423922603342128, "grad_norm": 0.19821724235811047, "learning_rate": 0.0004609978086279265, "loss": 3.085927963256836, "step": 7547, "token_acc": 0.287089475233894 }, { "epoch": 4.4245089416593375, "grad_norm": 0.18841893008344263, "learning_rate": 0.0004609848115927231, "loss": 3.0053563117980957, "step": 7548, "token_acc": 0.3006212083437214 }, { "epoch": 4.425095279976547, "grad_norm": 0.17069164278988555, "learning_rate": 0.0004609718125756016, "loss": 3.1011414527893066, "step": 7549, "token_acc": 0.28639622631360967 }, { "epoch": 4.425681618293756, "grad_norm": 0.21289496093769322, "learning_rate": 0.0004609588115766843, "loss": 3.02915096282959, "step": 7550, "token_acc": 0.2960657481418605 }, { "epoch": 4.426267956610965, "grad_norm": 0.17667380727781076, "learning_rate": 0.0004609458085960931, "loss": 3.04573917388916, "step": 7551, "token_acc": 0.2922632386194547 }, { "epoch": 4.426854294928174, "grad_norm": 0.16491452943010507, "learning_rate": 0.00046093280363395045, "loss": 3.087282180786133, "step": 7552, "token_acc": 0.2873702919574479 }, { "epoch": 4.427440633245382, "grad_norm": 0.1911721354188527, "learning_rate": 0.00046091979669037834, "loss": 3.044433355331421, "step": 7553, "token_acc": 0.29121318781494515 }, { "epoch": 4.428026971562591, "grad_norm": 0.1857855981643311, "learning_rate": 0.00046090678776549885, "loss": 3.0064706802368164, "step": 7554, "token_acc": 0.29731909704987375 }, { "epoch": 4.4286133098798, "grad_norm": 0.1893392276757729, "learning_rate": 0.00046089377685943435, "loss": 3.0068907737731934, "step": 7555, "token_acc": 0.29849096323408847 }, { "epoch": 4.4291996481970095, "grad_norm": 0.16947234966737473, "learning_rate": 0.00046088076397230696, "loss": 3.016566753387451, "step": 7556, "token_acc": 0.29645059805631696 }, { "epoch": 4.429785986514219, "grad_norm": 0.22668331160395627, "learning_rate": 0.00046086774910423893, "loss": 3.0473833084106445, "step": 7557, "token_acc": 0.2916787489125979 }, { "epoch": 4.430372324831428, "grad_norm": 0.24443664671461063, "learning_rate": 0.0004608547322553526, "loss": 3.0571484565734863, "step": 7558, "token_acc": 0.2917369952362904 }, { "epoch": 4.430958663148637, "grad_norm": 0.2040382538332167, "learning_rate": 0.0004608417134257702, "loss": 3.013051986694336, "step": 7559, "token_acc": 0.29631763241099374 }, { "epoch": 4.431545001465846, "grad_norm": 0.19142343128363662, "learning_rate": 0.000460828692615614, "loss": 3.048431873321533, "step": 7560, "token_acc": 0.2923738644678838 }, { "epoch": 4.432131339783055, "grad_norm": 0.19831279855285922, "learning_rate": 0.00046081566982500633, "loss": 3.054229974746704, "step": 7561, "token_acc": 0.2908986343048418 }, { "epoch": 4.432717678100264, "grad_norm": 0.17629069328705005, "learning_rate": 0.0004608026450540695, "loss": 3.0721988677978516, "step": 7562, "token_acc": 0.2896223219467107 }, { "epoch": 4.433304016417473, "grad_norm": 0.19204106224098036, "learning_rate": 0.0004607896183029259, "loss": 3.0516953468322754, "step": 7563, "token_acc": 0.2927323001736223 }, { "epoch": 4.4338903547346815, "grad_norm": 0.23524025998156667, "learning_rate": 0.00046077658957169787, "loss": 3.0560128688812256, "step": 7564, "token_acc": 0.29137966856815234 }, { "epoch": 4.434476693051891, "grad_norm": 0.17342587407626683, "learning_rate": 0.0004607635588605078, "loss": 3.047670364379883, "step": 7565, "token_acc": 0.29239575552412284 }, { "epoch": 4.4350630313691, "grad_norm": 0.17894744182875094, "learning_rate": 0.00046075052616947816, "loss": 3.0308995246887207, "step": 7566, "token_acc": 0.29421973313012323 }, { "epoch": 4.435649369686309, "grad_norm": 0.2087917073797313, "learning_rate": 0.0004607374914987312, "loss": 3.0656702518463135, "step": 7567, "token_acc": 0.28901734104046245 }, { "epoch": 4.436235708003518, "grad_norm": 0.20080034291228516, "learning_rate": 0.0004607244548483896, "loss": 2.9949564933776855, "step": 7568, "token_acc": 0.29910187576987646 }, { "epoch": 4.436822046320727, "grad_norm": 0.1813953204199183, "learning_rate": 0.00046071141621857565, "loss": 3.0483360290527344, "step": 7569, "token_acc": 0.2937331399368298 }, { "epoch": 4.437408384637936, "grad_norm": 0.18517493390155132, "learning_rate": 0.00046069837560941185, "loss": 3.0493578910827637, "step": 7570, "token_acc": 0.2917145141172764 }, { "epoch": 4.437994722955145, "grad_norm": 0.20185606585101415, "learning_rate": 0.00046068533302102076, "loss": 3.038222312927246, "step": 7571, "token_acc": 0.2930980591985165 }, { "epoch": 4.438581061272354, "grad_norm": 0.17781904835872908, "learning_rate": 0.0004606722884535249, "loss": 3.0672547817230225, "step": 7572, "token_acc": 0.29026904466936254 }, { "epoch": 4.4391673995895635, "grad_norm": 0.23736385628053638, "learning_rate": 0.0004606592419070468, "loss": 3.006150722503662, "step": 7573, "token_acc": 0.2981968739658768 }, { "epoch": 4.439753737906772, "grad_norm": 0.2719648388681537, "learning_rate": 0.00046064619338170886, "loss": 3.091278553009033, "step": 7574, "token_acc": 0.28724266951349076 }, { "epoch": 4.440340076223981, "grad_norm": 0.19574987469290808, "learning_rate": 0.00046063314287763394, "loss": 3.01570463180542, "step": 7575, "token_acc": 0.2976582567723025 }, { "epoch": 4.44092641454119, "grad_norm": 0.2531118030477968, "learning_rate": 0.00046062009039494437, "loss": 3.0444788932800293, "step": 7576, "token_acc": 0.29264501153916794 }, { "epoch": 4.441512752858399, "grad_norm": 0.37624361535698847, "learning_rate": 0.00046060703593376283, "loss": 3.0619711875915527, "step": 7577, "token_acc": 0.2892516944143812 }, { "epoch": 4.442099091175608, "grad_norm": 0.2257985745639285, "learning_rate": 0.0004605939794942121, "loss": 3.058312177658081, "step": 7578, "token_acc": 0.2932942178278081 }, { "epoch": 4.442685429492817, "grad_norm": 0.2547080460667835, "learning_rate": 0.0004605809210764146, "loss": 3.04949688911438, "step": 7579, "token_acc": 0.29187478851116916 }, { "epoch": 4.443271767810026, "grad_norm": 0.2788817409800309, "learning_rate": 0.0004605678606804932, "loss": 3.0276925563812256, "step": 7580, "token_acc": 0.29549438790610844 }, { "epoch": 4.4438581061272355, "grad_norm": 0.2087876851287658, "learning_rate": 0.00046055479830657043, "loss": 3.020184278488159, "step": 7581, "token_acc": 0.2978189852995701 }, { "epoch": 4.444444444444445, "grad_norm": 0.2214421005231179, "learning_rate": 0.00046054173395476905, "loss": 3.056488275527954, "step": 7582, "token_acc": 0.2918954649788942 }, { "epoch": 4.445030782761654, "grad_norm": 0.22878164383851737, "learning_rate": 0.0004605286676252118, "loss": 3.03840970993042, "step": 7583, "token_acc": 0.29531565497435125 }, { "epoch": 4.445617121078863, "grad_norm": 0.19363909319283915, "learning_rate": 0.00046051559931802147, "loss": 3.032454490661621, "step": 7584, "token_acc": 0.2954528743842106 }, { "epoch": 4.446203459396072, "grad_norm": 0.21859671217780147, "learning_rate": 0.00046050252903332073, "loss": 3.0649924278259277, "step": 7585, "token_acc": 0.2914438645078824 }, { "epoch": 4.44678979771328, "grad_norm": 0.19093612809457278, "learning_rate": 0.00046048945677123234, "loss": 3.0845353603363037, "step": 7586, "token_acc": 0.2882938099412945 }, { "epoch": 4.447376136030489, "grad_norm": 0.24384448242587697, "learning_rate": 0.0004604763825318792, "loss": 3.0703938007354736, "step": 7587, "token_acc": 0.28956492472727663 }, { "epoch": 4.447962474347698, "grad_norm": 0.17252300731934725, "learning_rate": 0.000460463306315384, "loss": 3.0647661685943604, "step": 7588, "token_acc": 0.29012570355483114 }, { "epoch": 4.4485488126649075, "grad_norm": 0.244178454899283, "learning_rate": 0.0004604502281218697, "loss": 3.079650640487671, "step": 7589, "token_acc": 0.28702297505294944 }, { "epoch": 4.449135150982117, "grad_norm": 0.1721861728363252, "learning_rate": 0.0004604371479514591, "loss": 3.0347256660461426, "step": 7590, "token_acc": 0.2953310000744657 }, { "epoch": 4.449721489299326, "grad_norm": 0.2321810275607078, "learning_rate": 0.000460424065804275, "loss": 3.104008913040161, "step": 7591, "token_acc": 0.2873571216865762 }, { "epoch": 4.450307827616535, "grad_norm": 0.17731771741092256, "learning_rate": 0.00046041098168044035, "loss": 3.0531649589538574, "step": 7592, "token_acc": 0.2932982663621811 }, { "epoch": 4.450894165933744, "grad_norm": 0.19180301755694798, "learning_rate": 0.00046039789558007817, "loss": 3.036776065826416, "step": 7593, "token_acc": 0.2963379539836992 }, { "epoch": 4.451480504250953, "grad_norm": 0.21070659281966705, "learning_rate": 0.0004603848075033111, "loss": 3.022726535797119, "step": 7594, "token_acc": 0.2981204160210995 }, { "epoch": 4.452066842568162, "grad_norm": 0.19618898554572026, "learning_rate": 0.0004603717174502624, "loss": 3.0223283767700195, "step": 7595, "token_acc": 0.29647060019363547 }, { "epoch": 4.45265318088537, "grad_norm": 0.19443355004759513, "learning_rate": 0.0004603586254210549, "loss": 3.019186496734619, "step": 7596, "token_acc": 0.29905173689663195 }, { "epoch": 4.4532395192025795, "grad_norm": 0.19261926878416546, "learning_rate": 0.0004603455314158115, "loss": 3.0328915119171143, "step": 7597, "token_acc": 0.295583789948875 }, { "epoch": 4.453825857519789, "grad_norm": 0.20681565845253028, "learning_rate": 0.00046033243543465533, "loss": 3.036186695098877, "step": 7598, "token_acc": 0.2954959399948561 }, { "epoch": 4.454412195836998, "grad_norm": 0.18015500147747537, "learning_rate": 0.00046031933747770935, "loss": 3.045621395111084, "step": 7599, "token_acc": 0.29264101024364136 }, { "epoch": 4.454998534154207, "grad_norm": 0.18553987528731267, "learning_rate": 0.00046030623754509656, "loss": 3.0554604530334473, "step": 7600, "token_acc": 0.2933688291657423 }, { "epoch": 4.455584872471416, "grad_norm": 0.1746189110956432, "learning_rate": 0.00046029313563694007, "loss": 3.0395989418029785, "step": 7601, "token_acc": 0.2941514447822652 }, { "epoch": 4.456171210788625, "grad_norm": 0.19828341803691518, "learning_rate": 0.000460280031753363, "loss": 3.022763729095459, "step": 7602, "token_acc": 0.29717530186078195 }, { "epoch": 4.456757549105834, "grad_norm": 0.19313079957981577, "learning_rate": 0.00046026692589448833, "loss": 3.0611565113067627, "step": 7603, "token_acc": 0.2916771869798886 }, { "epoch": 4.457343887423043, "grad_norm": 0.1950737882058072, "learning_rate": 0.0004602538180604392, "loss": 3.0042316913604736, "step": 7604, "token_acc": 0.29964273366370864 }, { "epoch": 4.457930225740252, "grad_norm": 0.1744623100650474, "learning_rate": 0.00046024070825133887, "loss": 3.0035977363586426, "step": 7605, "token_acc": 0.3001417397781824 }, { "epoch": 4.4585165640574616, "grad_norm": 0.17689431113635637, "learning_rate": 0.0004602275964673103, "loss": 3.072335958480835, "step": 7606, "token_acc": 0.2890095545301292 }, { "epoch": 4.45910290237467, "grad_norm": 0.17619544005229673, "learning_rate": 0.0004602144827084768, "loss": 3.0310516357421875, "step": 7607, "token_acc": 0.29582374494807856 }, { "epoch": 4.459689240691879, "grad_norm": 0.17805173348495315, "learning_rate": 0.0004602013669749615, "loss": 3.0765323638916016, "step": 7608, "token_acc": 0.28899109712472093 }, { "epoch": 4.460275579009088, "grad_norm": 0.20360165757750823, "learning_rate": 0.00046018824926688756, "loss": 3.0297372341156006, "step": 7609, "token_acc": 0.2954833952226699 }, { "epoch": 4.460861917326297, "grad_norm": 0.24057609116017406, "learning_rate": 0.0004601751295843783, "loss": 3.0555989742279053, "step": 7610, "token_acc": 0.29239412142885846 }, { "epoch": 4.461448255643506, "grad_norm": 0.20937971061391722, "learning_rate": 0.0004601620079275569, "loss": 3.031153678894043, "step": 7611, "token_acc": 0.2959155453916928 }, { "epoch": 4.462034593960715, "grad_norm": 0.17822492522673333, "learning_rate": 0.0004601488842965466, "loss": 3.020857810974121, "step": 7612, "token_acc": 0.2961217814223674 }, { "epoch": 4.4626209322779244, "grad_norm": 0.1709022609023486, "learning_rate": 0.00046013575869147073, "loss": 3.0303144454956055, "step": 7613, "token_acc": 0.29555015733099116 }, { "epoch": 4.463207270595134, "grad_norm": 0.17338764594468506, "learning_rate": 0.00046012263111245254, "loss": 2.973038911819458, "step": 7614, "token_acc": 0.30378191650688063 }, { "epoch": 4.463793608912343, "grad_norm": 0.18911517414390897, "learning_rate": 0.00046010950155961545, "loss": 3.0565481185913086, "step": 7615, "token_acc": 0.2931893580325444 }, { "epoch": 4.464379947229552, "grad_norm": 0.18840251798153082, "learning_rate": 0.0004600963700330827, "loss": 3.0413818359375, "step": 7616, "token_acc": 0.2935745746161677 }, { "epoch": 4.464966285546761, "grad_norm": 0.20457301093121866, "learning_rate": 0.00046008323653297763, "loss": 3.0494651794433594, "step": 7617, "token_acc": 0.29291603297846797 }, { "epoch": 4.465552623863969, "grad_norm": 0.18857032788092065, "learning_rate": 0.00046007010105942367, "loss": 3.018894672393799, "step": 7618, "token_acc": 0.29704119474276125 }, { "epoch": 4.466138962181178, "grad_norm": 0.24119875757845458, "learning_rate": 0.0004600569636125441, "loss": 3.0258312225341797, "step": 7619, "token_acc": 0.29576290953692613 }, { "epoch": 4.466725300498387, "grad_norm": 0.29686425326240484, "learning_rate": 0.0004600438241924625, "loss": 3.0597023963928223, "step": 7620, "token_acc": 0.2905547415686594 }, { "epoch": 4.4673116388155965, "grad_norm": 0.19145685364262147, "learning_rate": 0.00046003068279930223, "loss": 3.053844928741455, "step": 7621, "token_acc": 0.2924921915364823 }, { "epoch": 4.467897977132806, "grad_norm": 0.22489455372119785, "learning_rate": 0.0004600175394331867, "loss": 3.0231003761291504, "step": 7622, "token_acc": 0.29773761767764767 }, { "epoch": 4.468484315450015, "grad_norm": 0.2109328049697858, "learning_rate": 0.0004600043940942393, "loss": 3.0442333221435547, "step": 7623, "token_acc": 0.2921478089553154 }, { "epoch": 4.469070653767224, "grad_norm": 0.18363313063692918, "learning_rate": 0.0004599912467825837, "loss": 3.0066678524017334, "step": 7624, "token_acc": 0.29904314304504587 }, { "epoch": 4.469656992084433, "grad_norm": 0.21398662392085024, "learning_rate": 0.0004599780974983432, "loss": 3.013634204864502, "step": 7625, "token_acc": 0.2985257825183919 }, { "epoch": 4.470243330401642, "grad_norm": 0.20295223182419272, "learning_rate": 0.0004599649462416415, "loss": 3.0667214393615723, "step": 7626, "token_acc": 0.2906267159024964 }, { "epoch": 4.470829668718851, "grad_norm": 0.17860655984519236, "learning_rate": 0.0004599517930126021, "loss": 3.0143256187438965, "step": 7627, "token_acc": 0.2973822979971151 }, { "epoch": 4.47141600703606, "grad_norm": 0.2351790644713847, "learning_rate": 0.00045993863781134845, "loss": 3.0475316047668457, "step": 7628, "token_acc": 0.29309907073579444 }, { "epoch": 4.4720023453532685, "grad_norm": 0.21542947540069318, "learning_rate": 0.0004599254806380042, "loss": 3.081923484802246, "step": 7629, "token_acc": 0.2872054807792824 }, { "epoch": 4.472588683670478, "grad_norm": 0.17678907403227306, "learning_rate": 0.000459912321492693, "loss": 3.032973051071167, "step": 7630, "token_acc": 0.29336243618718255 }, { "epoch": 4.473175021987687, "grad_norm": 0.21351148519034216, "learning_rate": 0.00045989916037553837, "loss": 3.0537538528442383, "step": 7631, "token_acc": 0.29228213860393204 }, { "epoch": 4.473761360304896, "grad_norm": 0.19759561204369963, "learning_rate": 0.00045988599728666394, "loss": 2.9968748092651367, "step": 7632, "token_acc": 0.3006177058348124 }, { "epoch": 4.474347698622105, "grad_norm": 0.21578599620471187, "learning_rate": 0.00045987283222619335, "loss": 3.0576815605163574, "step": 7633, "token_acc": 0.28963389295212766 }, { "epoch": 4.474934036939314, "grad_norm": 0.20917375035422553, "learning_rate": 0.0004598596651942504, "loss": 3.026186943054199, "step": 7634, "token_acc": 0.29704727670506686 }, { "epoch": 4.475520375256523, "grad_norm": 0.21339744656140158, "learning_rate": 0.00045984649619095863, "loss": 3.0379202365875244, "step": 7635, "token_acc": 0.29579791890030094 }, { "epoch": 4.476106713573732, "grad_norm": 0.19259239672021897, "learning_rate": 0.0004598333252164418, "loss": 2.9974935054779053, "step": 7636, "token_acc": 0.29921285667431946 }, { "epoch": 4.476693051890941, "grad_norm": 0.20314865285111752, "learning_rate": 0.00045982015227082366, "loss": 3.027759075164795, "step": 7637, "token_acc": 0.29749978911420294 }, { "epoch": 4.4772793902081505, "grad_norm": 0.22442506785654343, "learning_rate": 0.0004598069773542279, "loss": 3.005478858947754, "step": 7638, "token_acc": 0.2991695758513285 }, { "epoch": 4.477865728525359, "grad_norm": 0.1886233409721311, "learning_rate": 0.0004597938004667783, "loss": 3.0778331756591797, "step": 7639, "token_acc": 0.2886811188699488 }, { "epoch": 4.478452066842568, "grad_norm": 0.1988770101538601, "learning_rate": 0.00045978062160859876, "loss": 3.0884857177734375, "step": 7640, "token_acc": 0.2876125080696363 }, { "epoch": 4.479038405159777, "grad_norm": 0.20425523007572852, "learning_rate": 0.0004597674407798128, "loss": 3.0628671646118164, "step": 7641, "token_acc": 0.2903683498210738 }, { "epoch": 4.479624743476986, "grad_norm": 0.22862713093909018, "learning_rate": 0.00045975425798054447, "loss": 3.035349130630493, "step": 7642, "token_acc": 0.2934626446250491 }, { "epoch": 4.480211081794195, "grad_norm": 0.22363805270717516, "learning_rate": 0.0004597410732109176, "loss": 3.0828819274902344, "step": 7643, "token_acc": 0.28832578394525366 }, { "epoch": 4.480797420111404, "grad_norm": 0.24389988757603698, "learning_rate": 0.0004597278864710559, "loss": 3.0328969955444336, "step": 7644, "token_acc": 0.29425555276781834 }, { "epoch": 4.481383758428613, "grad_norm": 0.27192738494728863, "learning_rate": 0.0004597146977610833, "loss": 3.0757884979248047, "step": 7645, "token_acc": 0.2891024920968596 }, { "epoch": 4.4819700967458225, "grad_norm": 0.20843095547578383, "learning_rate": 0.00045970150708112375, "loss": 3.072634696960449, "step": 7646, "token_acc": 0.28930052998720013 }, { "epoch": 4.482556435063032, "grad_norm": 0.1985178151505475, "learning_rate": 0.0004596883144313011, "loss": 3.085329532623291, "step": 7647, "token_acc": 0.28829426567089306 }, { "epoch": 4.483142773380241, "grad_norm": 0.2955113249349143, "learning_rate": 0.00045967511981173924, "loss": 2.996570587158203, "step": 7648, "token_acc": 0.29940599140419527 }, { "epoch": 4.48372911169745, "grad_norm": 0.2081176403428122, "learning_rate": 0.0004596619232225623, "loss": 3.046630382537842, "step": 7649, "token_acc": 0.29375353230885165 }, { "epoch": 4.484315450014659, "grad_norm": 0.23506372420972854, "learning_rate": 0.00045964872466389405, "loss": 3.0310287475585938, "step": 7650, "token_acc": 0.2953277813254337 }, { "epoch": 4.484901788331867, "grad_norm": 0.23271746353133058, "learning_rate": 0.00045963552413585853, "loss": 3.0389418601989746, "step": 7651, "token_acc": 0.2937115522939102 }, { "epoch": 4.485488126649076, "grad_norm": 0.1761589432160985, "learning_rate": 0.00045962232163857973, "loss": 3.0496087074279785, "step": 7652, "token_acc": 0.2926761828176567 }, { "epoch": 4.486074464966285, "grad_norm": 0.18759810523165646, "learning_rate": 0.00045960911717218166, "loss": 3.0675244331359863, "step": 7653, "token_acc": 0.29119780629387215 }, { "epoch": 4.4866608032834945, "grad_norm": 0.21101509564276377, "learning_rate": 0.0004595959107367885, "loss": 3.019044876098633, "step": 7654, "token_acc": 0.29737900832333036 }, { "epoch": 4.487247141600704, "grad_norm": 0.26529732338338785, "learning_rate": 0.0004595827023325241, "loss": 3.0790176391601562, "step": 7655, "token_acc": 0.2889874257518115 }, { "epoch": 4.487833479917913, "grad_norm": 0.19246519183388464, "learning_rate": 0.00045956949195951263, "loss": 3.0009732246398926, "step": 7656, "token_acc": 0.3004856939593151 }, { "epoch": 4.488419818235122, "grad_norm": 0.2051223163386124, "learning_rate": 0.0004595562796178783, "loss": 3.0152268409729004, "step": 7657, "token_acc": 0.2972520234822931 }, { "epoch": 4.489006156552331, "grad_norm": 0.17723538594703814, "learning_rate": 0.0004595430653077449, "loss": 3.059682607650757, "step": 7658, "token_acc": 0.29077984374753724 }, { "epoch": 4.48959249486954, "grad_norm": 0.20399896300376053, "learning_rate": 0.0004595298490292369, "loss": 3.054368257522583, "step": 7659, "token_acc": 0.29176005273566247 }, { "epoch": 4.490178833186749, "grad_norm": 0.19659008619947899, "learning_rate": 0.0004595166307824783, "loss": 3.0809693336486816, "step": 7660, "token_acc": 0.28822145170191277 }, { "epoch": 4.490765171503957, "grad_norm": 0.18918408917871415, "learning_rate": 0.00045950341056759326, "loss": 3.082911252975464, "step": 7661, "token_acc": 0.2868065248640653 }, { "epoch": 4.4913515098211665, "grad_norm": 0.2054273124442771, "learning_rate": 0.000459490188384706, "loss": 3.073209047317505, "step": 7662, "token_acc": 0.28910329683153035 }, { "epoch": 4.491937848138376, "grad_norm": 0.17997147275755476, "learning_rate": 0.0004594769642339407, "loss": 3.051784038543701, "step": 7663, "token_acc": 0.29516069842519266 }, { "epoch": 4.492524186455585, "grad_norm": 0.16282336307018755, "learning_rate": 0.00045946373811542166, "loss": 3.0419459342956543, "step": 7664, "token_acc": 0.2941067324743347 }, { "epoch": 4.493110524772794, "grad_norm": 0.18096161143455447, "learning_rate": 0.00045945051002927297, "loss": 2.9990177154541016, "step": 7665, "token_acc": 0.30059398647107166 }, { "epoch": 4.493696863090003, "grad_norm": 0.17682385862585487, "learning_rate": 0.00045943727997561895, "loss": 3.0519909858703613, "step": 7666, "token_acc": 0.29240413075339133 }, { "epoch": 4.494283201407212, "grad_norm": 0.2125860237547938, "learning_rate": 0.000459424047954584, "loss": 3.041882038116455, "step": 7667, "token_acc": 0.2941605513854162 }, { "epoch": 4.494869539724421, "grad_norm": 0.1681214479200011, "learning_rate": 0.00045941081396629226, "loss": 3.003459930419922, "step": 7668, "token_acc": 0.299738332806165 }, { "epoch": 4.49545587804163, "grad_norm": 0.19237578631808083, "learning_rate": 0.0004593975780108681, "loss": 2.994502067565918, "step": 7669, "token_acc": 0.3002833843887756 }, { "epoch": 4.496042216358839, "grad_norm": 0.18309851029699306, "learning_rate": 0.0004593843400884359, "loss": 3.04885196685791, "step": 7670, "token_acc": 0.29291126664686695 }, { "epoch": 4.4966285546760485, "grad_norm": 0.1779808166741814, "learning_rate": 0.00045937110019912, "loss": 3.028949022293091, "step": 7671, "token_acc": 0.29651900170618845 }, { "epoch": 4.497214892993257, "grad_norm": 0.1745979920444208, "learning_rate": 0.00045935785834304467, "loss": 2.9968109130859375, "step": 7672, "token_acc": 0.301610053926734 }, { "epoch": 4.497801231310466, "grad_norm": 0.16727829491908888, "learning_rate": 0.0004593446145203344, "loss": 3.0861566066741943, "step": 7673, "token_acc": 0.2858729332825957 }, { "epoch": 4.498387569627675, "grad_norm": 0.1789467100488619, "learning_rate": 0.00045933136873111356, "loss": 3.048555850982666, "step": 7674, "token_acc": 0.29419937022184056 }, { "epoch": 4.498973907944884, "grad_norm": 0.22849473559913733, "learning_rate": 0.00045931812097550664, "loss": 3.0504724979400635, "step": 7675, "token_acc": 0.2921356245931341 }, { "epoch": 4.499560246262093, "grad_norm": 0.2241713022227615, "learning_rate": 0.00045930487125363803, "loss": 3.0393242835998535, "step": 7676, "token_acc": 0.29277535797188387 }, { "epoch": 4.500146584579302, "grad_norm": 0.18230175907619314, "learning_rate": 0.00045929161956563216, "loss": 2.9953837394714355, "step": 7677, "token_acc": 0.2995537676034234 }, { "epoch": 4.500732922896511, "grad_norm": 0.1820095338597325, "learning_rate": 0.00045927836591161354, "loss": 3.0582523345947266, "step": 7678, "token_acc": 0.29255348438405443 }, { "epoch": 4.5013192612137205, "grad_norm": 0.2512032333928572, "learning_rate": 0.0004592651102917067, "loss": 3.0348997116088867, "step": 7679, "token_acc": 0.29461697830886585 }, { "epoch": 4.50190559953093, "grad_norm": 0.2868468636997157, "learning_rate": 0.00045925185270603614, "loss": 3.0552401542663574, "step": 7680, "token_acc": 0.29216013366786 }, { "epoch": 4.502491937848139, "grad_norm": 0.17669534218764027, "learning_rate": 0.0004592385931547264, "loss": 3.0304698944091797, "step": 7681, "token_acc": 0.2956780923994039 }, { "epoch": 4.503078276165347, "grad_norm": 0.23152504955669662, "learning_rate": 0.00045922533163790204, "loss": 3.063931703567505, "step": 7682, "token_acc": 0.29083877535492303 }, { "epoch": 4.503664614482556, "grad_norm": 0.22386396929177713, "learning_rate": 0.0004592120681556876, "loss": 2.9869446754455566, "step": 7683, "token_acc": 0.30246536620488274 }, { "epoch": 4.504250952799765, "grad_norm": 0.20879659004422466, "learning_rate": 0.0004591988027082077, "loss": 2.991276264190674, "step": 7684, "token_acc": 0.3004150650944819 }, { "epoch": 4.504837291116974, "grad_norm": 0.27811433477695924, "learning_rate": 0.0004591855352955869, "loss": 3.015376567840576, "step": 7685, "token_acc": 0.2976625774027173 }, { "epoch": 4.505423629434183, "grad_norm": 0.1707204533158236, "learning_rate": 0.0004591722659179499, "loss": 3.071946144104004, "step": 7686, "token_acc": 0.2880634835956255 }, { "epoch": 4.5060099677513925, "grad_norm": 0.23221264426288762, "learning_rate": 0.0004591589945754214, "loss": 3.0003676414489746, "step": 7687, "token_acc": 0.3000526173878216 }, { "epoch": 4.506596306068602, "grad_norm": 0.17165889883176683, "learning_rate": 0.00045914572126812595, "loss": 3.0096435546875, "step": 7688, "token_acc": 0.29698042399158936 }, { "epoch": 4.507182644385811, "grad_norm": 0.2290507121877382, "learning_rate": 0.00045913244599618823, "loss": 3.0237956047058105, "step": 7689, "token_acc": 0.2956233299548512 }, { "epoch": 4.50776898270302, "grad_norm": 0.21658924712637703, "learning_rate": 0.00045911916875973304, "loss": 3.04410982131958, "step": 7690, "token_acc": 0.2937747922048444 }, { "epoch": 4.508355321020229, "grad_norm": 0.18584523983268905, "learning_rate": 0.00045910588955888507, "loss": 3.0015764236450195, "step": 7691, "token_acc": 0.2991690641122487 }, { "epoch": 4.508941659337438, "grad_norm": 0.25221599568930114, "learning_rate": 0.000459092608393769, "loss": 3.036126136779785, "step": 7692, "token_acc": 0.2951906540344164 }, { "epoch": 4.509527997654647, "grad_norm": 0.1751188515357791, "learning_rate": 0.00045907932526450965, "loss": 3.034043312072754, "step": 7693, "token_acc": 0.2961817361373463 }, { "epoch": 4.510114335971855, "grad_norm": 0.21835964645741596, "learning_rate": 0.0004590660401712318, "loss": 3.080922842025757, "step": 7694, "token_acc": 0.28965449242872415 }, { "epoch": 4.5107006742890645, "grad_norm": 0.19167026498992387, "learning_rate": 0.00045905275311406015, "loss": 3.056273937225342, "step": 7695, "token_acc": 0.2919000748874675 }, { "epoch": 4.511287012606274, "grad_norm": 0.19030844965852323, "learning_rate": 0.0004590394640931196, "loss": 3.0271034240722656, "step": 7696, "token_acc": 0.2959358540995205 }, { "epoch": 4.511873350923483, "grad_norm": 0.1797431704819906, "learning_rate": 0.000459026173108535, "loss": 2.9959287643432617, "step": 7697, "token_acc": 0.29931240358645234 }, { "epoch": 4.512459689240692, "grad_norm": 0.22678782099337802, "learning_rate": 0.00045901288016043115, "loss": 3.0537538528442383, "step": 7698, "token_acc": 0.29174404380043584 }, { "epoch": 4.513046027557901, "grad_norm": 0.19356081852668974, "learning_rate": 0.00045899958524893295, "loss": 3.025479316711426, "step": 7699, "token_acc": 0.2968606140137365 }, { "epoch": 4.51363236587511, "grad_norm": 0.19817106163271916, "learning_rate": 0.0004589862883741653, "loss": 3.041426658630371, "step": 7700, "token_acc": 0.2939535364765808 }, { "epoch": 4.514218704192319, "grad_norm": 0.18803742458370246, "learning_rate": 0.00045897298953625305, "loss": 3.0845577716827393, "step": 7701, "token_acc": 0.2865216267129613 }, { "epoch": 4.514805042509528, "grad_norm": 0.2127429259119938, "learning_rate": 0.0004589596887353211, "loss": 3.0653514862060547, "step": 7702, "token_acc": 0.2905331854075573 }, { "epoch": 4.515391380826737, "grad_norm": 0.2159512173558895, "learning_rate": 0.00045894638597149457, "loss": 3.0213801860809326, "step": 7703, "token_acc": 0.2979369255996644 }, { "epoch": 4.515977719143946, "grad_norm": 0.18218567402970115, "learning_rate": 0.0004589330812448983, "loss": 3.0130577087402344, "step": 7704, "token_acc": 0.29880119243093306 }, { "epoch": 4.516564057461155, "grad_norm": 0.20899489687218348, "learning_rate": 0.0004589197745556572, "loss": 3.055253744125366, "step": 7705, "token_acc": 0.2919411980290752 }, { "epoch": 4.517150395778364, "grad_norm": 0.23275977744429535, "learning_rate": 0.0004589064659038964, "loss": 3.0597341060638428, "step": 7706, "token_acc": 0.2922139739581437 }, { "epoch": 4.517736734095573, "grad_norm": 0.20660279838715576, "learning_rate": 0.00045889315528974074, "loss": 3.053131580352783, "step": 7707, "token_acc": 0.2899625260019685 }, { "epoch": 4.518323072412782, "grad_norm": 0.17891155482051155, "learning_rate": 0.0004588798427133155, "loss": 3.025653839111328, "step": 7708, "token_acc": 0.2966734433190004 }, { "epoch": 4.518909410729991, "grad_norm": 0.2163608318598857, "learning_rate": 0.0004588665281747456, "loss": 3.0432658195495605, "step": 7709, "token_acc": 0.29366915647167285 }, { "epoch": 4.5194957490472, "grad_norm": 0.198861684944232, "learning_rate": 0.0004588532116741561, "loss": 3.0471251010894775, "step": 7710, "token_acc": 0.29204545163135043 }, { "epoch": 4.520082087364409, "grad_norm": 0.16259892347042804, "learning_rate": 0.00045883989321167205, "loss": 3.0011978149414062, "step": 7711, "token_acc": 0.29965453095934097 }, { "epoch": 4.5206684256816185, "grad_norm": 0.18153699015824948, "learning_rate": 0.00045882657278741864, "loss": 3.054030418395996, "step": 7712, "token_acc": 0.29132056410465734 }, { "epoch": 4.521254763998828, "grad_norm": 0.19386590429606068, "learning_rate": 0.000458813250401521, "loss": 3.0295534133911133, "step": 7713, "token_acc": 0.29564199760856846 }, { "epoch": 4.521841102316037, "grad_norm": 0.1806088290152924, "learning_rate": 0.00045879992605410425, "loss": 3.012301445007324, "step": 7714, "token_acc": 0.29913974961042267 }, { "epoch": 4.522427440633246, "grad_norm": 0.21495100188026894, "learning_rate": 0.00045878659974529356, "loss": 3.0546364784240723, "step": 7715, "token_acc": 0.2927611468598133 }, { "epoch": 4.523013778950454, "grad_norm": 0.4024803673868194, "learning_rate": 0.0004587732714752141, "loss": 3.058104991912842, "step": 7716, "token_acc": 0.292005202436931 }, { "epoch": 4.523600117267663, "grad_norm": 0.4272667956721717, "learning_rate": 0.0004587599412439911, "loss": 3.0296711921691895, "step": 7717, "token_acc": 0.2962941331074964 }, { "epoch": 4.524186455584872, "grad_norm": 0.1810501761750138, "learning_rate": 0.00045874660905174974, "loss": 3.0296177864074707, "step": 7718, "token_acc": 0.2949934106989777 }, { "epoch": 4.524772793902081, "grad_norm": 0.22838991700802103, "learning_rate": 0.0004587332748986153, "loss": 3.0276174545288086, "step": 7719, "token_acc": 0.29666679621209174 }, { "epoch": 4.5253591322192905, "grad_norm": 0.19426745031098175, "learning_rate": 0.00045871993878471296, "loss": 3.0476722717285156, "step": 7720, "token_acc": 0.29296903460837886 }, { "epoch": 4.5259454705365, "grad_norm": 0.19812493437044693, "learning_rate": 0.0004587066007101681, "loss": 3.054412841796875, "step": 7721, "token_acc": 0.29300589286190437 }, { "epoch": 4.526531808853709, "grad_norm": 0.18855234007528757, "learning_rate": 0.000458693260675106, "loss": 3.047390937805176, "step": 7722, "token_acc": 0.29380012663571126 }, { "epoch": 4.527118147170918, "grad_norm": 0.2101971386185216, "learning_rate": 0.00045867991867965186, "loss": 3.0842652320861816, "step": 7723, "token_acc": 0.2884389018337628 }, { "epoch": 4.527704485488127, "grad_norm": 0.20930430793115856, "learning_rate": 0.00045866657472393103, "loss": 3.0205554962158203, "step": 7724, "token_acc": 0.29626444837401883 }, { "epoch": 4.528290823805335, "grad_norm": 0.23076508329338388, "learning_rate": 0.000458653228808069, "loss": 3.0232300758361816, "step": 7725, "token_acc": 0.2967079710010076 }, { "epoch": 4.528877162122544, "grad_norm": 0.20141783369370414, "learning_rate": 0.00045863988093219107, "loss": 3.0242323875427246, "step": 7726, "token_acc": 0.2937037914068461 }, { "epoch": 4.529463500439753, "grad_norm": 0.19090497395877684, "learning_rate": 0.00045862653109642255, "loss": 3.0027780532836914, "step": 7727, "token_acc": 0.2989830052092503 }, { "epoch": 4.5300498387569625, "grad_norm": 0.19197377586514258, "learning_rate": 0.000458613179300889, "loss": 3.063613176345825, "step": 7728, "token_acc": 0.29165483082930055 }, { "epoch": 4.530636177074172, "grad_norm": 0.19180830330935006, "learning_rate": 0.0004585998255457156, "loss": 3.080824851989746, "step": 7729, "token_acc": 0.2884022200524711 }, { "epoch": 4.531222515391381, "grad_norm": 0.17019802054699226, "learning_rate": 0.00045858646983102795, "loss": 3.023195743560791, "step": 7730, "token_acc": 0.29698061047212043 }, { "epoch": 4.53180885370859, "grad_norm": 0.17747700102354017, "learning_rate": 0.0004585731121569515, "loss": 3.0080957412719727, "step": 7731, "token_acc": 0.2987777416205662 }, { "epoch": 4.532395192025799, "grad_norm": 0.1908854400285193, "learning_rate": 0.0004585597525236118, "loss": 3.0752322673797607, "step": 7732, "token_acc": 0.2887484552595429 }, { "epoch": 4.532981530343008, "grad_norm": 0.19326827839285396, "learning_rate": 0.0004585463909311342, "loss": 3.0320725440979004, "step": 7733, "token_acc": 0.29640988444668703 }, { "epoch": 4.533567868660217, "grad_norm": 0.19616406765670003, "learning_rate": 0.0004585330273796443, "loss": 3.0425262451171875, "step": 7734, "token_acc": 0.2942502671566711 }, { "epoch": 4.534154206977426, "grad_norm": 0.18366474670819022, "learning_rate": 0.0004585196618692676, "loss": 3.027400016784668, "step": 7735, "token_acc": 0.2951814160594489 }, { "epoch": 4.534740545294635, "grad_norm": 0.18902503538459364, "learning_rate": 0.0004585062944001296, "loss": 3.0546491146087646, "step": 7736, "token_acc": 0.2924429304833554 }, { "epoch": 4.535326883611844, "grad_norm": 0.18439302650081577, "learning_rate": 0.00045849292497235605, "loss": 3.064779043197632, "step": 7737, "token_acc": 0.2896026755921256 }, { "epoch": 4.535913221929053, "grad_norm": 0.20476792036272337, "learning_rate": 0.00045847955358607235, "loss": 3.0330610275268555, "step": 7738, "token_acc": 0.29280855148250523 }, { "epoch": 4.536499560246262, "grad_norm": 0.21228118470811175, "learning_rate": 0.0004584661802414042, "loss": 3.046804428100586, "step": 7739, "token_acc": 0.2936720094951936 }, { "epoch": 4.537085898563471, "grad_norm": 0.18642310165554377, "learning_rate": 0.0004584528049384771, "loss": 2.9907026290893555, "step": 7740, "token_acc": 0.30123607945294695 }, { "epoch": 4.53767223688068, "grad_norm": 0.2131402338298228, "learning_rate": 0.00045843942767741686, "loss": 3.011488914489746, "step": 7741, "token_acc": 0.3002470103901196 }, { "epoch": 4.538258575197889, "grad_norm": 0.21270579458651356, "learning_rate": 0.0004584260484583491, "loss": 3.0763394832611084, "step": 7742, "token_acc": 0.2896072496639135 }, { "epoch": 4.538844913515098, "grad_norm": 0.21310108387558624, "learning_rate": 0.0004584126672813995, "loss": 2.997494697570801, "step": 7743, "token_acc": 0.29953174036063057 }, { "epoch": 4.5394312518323074, "grad_norm": 0.2654213540846141, "learning_rate": 0.00045839928414669366, "loss": 3.0808634757995605, "step": 7744, "token_acc": 0.28987784479699935 }, { "epoch": 4.540017590149517, "grad_norm": 0.24063047560261408, "learning_rate": 0.0004583858990543574, "loss": 3.0638937950134277, "step": 7745, "token_acc": 0.2895288752209705 }, { "epoch": 4.540603928466726, "grad_norm": 0.24776016780815946, "learning_rate": 0.00045837251200451633, "loss": 3.0562894344329834, "step": 7746, "token_acc": 0.29238145218925454 }, { "epoch": 4.541190266783934, "grad_norm": 0.192615369918968, "learning_rate": 0.0004583591229972964, "loss": 3.066903591156006, "step": 7747, "token_acc": 0.2900211724331319 }, { "epoch": 4.541776605101143, "grad_norm": 0.2140920060614169, "learning_rate": 0.00045834573203282333, "loss": 3.0957999229431152, "step": 7748, "token_acc": 0.28673020567719065 }, { "epoch": 4.542362943418352, "grad_norm": 0.24122298067303372, "learning_rate": 0.00045833233911122276, "loss": 3.0607690811157227, "step": 7749, "token_acc": 0.2913910250837134 }, { "epoch": 4.542949281735561, "grad_norm": 0.19856602871552073, "learning_rate": 0.0004583189442326206, "loss": 3.031538486480713, "step": 7750, "token_acc": 0.2948558429959619 }, { "epoch": 4.54353562005277, "grad_norm": 0.20121596088634475, "learning_rate": 0.0004583055473971427, "loss": 3.020907402038574, "step": 7751, "token_acc": 0.2978971514495922 }, { "epoch": 4.5441219583699795, "grad_norm": 0.21288798828894406, "learning_rate": 0.00045829214860491484, "loss": 3.051473617553711, "step": 7752, "token_acc": 0.29312433464263427 }, { "epoch": 4.544708296687189, "grad_norm": 0.1805941019037169, "learning_rate": 0.00045827874785606294, "loss": 3.0340399742126465, "step": 7753, "token_acc": 0.2950359969259319 }, { "epoch": 4.545294635004398, "grad_norm": 0.2105900560084025, "learning_rate": 0.0004582653451507129, "loss": 3.025294303894043, "step": 7754, "token_acc": 0.29581852117439444 }, { "epoch": 4.545880973321607, "grad_norm": 0.18487510097102186, "learning_rate": 0.0004582519404889906, "loss": 3.031548023223877, "step": 7755, "token_acc": 0.29606893177837046 }, { "epoch": 4.546467311638816, "grad_norm": 0.18060953686528056, "learning_rate": 0.0004582385338710218, "loss": 3.0275983810424805, "step": 7756, "token_acc": 0.2953803574661804 }, { "epoch": 4.547053649956025, "grad_norm": 0.1749221349190226, "learning_rate": 0.0004582251252969327, "loss": 3.005312442779541, "step": 7757, "token_acc": 0.2974636776017356 }, { "epoch": 4.547639988273234, "grad_norm": 0.18756230523521286, "learning_rate": 0.0004582117147668491, "loss": 3.037630319595337, "step": 7758, "token_acc": 0.2939565479814833 }, { "epoch": 4.548226326590442, "grad_norm": 0.1644114789029569, "learning_rate": 0.000458198302280897, "loss": 3.0071287155151367, "step": 7759, "token_acc": 0.2985444621332727 }, { "epoch": 4.5488126649076515, "grad_norm": 0.17893638760164182, "learning_rate": 0.00045818488783920243, "loss": 3.0003890991210938, "step": 7760, "token_acc": 0.30030078661141185 }, { "epoch": 4.549399003224861, "grad_norm": 0.18242037025939323, "learning_rate": 0.0004581714714418914, "loss": 3.0751843452453613, "step": 7761, "token_acc": 0.2889476111533524 }, { "epoch": 4.54998534154207, "grad_norm": 0.18564942110865656, "learning_rate": 0.0004581580530890899, "loss": 3.0438437461853027, "step": 7762, "token_acc": 0.2935649247511362 }, { "epoch": 4.550571679859279, "grad_norm": 0.18099165097538658, "learning_rate": 0.00045814463278092386, "loss": 3.028074264526367, "step": 7763, "token_acc": 0.29566046349257497 }, { "epoch": 4.551158018176488, "grad_norm": 0.18097236819219453, "learning_rate": 0.0004581312105175196, "loss": 3.0315089225769043, "step": 7764, "token_acc": 0.2956141311460523 }, { "epoch": 4.551744356493697, "grad_norm": 0.19254680790331796, "learning_rate": 0.0004581177862990031, "loss": 3.0414376258850098, "step": 7765, "token_acc": 0.29523516897847374 }, { "epoch": 4.552330694810906, "grad_norm": 0.1963984932501173, "learning_rate": 0.00045810436012550036, "loss": 3.065140724182129, "step": 7766, "token_acc": 0.2916476494751255 }, { "epoch": 4.552917033128115, "grad_norm": 0.20240325099651418, "learning_rate": 0.0004580909319971376, "loss": 3.0250349044799805, "step": 7767, "token_acc": 0.29663222359900554 }, { "epoch": 4.5535033714453235, "grad_norm": 0.19617551799624186, "learning_rate": 0.00045807750191404097, "loss": 3.0141658782958984, "step": 7768, "token_acc": 0.29564309365013564 }, { "epoch": 4.554089709762533, "grad_norm": 0.21570344843240088, "learning_rate": 0.00045806406987633654, "loss": 3.0346503257751465, "step": 7769, "token_acc": 0.2962813225839695 }, { "epoch": 4.554676048079742, "grad_norm": 0.20665914542151365, "learning_rate": 0.0004580506358841506, "loss": 3.0941786766052246, "step": 7770, "token_acc": 0.286630640015626 }, { "epoch": 4.555262386396951, "grad_norm": 0.21125058683192813, "learning_rate": 0.0004580371999376093, "loss": 3.0495142936706543, "step": 7771, "token_acc": 0.29260867249094397 }, { "epoch": 4.55584872471416, "grad_norm": 0.248619213700559, "learning_rate": 0.00045802376203683874, "loss": 3.054462432861328, "step": 7772, "token_acc": 0.2914053224976269 }, { "epoch": 4.556435063031369, "grad_norm": 0.21562458466427578, "learning_rate": 0.00045801032218196537, "loss": 3.0564768314361572, "step": 7773, "token_acc": 0.29120048222330325 }, { "epoch": 4.557021401348578, "grad_norm": 0.18914746405070024, "learning_rate": 0.00045799688037311524, "loss": 3.01826548576355, "step": 7774, "token_acc": 0.2969143135800292 }, { "epoch": 4.557607739665787, "grad_norm": 0.2452973916671826, "learning_rate": 0.00045798343661041473, "loss": 2.999192476272583, "step": 7775, "token_acc": 0.30062212095954516 }, { "epoch": 4.558194077982996, "grad_norm": 0.24432388075903044, "learning_rate": 0.00045796999089399004, "loss": 3.0432722568511963, "step": 7776, "token_acc": 0.29412719574861024 }, { "epoch": 4.5587804163002055, "grad_norm": 0.18643706648495512, "learning_rate": 0.00045795654322396763, "loss": 3.035109519958496, "step": 7777, "token_acc": 0.2945248053603599 }, { "epoch": 4.559366754617415, "grad_norm": 0.194972220787616, "learning_rate": 0.0004579430936004737, "loss": 3.0296120643615723, "step": 7778, "token_acc": 0.2971370332622996 }, { "epoch": 4.559953092934624, "grad_norm": 0.17548699458488332, "learning_rate": 0.0004579296420236346, "loss": 3.049912452697754, "step": 7779, "token_acc": 0.2915303972043642 }, { "epoch": 4.560539431251832, "grad_norm": 0.1820991507949024, "learning_rate": 0.00045791618849357665, "loss": 3.0085248947143555, "step": 7780, "token_acc": 0.2982165457916047 }, { "epoch": 4.561125769569041, "grad_norm": 0.21062205360248948, "learning_rate": 0.0004579027330104263, "loss": 3.0634238719940186, "step": 7781, "token_acc": 0.2902926695842451 }, { "epoch": 4.56171210788625, "grad_norm": 0.18000310628358196, "learning_rate": 0.0004578892755743099, "loss": 3.064964771270752, "step": 7782, "token_acc": 0.29101600990075593 }, { "epoch": 4.562298446203459, "grad_norm": 0.2417925866651743, "learning_rate": 0.000457875816185354, "loss": 3.0440642833709717, "step": 7783, "token_acc": 0.29445420494299124 }, { "epoch": 4.562884784520668, "grad_norm": 0.25673945686718974, "learning_rate": 0.0004578623548436849, "loss": 3.041970729827881, "step": 7784, "token_acc": 0.29396096406022343 }, { "epoch": 4.5634711228378775, "grad_norm": 0.16908148835360404, "learning_rate": 0.00045784889154942897, "loss": 3.030210494995117, "step": 7785, "token_acc": 0.2949385145234974 }, { "epoch": 4.564057461155087, "grad_norm": 0.2641465438307317, "learning_rate": 0.00045783542630271277, "loss": 3.0457029342651367, "step": 7786, "token_acc": 0.2932319521749162 }, { "epoch": 4.564643799472296, "grad_norm": 0.21116599667111, "learning_rate": 0.000457821959103663, "loss": 3.0689854621887207, "step": 7787, "token_acc": 0.28922215395771855 }, { "epoch": 4.565230137789505, "grad_norm": 0.18972275715627357, "learning_rate": 0.0004578084899524058, "loss": 3.0240399837493896, "step": 7788, "token_acc": 0.2965556411808468 }, { "epoch": 4.565816476106714, "grad_norm": 0.21030515877549327, "learning_rate": 0.0004577950188490679, "loss": 3.081120014190674, "step": 7789, "token_acc": 0.2887685960002914 }, { "epoch": 4.566402814423922, "grad_norm": 0.1599056253271638, "learning_rate": 0.0004577815457937758, "loss": 3.064271926879883, "step": 7790, "token_acc": 0.29154171175349103 }, { "epoch": 4.566989152741131, "grad_norm": 0.19393529607962012, "learning_rate": 0.00045776807078665605, "loss": 3.035736083984375, "step": 7791, "token_acc": 0.29338998507081887 }, { "epoch": 4.56757549105834, "grad_norm": 0.18845045061097335, "learning_rate": 0.00045775459382783537, "loss": 3.103361129760742, "step": 7792, "token_acc": 0.28608454461403215 }, { "epoch": 4.5681618293755495, "grad_norm": 0.18427864509328393, "learning_rate": 0.0004577411149174401, "loss": 3.039100170135498, "step": 7793, "token_acc": 0.2945561223305029 }, { "epoch": 4.568748167692759, "grad_norm": 0.1598271897619845, "learning_rate": 0.00045772763405559704, "loss": 3.0363528728485107, "step": 7794, "token_acc": 0.293373154278315 }, { "epoch": 4.569334506009968, "grad_norm": 0.2135086611404317, "learning_rate": 0.0004577141512424327, "loss": 3.0375876426696777, "step": 7795, "token_acc": 0.29509345371526674 }, { "epoch": 4.569920844327177, "grad_norm": 0.19849947670627496, "learning_rate": 0.0004577006664780739, "loss": 3.039111614227295, "step": 7796, "token_acc": 0.29426071999677494 }, { "epoch": 4.570507182644386, "grad_norm": 0.186675314575643, "learning_rate": 0.0004576871797626472, "loss": 3.03743052482605, "step": 7797, "token_acc": 0.29343637719476645 }, { "epoch": 4.571093520961595, "grad_norm": 0.1800593933108539, "learning_rate": 0.0004576736910962793, "loss": 3.0046610832214355, "step": 7798, "token_acc": 0.29926222509163614 }, { "epoch": 4.571679859278804, "grad_norm": 0.26045604531164673, "learning_rate": 0.0004576602004790969, "loss": 3.0314443111419678, "step": 7799, "token_acc": 0.29492697603476736 }, { "epoch": 4.572266197596013, "grad_norm": 0.31667420401475516, "learning_rate": 0.00045764670791122674, "loss": 3.063892126083374, "step": 7800, "token_acc": 0.29145895559472934 }, { "epoch": 4.572852535913222, "grad_norm": 0.22121253578207276, "learning_rate": 0.00045763321339279555, "loss": 3.0507192611694336, "step": 7801, "token_acc": 0.2930239820807443 }, { "epoch": 4.573438874230431, "grad_norm": 0.21146686732468206, "learning_rate": 0.00045761971692393014, "loss": 3.060736656188965, "step": 7802, "token_acc": 0.292124773579342 }, { "epoch": 4.57402521254764, "grad_norm": 0.332990626583307, "learning_rate": 0.00045760621850475725, "loss": 3.0674777030944824, "step": 7803, "token_acc": 0.2904646465655201 }, { "epoch": 4.574611550864849, "grad_norm": 0.2678546140168523, "learning_rate": 0.00045759271813540373, "loss": 3.0217394828796387, "step": 7804, "token_acc": 0.29794296532104775 }, { "epoch": 4.575197889182058, "grad_norm": 0.2344257995751777, "learning_rate": 0.0004575792158159963, "loss": 3.0525519847869873, "step": 7805, "token_acc": 0.29195585361220194 }, { "epoch": 4.575784227499267, "grad_norm": 0.2292192891417131, "learning_rate": 0.0004575657115466619, "loss": 3.0600693225860596, "step": 7806, "token_acc": 0.29203474443002037 }, { "epoch": 4.576370565816476, "grad_norm": 0.245183151581815, "learning_rate": 0.0004575522053275273, "loss": 3.0749831199645996, "step": 7807, "token_acc": 0.2887324297062794 }, { "epoch": 4.576956904133685, "grad_norm": 0.20423631980703807, "learning_rate": 0.00045753869715871944, "loss": 3.0036659240722656, "step": 7808, "token_acc": 0.30051814888235695 }, { "epoch": 4.577543242450894, "grad_norm": 0.24868295142134825, "learning_rate": 0.00045752518704036515, "loss": 3.0619301795959473, "step": 7809, "token_acc": 0.2911696776030626 }, { "epoch": 4.5781295807681035, "grad_norm": 0.18682809427540573, "learning_rate": 0.0004575116749725914, "loss": 3.019085168838501, "step": 7810, "token_acc": 0.2965460753757619 }, { "epoch": 4.578715919085313, "grad_norm": 0.25127746898004016, "learning_rate": 0.0004574981609555251, "loss": 3.0414505004882812, "step": 7811, "token_acc": 0.2945519576920842 }, { "epoch": 4.579302257402521, "grad_norm": 0.20049475699156788, "learning_rate": 0.00045748464498929323, "loss": 3.0160837173461914, "step": 7812, "token_acc": 0.29719824642338927 }, { "epoch": 4.57988859571973, "grad_norm": 0.2524545762848182, "learning_rate": 0.0004574711270740226, "loss": 3.013145923614502, "step": 7813, "token_acc": 0.29616153991319116 }, { "epoch": 4.580474934036939, "grad_norm": 0.17402863796781934, "learning_rate": 0.0004574576072098404, "loss": 3.0083446502685547, "step": 7814, "token_acc": 0.2983774796620568 }, { "epoch": 4.581061272354148, "grad_norm": 0.20853842734291966, "learning_rate": 0.00045744408539687343, "loss": 3.0648179054260254, "step": 7815, "token_acc": 0.2925405776714844 }, { "epoch": 4.581647610671357, "grad_norm": 0.15766727232862143, "learning_rate": 0.0004574305616352489, "loss": 3.049630641937256, "step": 7816, "token_acc": 0.29218349557671486 }, { "epoch": 4.582233948988566, "grad_norm": 0.22414556018201928, "learning_rate": 0.00045741703592509363, "loss": 3.0668680667877197, "step": 7817, "token_acc": 0.29150028813446954 }, { "epoch": 4.5828202873057755, "grad_norm": 0.19722662033128643, "learning_rate": 0.000457403508266535, "loss": 3.011504650115967, "step": 7818, "token_acc": 0.29678234639411827 }, { "epoch": 4.583406625622985, "grad_norm": 0.24578665216183787, "learning_rate": 0.00045738997865969977, "loss": 3.045166254043579, "step": 7819, "token_acc": 0.2946795547837487 }, { "epoch": 4.583992963940194, "grad_norm": 0.1995592644066035, "learning_rate": 0.00045737644710471513, "loss": 3.0374789237976074, "step": 7820, "token_acc": 0.29391765711098944 }, { "epoch": 4.584579302257403, "grad_norm": 0.26004609162815795, "learning_rate": 0.0004573629136017083, "loss": 3.0289974212646484, "step": 7821, "token_acc": 0.2964375229245629 }, { "epoch": 4.585165640574612, "grad_norm": 0.19574788013819414, "learning_rate": 0.00045734937815080626, "loss": 3.0600624084472656, "step": 7822, "token_acc": 0.2910896685372845 }, { "epoch": 4.585751978891821, "grad_norm": 0.19044797189742138, "learning_rate": 0.00045733584075213627, "loss": 3.0403828620910645, "step": 7823, "token_acc": 0.2958146991600987 }, { "epoch": 4.586338317209029, "grad_norm": 0.18438665072054153, "learning_rate": 0.00045732230140582534, "loss": 3.0201401710510254, "step": 7824, "token_acc": 0.2961809414091572 }, { "epoch": 4.586924655526238, "grad_norm": 0.20182846463563778, "learning_rate": 0.00045730876011200087, "loss": 3.0704402923583984, "step": 7825, "token_acc": 0.2894408525908706 }, { "epoch": 4.5875109938434475, "grad_norm": 0.16578806136324223, "learning_rate": 0.0004572952168707899, "loss": 3.0314865112304688, "step": 7826, "token_acc": 0.29559421886920406 }, { "epoch": 4.588097332160657, "grad_norm": 0.18929741670734776, "learning_rate": 0.0004572816716823197, "loss": 3.0050058364868164, "step": 7827, "token_acc": 0.2985936064453815 }, { "epoch": 4.588683670477866, "grad_norm": 0.17837248590461222, "learning_rate": 0.0004572681245467175, "loss": 3.052299737930298, "step": 7828, "token_acc": 0.2923463210084248 }, { "epoch": 4.589270008795075, "grad_norm": 0.1713927243254849, "learning_rate": 0.00045725457546411065, "loss": 3.0259742736816406, "step": 7829, "token_acc": 0.29639184159278636 }, { "epoch": 4.589856347112284, "grad_norm": 0.18679792610351442, "learning_rate": 0.00045724102443462625, "loss": 3.0785880088806152, "step": 7830, "token_acc": 0.288134484563056 }, { "epoch": 4.590442685429493, "grad_norm": 0.1927721388980211, "learning_rate": 0.00045722747145839174, "loss": 2.9990954399108887, "step": 7831, "token_acc": 0.2989585960146511 }, { "epoch": 4.591029023746702, "grad_norm": 0.21173834980583617, "learning_rate": 0.00045721391653553436, "loss": 3.060718059539795, "step": 7832, "token_acc": 0.2898463624736104 }, { "epoch": 4.59161536206391, "grad_norm": 0.22620019919834827, "learning_rate": 0.00045720035966618144, "loss": 2.9942922592163086, "step": 7833, "token_acc": 0.30149051852723086 }, { "epoch": 4.5922017003811195, "grad_norm": 0.17384321562835217, "learning_rate": 0.0004571868008504603, "loss": 3.030264377593994, "step": 7834, "token_acc": 0.2956553886948319 }, { "epoch": 4.592788038698329, "grad_norm": 0.21505182300057427, "learning_rate": 0.00045717324008849846, "loss": 3.07796049118042, "step": 7835, "token_acc": 0.28868085989160597 }, { "epoch": 4.593374377015538, "grad_norm": 0.18935980133505287, "learning_rate": 0.0004571596773804232, "loss": 3.0458874702453613, "step": 7836, "token_acc": 0.29333017950258067 }, { "epoch": 4.593960715332747, "grad_norm": 0.20403231968247543, "learning_rate": 0.0004571461127263618, "loss": 3.047337055206299, "step": 7837, "token_acc": 0.2933505487411233 }, { "epoch": 4.594547053649956, "grad_norm": 0.18278261896458015, "learning_rate": 0.0004571325461264419, "loss": 3.0264110565185547, "step": 7838, "token_acc": 0.29516309305104127 }, { "epoch": 4.595133391967165, "grad_norm": 0.24379592111093562, "learning_rate": 0.0004571189775807908, "loss": 3.0627965927124023, "step": 7839, "token_acc": 0.290358055270973 }, { "epoch": 4.595719730284374, "grad_norm": 0.19741792380508344, "learning_rate": 0.0004571054070895361, "loss": 3.013413190841675, "step": 7840, "token_acc": 0.29737402955203435 }, { "epoch": 4.596306068601583, "grad_norm": 0.21073360258642995, "learning_rate": 0.000457091834652805, "loss": 3.0569615364074707, "step": 7841, "token_acc": 0.29184358048382464 }, { "epoch": 4.596892406918792, "grad_norm": 0.2312127443162448, "learning_rate": 0.0004570782602707253, "loss": 3.041816234588623, "step": 7842, "token_acc": 0.29471457709062526 }, { "epoch": 4.5974787452360015, "grad_norm": 0.17094772193117477, "learning_rate": 0.0004570646839434244, "loss": 3.0402138233184814, "step": 7843, "token_acc": 0.29391356545670516 }, { "epoch": 4.598065083553211, "grad_norm": 0.2050003936788715, "learning_rate": 0.00045705110567102975, "loss": 3.0385661125183105, "step": 7844, "token_acc": 0.2946338829366352 }, { "epoch": 4.598651421870419, "grad_norm": 0.24621689334207333, "learning_rate": 0.000457037525453669, "loss": 3.0482773780822754, "step": 7845, "token_acc": 0.29228956973189785 }, { "epoch": 4.599237760187628, "grad_norm": 0.17848316796310376, "learning_rate": 0.00045702394329146965, "loss": 3.0513901710510254, "step": 7846, "token_acc": 0.29211475664339714 }, { "epoch": 4.599824098504837, "grad_norm": 0.22381012855851568, "learning_rate": 0.00045701035918455936, "loss": 3.023817777633667, "step": 7847, "token_acc": 0.29527217182971865 }, { "epoch": 4.600410436822046, "grad_norm": 0.20455734716908267, "learning_rate": 0.00045699677313306575, "loss": 3.097198486328125, "step": 7848, "token_acc": 0.28609896514687994 }, { "epoch": 4.600996775139255, "grad_norm": 0.18587005632627668, "learning_rate": 0.0004569831851371163, "loss": 3.006114959716797, "step": 7849, "token_acc": 0.29873031756468255 }, { "epoch": 4.601583113456464, "grad_norm": 0.22999507208152906, "learning_rate": 0.0004569695951968388, "loss": 3.0409340858459473, "step": 7850, "token_acc": 0.29468957190366246 }, { "epoch": 4.6021694517736735, "grad_norm": 0.18745321915896673, "learning_rate": 0.00045695600331236076, "loss": 3.044898271560669, "step": 7851, "token_acc": 0.2934812183314424 }, { "epoch": 4.602755790090883, "grad_norm": 0.1958885413413641, "learning_rate": 0.00045694240948381, "loss": 3.060129165649414, "step": 7852, "token_acc": 0.2913964994057395 }, { "epoch": 4.603342128408092, "grad_norm": 0.2996371068790844, "learning_rate": 0.00045692881371131415, "loss": 3.036195993423462, "step": 7853, "token_acc": 0.29451174174360245 }, { "epoch": 4.603928466725301, "grad_norm": 0.2158338103541941, "learning_rate": 0.000456915215995001, "loss": 3.0701870918273926, "step": 7854, "token_acc": 0.28948040616583 }, { "epoch": 4.604514805042509, "grad_norm": 0.22154915762470215, "learning_rate": 0.0004569016163349982, "loss": 3.071502208709717, "step": 7855, "token_acc": 0.28925082280704034 }, { "epoch": 4.605101143359718, "grad_norm": 0.2550295610898617, "learning_rate": 0.0004568880147314334, "loss": 3.0128469467163086, "step": 7856, "token_acc": 0.29724644175911147 }, { "epoch": 4.605687481676927, "grad_norm": 0.1742770690795189, "learning_rate": 0.00045687441118443455, "loss": 3.0335254669189453, "step": 7857, "token_acc": 0.2955858178335176 }, { "epoch": 4.606273819994136, "grad_norm": 0.2895920085185948, "learning_rate": 0.0004568608056941295, "loss": 3.077699661254883, "step": 7858, "token_acc": 0.2880566480708403 }, { "epoch": 4.6068601583113455, "grad_norm": 0.17093613758917756, "learning_rate": 0.00045684719826064567, "loss": 3.036289691925049, "step": 7859, "token_acc": 0.29242059297066375 }, { "epoch": 4.607446496628555, "grad_norm": 0.2611131720770259, "learning_rate": 0.00045683358888411136, "loss": 3.0289061069488525, "step": 7860, "token_acc": 0.29745033687258937 }, { "epoch": 4.608032834945764, "grad_norm": 0.18416645205876153, "learning_rate": 0.0004568199775646541, "loss": 3.063633918762207, "step": 7861, "token_acc": 0.289769503813153 }, { "epoch": 4.608619173262973, "grad_norm": 0.23524793088860999, "learning_rate": 0.00045680636430240186, "loss": 3.0569167137145996, "step": 7862, "token_acc": 0.2922040643500443 }, { "epoch": 4.609205511580182, "grad_norm": 0.17062133038262667, "learning_rate": 0.0004567927490974826, "loss": 3.033095359802246, "step": 7863, "token_acc": 0.2949894443157707 }, { "epoch": 4.609791849897391, "grad_norm": 0.24907997336352825, "learning_rate": 0.00045677913195002397, "loss": 3.0427675247192383, "step": 7864, "token_acc": 0.2944004503752712 }, { "epoch": 4.6103781882146, "grad_norm": 0.17532333886551307, "learning_rate": 0.0004567655128601541, "loss": 3.025683879852295, "step": 7865, "token_acc": 0.29527851212837997 }, { "epoch": 4.610964526531809, "grad_norm": 0.28415268709081853, "learning_rate": 0.00045675189182800086, "loss": 3.0268681049346924, "step": 7866, "token_acc": 0.2958615477553579 }, { "epoch": 4.6115508648490176, "grad_norm": 0.18329562167446117, "learning_rate": 0.0004567382688536922, "loss": 3.079500675201416, "step": 7867, "token_acc": 0.29093990192372476 }, { "epoch": 4.612137203166227, "grad_norm": 0.2929201779464907, "learning_rate": 0.000456724643937356, "loss": 3.06679630279541, "step": 7868, "token_acc": 0.2911921952480173 }, { "epoch": 4.612723541483436, "grad_norm": 0.19688207280450812, "learning_rate": 0.00045671101707912045, "loss": 3.0999252796173096, "step": 7869, "token_acc": 0.28521464085588744 }, { "epoch": 4.613309879800645, "grad_norm": 0.2433088538473592, "learning_rate": 0.00045669738827911345, "loss": 3.0678224563598633, "step": 7870, "token_acc": 0.2912764277384706 }, { "epoch": 4.613896218117854, "grad_norm": 0.17938131011122063, "learning_rate": 0.0004566837575374629, "loss": 3.015923261642456, "step": 7871, "token_acc": 0.29763721190486675 }, { "epoch": 4.614482556435063, "grad_norm": 0.355606150140134, "learning_rate": 0.00045667012485429704, "loss": 3.0801806449890137, "step": 7872, "token_acc": 0.2873492467554146 }, { "epoch": 4.615068894752272, "grad_norm": 0.1667961255781519, "learning_rate": 0.0004566564902297439, "loss": 3.0267834663391113, "step": 7873, "token_acc": 0.29539316460076687 }, { "epoch": 4.615655233069481, "grad_norm": 0.22374083841946085, "learning_rate": 0.0004566428536639314, "loss": 3.0202596187591553, "step": 7874, "token_acc": 0.2984316741672012 }, { "epoch": 4.6162415713866904, "grad_norm": 0.19967579583702366, "learning_rate": 0.00045662921515698783, "loss": 3.031914234161377, "step": 7875, "token_acc": 0.29517436801700175 }, { "epoch": 4.616827909703899, "grad_norm": 0.20627432170159724, "learning_rate": 0.00045661557470904116, "loss": 3.0710954666137695, "step": 7876, "token_acc": 0.29067888963730854 }, { "epoch": 4.617414248021108, "grad_norm": 0.21152795050297074, "learning_rate": 0.0004566019323202196, "loss": 2.979069232940674, "step": 7877, "token_acc": 0.30351272646195926 }, { "epoch": 4.618000586338317, "grad_norm": 0.17841260157409825, "learning_rate": 0.00045658828799065125, "loss": 3.0461244583129883, "step": 7878, "token_acc": 0.2916355054330527 }, { "epoch": 4.618586924655526, "grad_norm": 0.24400830015675043, "learning_rate": 0.0004565746417204644, "loss": 3.0102062225341797, "step": 7879, "token_acc": 0.2982942980082966 }, { "epoch": 4.619173262972735, "grad_norm": 0.16365542200693173, "learning_rate": 0.0004565609935097871, "loss": 3.024242401123047, "step": 7880, "token_acc": 0.29586128449685317 }, { "epoch": 4.619759601289944, "grad_norm": 0.21296580449234057, "learning_rate": 0.0004565473433587476, "loss": 3.0820741653442383, "step": 7881, "token_acc": 0.28791384786789004 }, { "epoch": 4.620345939607153, "grad_norm": 0.1872963026281644, "learning_rate": 0.0004565336912674741, "loss": 3.0502171516418457, "step": 7882, "token_acc": 0.29294386904069203 }, { "epoch": 4.6209322779243625, "grad_norm": 0.22810212910547079, "learning_rate": 0.000456520037236095, "loss": 3.0690391063690186, "step": 7883, "token_acc": 0.28983165088312374 }, { "epoch": 4.621518616241572, "grad_norm": 0.21666338495556886, "learning_rate": 0.00045650638126473834, "loss": 3.068673849105835, "step": 7884, "token_acc": 0.28956878234390915 }, { "epoch": 4.622104954558781, "grad_norm": 0.21799264536862453, "learning_rate": 0.00045649272335353253, "loss": 3.0840187072753906, "step": 7885, "token_acc": 0.28795465357232125 }, { "epoch": 4.62269129287599, "grad_norm": 0.22357206252272302, "learning_rate": 0.0004564790635026058, "loss": 3.0425257682800293, "step": 7886, "token_acc": 0.29420384456552146 }, { "epoch": 4.623277631193199, "grad_norm": 0.19480698400380914, "learning_rate": 0.00045646540171208664, "loss": 3.0728821754455566, "step": 7887, "token_acc": 0.28945330325827606 }, { "epoch": 4.623863969510407, "grad_norm": 0.22484644591645092, "learning_rate": 0.000456451737982103, "loss": 3.053065299987793, "step": 7888, "token_acc": 0.29279723024378324 }, { "epoch": 4.624450307827616, "grad_norm": 0.20016241449974334, "learning_rate": 0.0004564380723127837, "loss": 3.064450740814209, "step": 7889, "token_acc": 0.29046608921100703 }, { "epoch": 4.625036646144825, "grad_norm": 0.23135679916179752, "learning_rate": 0.00045642440470425685, "loss": 3.0510663986206055, "step": 7890, "token_acc": 0.29181653386194395 }, { "epoch": 4.6256229844620345, "grad_norm": 0.16506234400865674, "learning_rate": 0.00045641073515665075, "loss": 3.039574146270752, "step": 7891, "token_acc": 0.29419240189939333 }, { "epoch": 4.626209322779244, "grad_norm": 0.22646823073817215, "learning_rate": 0.0004563970636700941, "loss": 3.00673246383667, "step": 7892, "token_acc": 0.30064467052301497 }, { "epoch": 4.626795661096453, "grad_norm": 0.18024243954908933, "learning_rate": 0.0004563833902447151, "loss": 3.0074453353881836, "step": 7893, "token_acc": 0.2990538246841353 }, { "epoch": 4.627381999413662, "grad_norm": 0.20224201740298728, "learning_rate": 0.00045636971488064224, "loss": 3.032456159591675, "step": 7894, "token_acc": 0.29520516213580555 }, { "epoch": 4.627968337730871, "grad_norm": 0.16375196398798933, "learning_rate": 0.0004563560375780039, "loss": 3.0559322834014893, "step": 7895, "token_acc": 0.2915450771665194 }, { "epoch": 4.62855467604808, "grad_norm": 0.18210666923371072, "learning_rate": 0.00045634235833692886, "loss": 3.018622398376465, "step": 7896, "token_acc": 0.2971205811538342 }, { "epoch": 4.629141014365289, "grad_norm": 0.17284458243342243, "learning_rate": 0.0004563286771575453, "loss": 3.0370490550994873, "step": 7897, "token_acc": 0.2960802314286019 }, { "epoch": 4.629727352682497, "grad_norm": 0.16363261089068157, "learning_rate": 0.00045631499403998177, "loss": 3.050380229949951, "step": 7898, "token_acc": 0.2914851664436687 }, { "epoch": 4.6303136909997065, "grad_norm": 0.19163305783562323, "learning_rate": 0.000456301308984367, "loss": 3.060014486312866, "step": 7899, "token_acc": 0.2909861438870166 }, { "epoch": 4.630900029316916, "grad_norm": 0.1691149577828513, "learning_rate": 0.0004562876219908294, "loss": 3.047626495361328, "step": 7900, "token_acc": 0.29457769186199084 }, { "epoch": 4.631486367634125, "grad_norm": 0.17555885152623965, "learning_rate": 0.00045627393305949754, "loss": 3.066770553588867, "step": 7901, "token_acc": 0.29082377867067577 }, { "epoch": 4.632072705951334, "grad_norm": 0.191307856764747, "learning_rate": 0.00045626024219050013, "loss": 3.025543212890625, "step": 7902, "token_acc": 0.29692497704025084 }, { "epoch": 4.632659044268543, "grad_norm": 0.18382038351374802, "learning_rate": 0.0004562465493839656, "loss": 3.025158166885376, "step": 7903, "token_acc": 0.2972709424254862 }, { "epoch": 4.633245382585752, "grad_norm": 0.18080656463741146, "learning_rate": 0.00045623285464002264, "loss": 3.042851448059082, "step": 7904, "token_acc": 0.292723327997958 }, { "epoch": 4.633831720902961, "grad_norm": 0.1863702473763146, "learning_rate": 0.0004562191579587999, "loss": 3.0448098182678223, "step": 7905, "token_acc": 0.29331946394647723 }, { "epoch": 4.63441805922017, "grad_norm": 0.20571475016046592, "learning_rate": 0.0004562054593404261, "loss": 3.029006004333496, "step": 7906, "token_acc": 0.29477180002984016 }, { "epoch": 4.635004397537379, "grad_norm": 0.19023827110172287, "learning_rate": 0.0004561917587850299, "loss": 3.0246174335479736, "step": 7907, "token_acc": 0.2962522989105136 }, { "epoch": 4.6355907358545885, "grad_norm": 0.16705615844782765, "learning_rate": 0.00045617805629273996, "loss": 2.9670491218566895, "step": 7908, "token_acc": 0.3021874776605631 }, { "epoch": 4.636177074171798, "grad_norm": 0.18374161421009763, "learning_rate": 0.000456164351863685, "loss": 3.076702117919922, "step": 7909, "token_acc": 0.2900571668496867 }, { "epoch": 4.636763412489006, "grad_norm": 0.27914898465552157, "learning_rate": 0.0004561506454979937, "loss": 3.0288381576538086, "step": 7910, "token_acc": 0.2968009842304295 }, { "epoch": 4.637349750806215, "grad_norm": 0.38585079303278547, "learning_rate": 0.000456136937195795, "loss": 3.0211286544799805, "step": 7911, "token_acc": 0.2978466708190921 }, { "epoch": 4.637936089123424, "grad_norm": 0.26664361093245875, "learning_rate": 0.00045612322695721746, "loss": 3.002315044403076, "step": 7912, "token_acc": 0.2998039296837008 }, { "epoch": 4.638522427440633, "grad_norm": 0.1955027416202244, "learning_rate": 0.00045610951478239, "loss": 3.075575828552246, "step": 7913, "token_acc": 0.28842737728744305 }, { "epoch": 4.639108765757842, "grad_norm": 0.21969880187082347, "learning_rate": 0.00045609580067144137, "loss": 3.0196433067321777, "step": 7914, "token_acc": 0.29609575522014425 }, { "epoch": 4.639695104075051, "grad_norm": 0.16791252299413129, "learning_rate": 0.0004560820846245004, "loss": 3.035079002380371, "step": 7915, "token_acc": 0.29473528239610053 }, { "epoch": 4.6402814423922605, "grad_norm": 0.2018297573707955, "learning_rate": 0.0004560683666416959, "loss": 3.017077922821045, "step": 7916, "token_acc": 0.29734695092130903 }, { "epoch": 4.64086778070947, "grad_norm": 0.16422557662644138, "learning_rate": 0.00045605464672315686, "loss": 3.0430288314819336, "step": 7917, "token_acc": 0.29398592450415867 }, { "epoch": 4.641454119026679, "grad_norm": 0.2170352520711253, "learning_rate": 0.00045604092486901205, "loss": 3.0379128456115723, "step": 7918, "token_acc": 0.29385875342329465 }, { "epoch": 4.642040457343887, "grad_norm": 0.18728233539796438, "learning_rate": 0.0004560272010793904, "loss": 3.047757148742676, "step": 7919, "token_acc": 0.2899079976875165 }, { "epoch": 4.642626795661096, "grad_norm": 0.2208402318133815, "learning_rate": 0.00045601347535442077, "loss": 3.096238136291504, "step": 7920, "token_acc": 0.2852529191770701 }, { "epoch": 4.643213133978305, "grad_norm": 0.1747388048875358, "learning_rate": 0.00045599974769423217, "loss": 3.0435028076171875, "step": 7921, "token_acc": 0.2934999616571515 }, { "epoch": 4.643799472295514, "grad_norm": 0.1737587310837407, "learning_rate": 0.00045598601809895356, "loss": 3.0723254680633545, "step": 7922, "token_acc": 0.29071351738974543 }, { "epoch": 4.644385810612723, "grad_norm": 0.20380677393158111, "learning_rate": 0.00045597228656871387, "loss": 3.0363826751708984, "step": 7923, "token_acc": 0.29297960165949055 }, { "epoch": 4.6449721489299325, "grad_norm": 0.1689565559263999, "learning_rate": 0.0004559585531036421, "loss": 3.0367989540100098, "step": 7924, "token_acc": 0.29468068167877365 }, { "epoch": 4.645558487247142, "grad_norm": 0.19029537834983035, "learning_rate": 0.00045594481770386725, "loss": 3.0834813117980957, "step": 7925, "token_acc": 0.2869083526535483 }, { "epoch": 4.646144825564351, "grad_norm": 0.18508468190135086, "learning_rate": 0.00045593108036951836, "loss": 3.064253330230713, "step": 7926, "token_acc": 0.29141578644527555 }, { "epoch": 4.64673116388156, "grad_norm": 0.1897579673008172, "learning_rate": 0.00045591734110072445, "loss": 3.0779192447662354, "step": 7927, "token_acc": 0.28720325925152124 }, { "epoch": 4.647317502198769, "grad_norm": 0.17887400008863225, "learning_rate": 0.0004559035998976146, "loss": 3.045581579208374, "step": 7928, "token_acc": 0.2917418955653832 }, { "epoch": 4.647903840515978, "grad_norm": 0.17211340359954436, "learning_rate": 0.0004558898567603179, "loss": 3.0282535552978516, "step": 7929, "token_acc": 0.2951539143721084 }, { "epoch": 4.648490178833187, "grad_norm": 0.1729761969683447, "learning_rate": 0.0004558761116889634, "loss": 3.1143007278442383, "step": 7930, "token_acc": 0.28410785599183275 }, { "epoch": 4.649076517150396, "grad_norm": 0.18520001643031803, "learning_rate": 0.00045586236468368025, "loss": 3.1094765663146973, "step": 7931, "token_acc": 0.2844304967390566 }, { "epoch": 4.6496628554676045, "grad_norm": 0.231702725486281, "learning_rate": 0.0004558486157445977, "loss": 3.0570220947265625, "step": 7932, "token_acc": 0.29154021295298205 }, { "epoch": 4.650249193784814, "grad_norm": 0.2630793672819458, "learning_rate": 0.0004558348648718447, "loss": 3.0372509956359863, "step": 7933, "token_acc": 0.29441440614667697 }, { "epoch": 4.650835532102023, "grad_norm": 0.20091752943460603, "learning_rate": 0.00045582111206555044, "loss": 3.027027130126953, "step": 7934, "token_acc": 0.2954581870047044 }, { "epoch": 4.651421870419232, "grad_norm": 0.21700402526138166, "learning_rate": 0.0004558073573258443, "loss": 3.010606527328491, "step": 7935, "token_acc": 0.2982048437639171 }, { "epoch": 4.652008208736441, "grad_norm": 0.2566924474291895, "learning_rate": 0.0004557936006528553, "loss": 3.042030096054077, "step": 7936, "token_acc": 0.29335111633015637 }, { "epoch": 4.65259454705365, "grad_norm": 0.17364982250510116, "learning_rate": 0.00045577984204671275, "loss": 3.056523323059082, "step": 7937, "token_acc": 0.2916415561585002 }, { "epoch": 4.653180885370859, "grad_norm": 0.21585042843398988, "learning_rate": 0.0004557660815075459, "loss": 3.0384583473205566, "step": 7938, "token_acc": 0.29403689403689404 }, { "epoch": 4.653767223688068, "grad_norm": 0.18253060876833255, "learning_rate": 0.000455752319035484, "loss": 3.0295188426971436, "step": 7939, "token_acc": 0.29612860313975015 }, { "epoch": 4.654353562005277, "grad_norm": 0.19532156889457608, "learning_rate": 0.0004557385546306562, "loss": 3.0767831802368164, "step": 7940, "token_acc": 0.28948370242986726 }, { "epoch": 4.654939900322486, "grad_norm": 0.18711516343176132, "learning_rate": 0.000455724788293192, "loss": 3.0394368171691895, "step": 7941, "token_acc": 0.2931789336717825 }, { "epoch": 4.655526238639695, "grad_norm": 0.17877288996359258, "learning_rate": 0.00045571102002322063, "loss": 3.0784835815429688, "step": 7942, "token_acc": 0.286616542177456 }, { "epoch": 4.656112576956904, "grad_norm": 0.19452783072630556, "learning_rate": 0.0004556972498208715, "loss": 3.046689987182617, "step": 7943, "token_acc": 0.29318849163841665 }, { "epoch": 4.656698915274113, "grad_norm": 0.18372136955374005, "learning_rate": 0.00045568347768627375, "loss": 3.056165933609009, "step": 7944, "token_acc": 0.2933575753759615 }, { "epoch": 4.657285253591322, "grad_norm": 0.17967535725800876, "learning_rate": 0.00045566970361955695, "loss": 3.0811591148376465, "step": 7945, "token_acc": 0.2884608936313996 }, { "epoch": 4.657871591908531, "grad_norm": 0.18651299542920274, "learning_rate": 0.0004556559276208504, "loss": 3.041147232055664, "step": 7946, "token_acc": 0.29506214543855697 }, { "epoch": 4.65845793022574, "grad_norm": 0.19665698338050452, "learning_rate": 0.00045564214969028363, "loss": 3.0251402854919434, "step": 7947, "token_acc": 0.29764220683215314 }, { "epoch": 4.659044268542949, "grad_norm": 0.16507049214442232, "learning_rate": 0.00045562836982798597, "loss": 3.0489554405212402, "step": 7948, "token_acc": 0.2923183175742111 }, { "epoch": 4.6596306068601585, "grad_norm": 0.25184623437633835, "learning_rate": 0.0004556145880340867, "loss": 3.0518088340759277, "step": 7949, "token_acc": 0.2906663467381637 }, { "epoch": 4.660216945177368, "grad_norm": 0.24003682477099095, "learning_rate": 0.00045560080430871557, "loss": 3.0464553833007812, "step": 7950, "token_acc": 0.2935356564253511 }, { "epoch": 4.660803283494577, "grad_norm": 0.19114426666923445, "learning_rate": 0.0004555870186520019, "loss": 3.099944591522217, "step": 7951, "token_acc": 0.2836475124511327 }, { "epoch": 4.661389621811786, "grad_norm": 0.1833926153591248, "learning_rate": 0.00045557323106407523, "loss": 3.057783603668213, "step": 7952, "token_acc": 0.2921743555575375 }, { "epoch": 4.661975960128994, "grad_norm": 0.1766573805747921, "learning_rate": 0.0004555594415450651, "loss": 3.047929525375366, "step": 7953, "token_acc": 0.2924752077554809 }, { "epoch": 4.662562298446203, "grad_norm": 0.16241575140511705, "learning_rate": 0.000455545650095101, "loss": 3.0324549674987793, "step": 7954, "token_acc": 0.2939304717516735 }, { "epoch": 4.663148636763412, "grad_norm": 0.19158533723766624, "learning_rate": 0.0004555318567143124, "loss": 3.039604902267456, "step": 7955, "token_acc": 0.29435390003248857 }, { "epoch": 4.663734975080621, "grad_norm": 0.17666676190524033, "learning_rate": 0.0004555180614028291, "loss": 3.053238868713379, "step": 7956, "token_acc": 0.2924349273165928 }, { "epoch": 4.6643213133978305, "grad_norm": 0.18959895030074603, "learning_rate": 0.0004555042641607805, "loss": 3.017399549484253, "step": 7957, "token_acc": 0.29614065934065936 }, { "epoch": 4.66490765171504, "grad_norm": 0.19466719233473379, "learning_rate": 0.0004554904649882962, "loss": 3.0840866565704346, "step": 7958, "token_acc": 0.288114284529284 }, { "epoch": 4.665493990032249, "grad_norm": 0.17665803180044912, "learning_rate": 0.0004554766638855059, "loss": 3.0078773498535156, "step": 7959, "token_acc": 0.29760857307077304 }, { "epoch": 4.666080328349458, "grad_norm": 0.19179519989684782, "learning_rate": 0.0004554628608525393, "loss": 3.0187392234802246, "step": 7960, "token_acc": 0.29850843212599737 }, { "epoch": 4.666666666666667, "grad_norm": 0.20598931669329482, "learning_rate": 0.00045544905588952594, "loss": 3.0235226154327393, "step": 7961, "token_acc": 0.29678976762818426 }, { "epoch": 4.667253004983876, "grad_norm": 0.19616889802378318, "learning_rate": 0.00045543524899659555, "loss": 3.0523688793182373, "step": 7962, "token_acc": 0.29227009653112773 }, { "epoch": 4.667839343301084, "grad_norm": 0.1762435468858557, "learning_rate": 0.00045542144017387783, "loss": 3.0494980812072754, "step": 7963, "token_acc": 0.29309676021783265 }, { "epoch": 4.668425681618293, "grad_norm": 0.19397939343755546, "learning_rate": 0.0004554076294215025, "loss": 3.036929130554199, "step": 7964, "token_acc": 0.29460000895040683 }, { "epoch": 4.6690120199355025, "grad_norm": 0.19928160891966196, "learning_rate": 0.00045539381673959925, "loss": 3.026571750640869, "step": 7965, "token_acc": 0.2967803710353082 }, { "epoch": 4.669598358252712, "grad_norm": 0.20083328752098722, "learning_rate": 0.0004553800021282979, "loss": 3.075126886367798, "step": 7966, "token_acc": 0.2919400505460439 }, { "epoch": 4.670184696569921, "grad_norm": 0.19828025977232586, "learning_rate": 0.00045536618558772815, "loss": 3.0255038738250732, "step": 7967, "token_acc": 0.29656638515925354 }, { "epoch": 4.67077103488713, "grad_norm": 0.23931765458144427, "learning_rate": 0.0004553523671180198, "loss": 3.0303902626037598, "step": 7968, "token_acc": 0.2946499158335171 }, { "epoch": 4.671357373204339, "grad_norm": 0.23160710522101882, "learning_rate": 0.00045533854671930276, "loss": 3.004765510559082, "step": 7969, "token_acc": 0.2994257522698485 }, { "epoch": 4.671943711521548, "grad_norm": 0.17454445338296665, "learning_rate": 0.0004553247243917067, "loss": 2.994231939315796, "step": 7970, "token_acc": 0.30172474775924274 }, { "epoch": 4.672530049838757, "grad_norm": 0.19136040893728268, "learning_rate": 0.00045531090013536154, "loss": 3.095249652862549, "step": 7971, "token_acc": 0.2870891465540238 }, { "epoch": 4.673116388155966, "grad_norm": 0.21271905979598432, "learning_rate": 0.0004552970739503971, "loss": 3.039263963699341, "step": 7972, "token_acc": 0.2957374732423052 }, { "epoch": 4.673702726473175, "grad_norm": 0.18717369407524545, "learning_rate": 0.00045528324583694337, "loss": 3.0271553993225098, "step": 7973, "token_acc": 0.2952407367717697 }, { "epoch": 4.6742890647903845, "grad_norm": 0.23895039032356719, "learning_rate": 0.00045526941579513016, "loss": 3.087270498275757, "step": 7974, "token_acc": 0.28823357395443544 }, { "epoch": 4.674875403107593, "grad_norm": 0.22780370556280186, "learning_rate": 0.00045525558382508735, "loss": 3.078446388244629, "step": 7975, "token_acc": 0.29006199712624453 }, { "epoch": 4.675461741424802, "grad_norm": 0.17099853006182997, "learning_rate": 0.00045524174992694487, "loss": 3.058983325958252, "step": 7976, "token_acc": 0.290619735008424 }, { "epoch": 4.676048079742011, "grad_norm": 0.193058552086614, "learning_rate": 0.00045522791410083284, "loss": 3.0329530239105225, "step": 7977, "token_acc": 0.293404875463699 }, { "epoch": 4.67663441805922, "grad_norm": 0.18315649280201907, "learning_rate": 0.0004552140763468811, "loss": 3.0383377075195312, "step": 7978, "token_acc": 0.2963654922500046 }, { "epoch": 4.677220756376429, "grad_norm": 0.1951195549944361, "learning_rate": 0.0004552002366652195, "loss": 3.0981011390686035, "step": 7979, "token_acc": 0.28623494696546775 }, { "epoch": 4.677807094693638, "grad_norm": 0.23991718787673869, "learning_rate": 0.0004551863950559783, "loss": 3.0166354179382324, "step": 7980, "token_acc": 0.2987220048635354 }, { "epoch": 4.678393433010847, "grad_norm": 0.2864690018755094, "learning_rate": 0.0004551725515192874, "loss": 2.9783473014831543, "step": 7981, "token_acc": 0.3018044315424703 }, { "epoch": 4.6789797713280565, "grad_norm": 0.24496585215119268, "learning_rate": 0.00045515870605527674, "loss": 3.055659055709839, "step": 7982, "token_acc": 0.292387446515628 }, { "epoch": 4.679566109645266, "grad_norm": 0.1906869296410676, "learning_rate": 0.00045514485866407663, "loss": 3.052459239959717, "step": 7983, "token_acc": 0.2929727940133166 }, { "epoch": 4.680152447962474, "grad_norm": 0.2711644118935266, "learning_rate": 0.000455131009345817, "loss": 3.1056301593780518, "step": 7984, "token_acc": 0.2854237357952351 }, { "epoch": 4.680738786279683, "grad_norm": 0.18580643623762055, "learning_rate": 0.0004551171581006279, "loss": 3.039222240447998, "step": 7985, "token_acc": 0.29235740599593496 }, { "epoch": 4.681325124596892, "grad_norm": 0.20545877839320048, "learning_rate": 0.0004551033049286395, "loss": 3.036125421524048, "step": 7986, "token_acc": 0.2946557985131618 }, { "epoch": 4.681911462914101, "grad_norm": 0.19072946114544526, "learning_rate": 0.00045508944982998196, "loss": 3.0507709980010986, "step": 7987, "token_acc": 0.2917572596324713 }, { "epoch": 4.68249780123131, "grad_norm": 0.1800013035477047, "learning_rate": 0.0004550755928047854, "loss": 3.065943717956543, "step": 7988, "token_acc": 0.2915095113151853 }, { "epoch": 4.683084139548519, "grad_norm": 0.24032943331994272, "learning_rate": 0.0004550617338531799, "loss": 3.059300422668457, "step": 7989, "token_acc": 0.2914615165510366 }, { "epoch": 4.6836704778657285, "grad_norm": 0.17912214434369897, "learning_rate": 0.00045504787297529593, "loss": 3.000694513320923, "step": 7990, "token_acc": 0.29870037188870296 }, { "epoch": 4.684256816182938, "grad_norm": 0.23300606223592524, "learning_rate": 0.00045503401017126335, "loss": 3.005760669708252, "step": 7991, "token_acc": 0.29930079069463505 }, { "epoch": 4.684843154500147, "grad_norm": 0.18061912017548534, "learning_rate": 0.00045502014544121256, "loss": 2.9905614852905273, "step": 7992, "token_acc": 0.30144510889275383 }, { "epoch": 4.685429492817356, "grad_norm": 0.2323223308557717, "learning_rate": 0.00045500627878527377, "loss": 3.0289628505706787, "step": 7993, "token_acc": 0.2964127637119442 }, { "epoch": 4.686015831134565, "grad_norm": 0.19494810061687487, "learning_rate": 0.00045499241020357725, "loss": 3.0282387733459473, "step": 7994, "token_acc": 0.29615759826289456 }, { "epoch": 4.686602169451774, "grad_norm": 0.18641150233663467, "learning_rate": 0.00045497853969625327, "loss": 3.0627260208129883, "step": 7995, "token_acc": 0.29029246711283013 }, { "epoch": 4.687188507768982, "grad_norm": 0.2641989914205502, "learning_rate": 0.0004549646672634321, "loss": 3.0462896823883057, "step": 7996, "token_acc": 0.29244388969789425 }, { "epoch": 4.687774846086191, "grad_norm": 0.19881687849755472, "learning_rate": 0.0004549507929052441, "loss": 3.046302556991577, "step": 7997, "token_acc": 0.2915923981277607 }, { "epoch": 4.6883611844034006, "grad_norm": 0.25521549583996905, "learning_rate": 0.0004549369166218196, "loss": 3.0197601318359375, "step": 7998, "token_acc": 0.2967985360036189 }, { "epoch": 4.68894752272061, "grad_norm": 0.2189742797385495, "learning_rate": 0.00045492303841328886, "loss": 3.028839588165283, "step": 7999, "token_acc": 0.29546595540359977 }, { "epoch": 4.689533861037819, "grad_norm": 0.2298528077790898, "learning_rate": 0.0004549091582797823, "loss": 3.069355010986328, "step": 8000, "token_acc": 0.28953341740226984 }, { "epoch": 4.690120199355028, "grad_norm": 0.25191456665912076, "learning_rate": 0.00045489527622143036, "loss": 3.1318163871765137, "step": 8001, "token_acc": 0.2814052123444389 }, { "epoch": 4.690706537672237, "grad_norm": 0.17006869245891404, "learning_rate": 0.0004548813922383634, "loss": 3.0846824645996094, "step": 8002, "token_acc": 0.28713435596649717 }, { "epoch": 4.691292875989446, "grad_norm": 0.2035730204122709, "learning_rate": 0.0004548675063307118, "loss": 3.017592430114746, "step": 8003, "token_acc": 0.2971963110747034 }, { "epoch": 4.691879214306655, "grad_norm": 0.17235350820108492, "learning_rate": 0.0004548536184986061, "loss": 3.01029109954834, "step": 8004, "token_acc": 0.2991354596961694 }, { "epoch": 4.692465552623864, "grad_norm": 0.19768502274816915, "learning_rate": 0.0004548397287421767, "loss": 3.0274598598480225, "step": 8005, "token_acc": 0.2956225782804482 }, { "epoch": 4.693051890941073, "grad_norm": 0.21678663790588082, "learning_rate": 0.000454825837061554, "loss": 3.079550266265869, "step": 8006, "token_acc": 0.28935440047719024 }, { "epoch": 4.693638229258282, "grad_norm": 0.20483762918553944, "learning_rate": 0.0004548119434568686, "loss": 3.072958469390869, "step": 8007, "token_acc": 0.2895830115418826 }, { "epoch": 4.694224567575491, "grad_norm": 0.16952649683925472, "learning_rate": 0.000454798047928251, "loss": 3.0399582386016846, "step": 8008, "token_acc": 0.294707421022865 }, { "epoch": 4.6948109058927, "grad_norm": 0.17145382482469476, "learning_rate": 0.0004547841504758318, "loss": 3.0316524505615234, "step": 8009, "token_acc": 0.2953471109895691 }, { "epoch": 4.695397244209909, "grad_norm": 0.17465604321263767, "learning_rate": 0.00045477025109974127, "loss": 3.032283067703247, "step": 8010, "token_acc": 0.294787049756504 }, { "epoch": 4.695983582527118, "grad_norm": 0.1611856137648926, "learning_rate": 0.00045475634980011027, "loss": 3.094754695892334, "step": 8011, "token_acc": 0.28731652713195266 }, { "epoch": 4.696569920844327, "grad_norm": 0.18956354868649164, "learning_rate": 0.0004547424465770693, "loss": 3.093773365020752, "step": 8012, "token_acc": 0.2847676109902721 }, { "epoch": 4.697156259161536, "grad_norm": 0.21644635686943645, "learning_rate": 0.0004547285414307488, "loss": 3.0396718978881836, "step": 8013, "token_acc": 0.2941452958343018 }, { "epoch": 4.6977425974787455, "grad_norm": 0.1689955887525404, "learning_rate": 0.0004547146343612797, "loss": 3.0749409198760986, "step": 8014, "token_acc": 0.28851665599311294 }, { "epoch": 4.698328935795955, "grad_norm": 0.1843501858755421, "learning_rate": 0.00045470072536879237, "loss": 3.05033540725708, "step": 8015, "token_acc": 0.2917679487480102 }, { "epoch": 4.698915274113164, "grad_norm": 0.1833778505990386, "learning_rate": 0.00045468681445341757, "loss": 3.0323336124420166, "step": 8016, "token_acc": 0.2962058353889674 }, { "epoch": 4.699501612430373, "grad_norm": 0.1911092911292201, "learning_rate": 0.00045467290161528594, "loss": 3.057612657546997, "step": 8017, "token_acc": 0.29140716117737 }, { "epoch": 4.700087950747581, "grad_norm": 0.23715853464414993, "learning_rate": 0.00045465898685452825, "loss": 3.026954412460327, "step": 8018, "token_acc": 0.29868579232738246 }, { "epoch": 4.70067428906479, "grad_norm": 0.25408290664332184, "learning_rate": 0.0004546450701712752, "loss": 2.987659454345703, "step": 8019, "token_acc": 0.3015023240371846 }, { "epoch": 4.701260627381999, "grad_norm": 0.1938798581551351, "learning_rate": 0.0004546311515656574, "loss": 3.0442681312561035, "step": 8020, "token_acc": 0.29505844520578944 }, { "epoch": 4.701846965699208, "grad_norm": 0.2266523611047431, "learning_rate": 0.00045461723103780574, "loss": 3.0231189727783203, "step": 8021, "token_acc": 0.29540756857564693 }, { "epoch": 4.7024333040164175, "grad_norm": 0.27228014043134574, "learning_rate": 0.0004546033085878509, "loss": 3.065335273742676, "step": 8022, "token_acc": 0.29120917008794545 }, { "epoch": 4.703019642333627, "grad_norm": 0.18182965437059484, "learning_rate": 0.00045458938421592365, "loss": 3.0405826568603516, "step": 8023, "token_acc": 0.29229593009590377 }, { "epoch": 4.703605980650836, "grad_norm": 0.24000589736865774, "learning_rate": 0.0004545754579221548, "loss": 2.9833414554595947, "step": 8024, "token_acc": 0.3014126755818865 }, { "epoch": 4.704192318968045, "grad_norm": 0.2779801861967681, "learning_rate": 0.00045456152970667525, "loss": 3.055398464202881, "step": 8025, "token_acc": 0.2918309182151375 }, { "epoch": 4.704778657285254, "grad_norm": 0.18690144458034458, "learning_rate": 0.00045454759956961577, "loss": 3.090012788772583, "step": 8026, "token_acc": 0.28691562143671606 }, { "epoch": 4.705364995602462, "grad_norm": 0.24947655291435297, "learning_rate": 0.00045453366751110717, "loss": 3.0479936599731445, "step": 8027, "token_acc": 0.29308369985516297 }, { "epoch": 4.705951333919671, "grad_norm": 0.170181804790625, "learning_rate": 0.0004545197335312804, "loss": 3.0843465328216553, "step": 8028, "token_acc": 0.2863211620904557 }, { "epoch": 4.70653767223688, "grad_norm": 0.268625241087238, "learning_rate": 0.0004545057976302664, "loss": 3.07427978515625, "step": 8029, "token_acc": 0.2882187609179699 }, { "epoch": 4.7071240105540895, "grad_norm": 0.1875515664450732, "learning_rate": 0.00045449185980819595, "loss": 3.0231497287750244, "step": 8030, "token_acc": 0.2960921105176124 }, { "epoch": 4.707710348871299, "grad_norm": 0.25436079627153424, "learning_rate": 0.00045447792006520003, "loss": 3.0118207931518555, "step": 8031, "token_acc": 0.2972007257377717 }, { "epoch": 4.708296687188508, "grad_norm": 0.1882886205226228, "learning_rate": 0.0004544639784014096, "loss": 3.020613193511963, "step": 8032, "token_acc": 0.2973006173452586 }, { "epoch": 4.708883025505717, "grad_norm": 0.23872501615242225, "learning_rate": 0.0004544500348169556, "loss": 3.0313796997070312, "step": 8033, "token_acc": 0.2948937386014991 }, { "epoch": 4.709469363822926, "grad_norm": 0.18599198001046352, "learning_rate": 0.000454436089311969, "loss": 3.0512845516204834, "step": 8034, "token_acc": 0.29296380228096663 }, { "epoch": 4.710055702140135, "grad_norm": 0.22547387328639848, "learning_rate": 0.0004544221418865809, "loss": 3.032609462738037, "step": 8035, "token_acc": 0.2962689113866454 }, { "epoch": 4.710642040457344, "grad_norm": 0.17047763979688688, "learning_rate": 0.0004544081925409221, "loss": 3.025763511657715, "step": 8036, "token_acc": 0.2959154232956887 }, { "epoch": 4.711228378774553, "grad_norm": 0.21569515342633444, "learning_rate": 0.0004543942412751239, "loss": 3.12408447265625, "step": 8037, "token_acc": 0.28217110275118396 }, { "epoch": 4.711814717091762, "grad_norm": 0.2045754005974824, "learning_rate": 0.0004543802880893172, "loss": 3.060786247253418, "step": 8038, "token_acc": 0.2890259212381276 }, { "epoch": 4.7124010554089715, "grad_norm": 0.2336719882980407, "learning_rate": 0.00045436633298363306, "loss": 3.0467286109924316, "step": 8039, "token_acc": 0.2926053566490097 }, { "epoch": 4.71298739372618, "grad_norm": 0.20423093338516535, "learning_rate": 0.00045435237595820266, "loss": 3.0679893493652344, "step": 8040, "token_acc": 0.28922366008060757 }, { "epoch": 4.713573732043389, "grad_norm": 0.24072165882048077, "learning_rate": 0.000454338417013157, "loss": 3.126478672027588, "step": 8041, "token_acc": 0.2820371577237791 }, { "epoch": 4.714160070360598, "grad_norm": 0.16930982297968283, "learning_rate": 0.00045432445614862737, "loss": 2.9863779544830322, "step": 8042, "token_acc": 0.29998009335170334 }, { "epoch": 4.714746408677807, "grad_norm": 0.2538909878412488, "learning_rate": 0.0004543104933647447, "loss": 3.013861656188965, "step": 8043, "token_acc": 0.29724163913227203 }, { "epoch": 4.715332746995016, "grad_norm": 0.16732184573237052, "learning_rate": 0.00045429652866164026, "loss": 3.018298387527466, "step": 8044, "token_acc": 0.2970602479270293 }, { "epoch": 4.715919085312225, "grad_norm": 0.22296941846645618, "learning_rate": 0.0004542825620394453, "loss": 3.0446643829345703, "step": 8045, "token_acc": 0.2944485846732938 }, { "epoch": 4.716505423629434, "grad_norm": 0.1654179798220177, "learning_rate": 0.00045426859349829085, "loss": 3.099398612976074, "step": 8046, "token_acc": 0.2870337914077152 }, { "epoch": 4.7170917619466435, "grad_norm": 0.24734720906805324, "learning_rate": 0.00045425462303830835, "loss": 3.021082878112793, "step": 8047, "token_acc": 0.2953848363850146 }, { "epoch": 4.717678100263853, "grad_norm": 0.18730242600767572, "learning_rate": 0.0004542406506596288, "loss": 3.0749564170837402, "step": 8048, "token_acc": 0.2890789883176898 }, { "epoch": 4.718264438581061, "grad_norm": 0.2050954047392026, "learning_rate": 0.0004542266763623836, "loss": 3.036804437637329, "step": 8049, "token_acc": 0.2942367114910477 }, { "epoch": 4.71885077689827, "grad_norm": 0.16926769691804688, "learning_rate": 0.0004542127001467039, "loss": 3.0559167861938477, "step": 8050, "token_acc": 0.29131803059690947 }, { "epoch": 4.719437115215479, "grad_norm": 0.20232710448139002, "learning_rate": 0.0004541987220127211, "loss": 3.0608155727386475, "step": 8051, "token_acc": 0.29205413052070983 }, { "epoch": 4.720023453532688, "grad_norm": 0.1997354098802789, "learning_rate": 0.00045418474196056645, "loss": 3.0422816276550293, "step": 8052, "token_acc": 0.29329789753440516 }, { "epoch": 4.720609791849897, "grad_norm": 0.18972683442226776, "learning_rate": 0.00045417075999037136, "loss": 3.064742088317871, "step": 8053, "token_acc": 0.290101933739221 }, { "epoch": 4.721196130167106, "grad_norm": 0.17770215793419686, "learning_rate": 0.0004541567761022671, "loss": 3.043896198272705, "step": 8054, "token_acc": 0.2926433882451854 }, { "epoch": 4.7217824684843155, "grad_norm": 0.17708788766762143, "learning_rate": 0.00045414279029638496, "loss": 3.012117862701416, "step": 8055, "token_acc": 0.2992648970619519 }, { "epoch": 4.722368806801525, "grad_norm": 0.20225351446492323, "learning_rate": 0.0004541288025728564, "loss": 3.025546073913574, "step": 8056, "token_acc": 0.2972456649172472 }, { "epoch": 4.722955145118734, "grad_norm": 0.17179949720160054, "learning_rate": 0.0004541148129318129, "loss": 3.007215976715088, "step": 8057, "token_acc": 0.29916697228480116 }, { "epoch": 4.723541483435943, "grad_norm": 0.17614345092399, "learning_rate": 0.00045410082137338574, "loss": 3.034902811050415, "step": 8058, "token_acc": 0.2942907337571108 }, { "epoch": 4.724127821753152, "grad_norm": 0.19580686581221662, "learning_rate": 0.0004540868278977063, "loss": 3.0407934188842773, "step": 8059, "token_acc": 0.29171732354826585 }, { "epoch": 4.724714160070361, "grad_norm": 0.2228798767607216, "learning_rate": 0.00045407283250490624, "loss": 3.0703625679016113, "step": 8060, "token_acc": 0.29044849586932325 }, { "epoch": 4.725300498387569, "grad_norm": 0.1840369219953886, "learning_rate": 0.0004540588351951168, "loss": 3.016350507736206, "step": 8061, "token_acc": 0.29696571424028917 }, { "epoch": 4.725886836704778, "grad_norm": 0.182385789924304, "learning_rate": 0.0004540448359684697, "loss": 3.0543291568756104, "step": 8062, "token_acc": 0.2927285872905208 }, { "epoch": 4.7264731750219875, "grad_norm": 0.2201999128320826, "learning_rate": 0.0004540308348250962, "loss": 3.0787839889526367, "step": 8063, "token_acc": 0.2894356155591958 }, { "epoch": 4.727059513339197, "grad_norm": 0.17325986175214206, "learning_rate": 0.00045401683176512803, "loss": 3.042766571044922, "step": 8064, "token_acc": 0.2946817488481316 }, { "epoch": 4.727645851656406, "grad_norm": 0.2061233502749803, "learning_rate": 0.00045400282678869665, "loss": 3.054119348526001, "step": 8065, "token_acc": 0.29192980957617753 }, { "epoch": 4.728232189973615, "grad_norm": 0.2962575454414807, "learning_rate": 0.00045398881989593354, "loss": 3.067460536956787, "step": 8066, "token_acc": 0.2888792002903099 }, { "epoch": 4.728818528290824, "grad_norm": 0.265365653712062, "learning_rate": 0.0004539748110869704, "loss": 3.083008289337158, "step": 8067, "token_acc": 0.2890790054397359 }, { "epoch": 4.729404866608033, "grad_norm": 0.16617020537490407, "learning_rate": 0.0004539608003619387, "loss": 3.030125141143799, "step": 8068, "token_acc": 0.29540970053995874 }, { "epoch": 4.729991204925242, "grad_norm": 0.20107062891692165, "learning_rate": 0.0004539467877209702, "loss": 3.0496387481689453, "step": 8069, "token_acc": 0.29399727873801473 }, { "epoch": 4.730577543242451, "grad_norm": 0.18657548935829188, "learning_rate": 0.0004539327731641964, "loss": 3.071558952331543, "step": 8070, "token_acc": 0.29039538180119256 }, { "epoch": 4.7311638815596595, "grad_norm": 0.18021357722852507, "learning_rate": 0.000453918756691749, "loss": 3.0159921646118164, "step": 8071, "token_acc": 0.2958962893047701 }, { "epoch": 4.731750219876869, "grad_norm": 0.20441379033500795, "learning_rate": 0.0004539047383037597, "loss": 3.0333118438720703, "step": 8072, "token_acc": 0.29511471322996363 }, { "epoch": 4.732336558194078, "grad_norm": 0.20617762251715366, "learning_rate": 0.00045389071800036016, "loss": 3.039820671081543, "step": 8073, "token_acc": 0.29275729951116397 }, { "epoch": 4.732922896511287, "grad_norm": 0.17431825120320568, "learning_rate": 0.00045387669578168203, "loss": 3.0579569339752197, "step": 8074, "token_acc": 0.2910923535518236 }, { "epoch": 4.733509234828496, "grad_norm": 0.21101568332120535, "learning_rate": 0.0004538626716478571, "loss": 3.083423614501953, "step": 8075, "token_acc": 0.2892961347003274 }, { "epoch": 4.734095573145705, "grad_norm": 0.21652294443018716, "learning_rate": 0.0004538486455990171, "loss": 3.033292293548584, "step": 8076, "token_acc": 0.2943113025026524 }, { "epoch": 4.734681911462914, "grad_norm": 0.20568841019636736, "learning_rate": 0.0004538346176352937, "loss": 3.0142390727996826, "step": 8077, "token_acc": 0.29823160477635935 }, { "epoch": 4.735268249780123, "grad_norm": 0.1738444287100953, "learning_rate": 0.0004538205877568187, "loss": 3.0962371826171875, "step": 8078, "token_acc": 0.2856865604313754 }, { "epoch": 4.735854588097332, "grad_norm": 0.17370104473910622, "learning_rate": 0.000453806555963724, "loss": 3.018662929534912, "step": 8079, "token_acc": 0.2981563274884874 }, { "epoch": 4.7364409264145415, "grad_norm": 0.20344592926770422, "learning_rate": 0.00045379252225614134, "loss": 3.0663442611694336, "step": 8080, "token_acc": 0.2899164132122424 }, { "epoch": 4.737027264731751, "grad_norm": 0.19208033314034037, "learning_rate": 0.0004537784866342025, "loss": 3.0330026149749756, "step": 8081, "token_acc": 0.29588172381251965 }, { "epoch": 4.73761360304896, "grad_norm": 0.1630891015780073, "learning_rate": 0.00045376444909803947, "loss": 3.041818857192993, "step": 8082, "token_acc": 0.29393436611266544 }, { "epoch": 4.738199941366168, "grad_norm": 0.1639706936578414, "learning_rate": 0.0004537504096477839, "loss": 3.0361692905426025, "step": 8083, "token_acc": 0.2946450738610502 }, { "epoch": 4.738786279683377, "grad_norm": 0.16488294429107092, "learning_rate": 0.0004537363682835679, "loss": 3.023923873901367, "step": 8084, "token_acc": 0.29831159351296455 }, { "epoch": 4.739372618000586, "grad_norm": 0.18713907803508287, "learning_rate": 0.0004537223250055232, "loss": 3.045753002166748, "step": 8085, "token_acc": 0.29341514048093775 }, { "epoch": 4.739958956317795, "grad_norm": 0.17128204876607012, "learning_rate": 0.0004537082798137818, "loss": 3.054975986480713, "step": 8086, "token_acc": 0.29001805285534216 }, { "epoch": 4.740545294635004, "grad_norm": 0.1914184695653119, "learning_rate": 0.0004536942327084755, "loss": 3.0377347469329834, "step": 8087, "token_acc": 0.2940427542285795 }, { "epoch": 4.7411316329522135, "grad_norm": 0.2163891735271536, "learning_rate": 0.0004536801836897365, "loss": 3.063438653945923, "step": 8088, "token_acc": 0.289966543298252 }, { "epoch": 4.741717971269423, "grad_norm": 0.1882115107797921, "learning_rate": 0.00045366613275769663, "loss": 3.0624465942382812, "step": 8089, "token_acc": 0.2902184467915058 }, { "epoch": 4.742304309586632, "grad_norm": 0.18990157234270733, "learning_rate": 0.0004536520799124878, "loss": 3.0651159286499023, "step": 8090, "token_acc": 0.29020775612231076 }, { "epoch": 4.742890647903841, "grad_norm": 0.21256366216363703, "learning_rate": 0.0004536380251542422, "loss": 3.044762134552002, "step": 8091, "token_acc": 0.2933580380765901 }, { "epoch": 4.743476986221049, "grad_norm": 0.15986233911456202, "learning_rate": 0.00045362396848309174, "loss": 3.041992664337158, "step": 8092, "token_acc": 0.29472166701431335 }, { "epoch": 4.744063324538258, "grad_norm": 0.20744147783222688, "learning_rate": 0.0004536099098991684, "loss": 3.0738940238952637, "step": 8093, "token_acc": 0.28922035530821916 }, { "epoch": 4.744649662855467, "grad_norm": 0.3042606379603551, "learning_rate": 0.0004535958494026044, "loss": 3.033778190612793, "step": 8094, "token_acc": 0.2963875947335409 }, { "epoch": 4.745236001172676, "grad_norm": 0.18330114323403998, "learning_rate": 0.0004535817869935317, "loss": 2.9897680282592773, "step": 8095, "token_acc": 0.3026168808135995 }, { "epoch": 4.7458223394898855, "grad_norm": 0.22140905450212867, "learning_rate": 0.0004535677226720825, "loss": 3.077850818634033, "step": 8096, "token_acc": 0.2886442440166147 }, { "epoch": 4.746408677807095, "grad_norm": 0.2612011598179863, "learning_rate": 0.0004535536564383888, "loss": 3.0762956142425537, "step": 8097, "token_acc": 0.2892958083085683 }, { "epoch": 4.746995016124304, "grad_norm": 0.1725996598608016, "learning_rate": 0.0004535395882925828, "loss": 3.040656566619873, "step": 8098, "token_acc": 0.29342643567856963 }, { "epoch": 4.747581354441513, "grad_norm": 0.25189283725576816, "learning_rate": 0.0004535255182347967, "loss": 3.068525791168213, "step": 8099, "token_acc": 0.29079293165228237 }, { "epoch": 4.748167692758722, "grad_norm": 0.20699869487012704, "learning_rate": 0.00045351144626516255, "loss": 3.0454187393188477, "step": 8100, "token_acc": 0.29314732370069163 }, { "epoch": 4.748754031075931, "grad_norm": 0.1940087213995694, "learning_rate": 0.0004534973723838126, "loss": 3.0376486778259277, "step": 8101, "token_acc": 0.29510017925856163 }, { "epoch": 4.74934036939314, "grad_norm": 0.261808181763256, "learning_rate": 0.00045348329659087905, "loss": 3.0620203018188477, "step": 8102, "token_acc": 0.28954506744609043 }, { "epoch": 4.749926707710349, "grad_norm": 0.16918116494961782, "learning_rate": 0.0004534692188864942, "loss": 3.0556859970092773, "step": 8103, "token_acc": 0.2895942246106545 }, { "epoch": 4.7505130460275575, "grad_norm": 0.23963193894105225, "learning_rate": 0.00045345513927079006, "loss": 3.0365610122680664, "step": 8104, "token_acc": 0.2955468913585679 }, { "epoch": 4.751099384344767, "grad_norm": 0.18925023481667588, "learning_rate": 0.0004534410577438992, "loss": 3.0576319694519043, "step": 8105, "token_acc": 0.29056269195953355 }, { "epoch": 4.751685722661976, "grad_norm": 0.22301856758163083, "learning_rate": 0.0004534269743059537, "loss": 3.036783218383789, "step": 8106, "token_acc": 0.2944473406421259 }, { "epoch": 4.752272060979185, "grad_norm": 0.182574319055091, "learning_rate": 0.0004534128889570859, "loss": 3.048043727874756, "step": 8107, "token_acc": 0.2928452107930576 }, { "epoch": 4.752858399296394, "grad_norm": 0.22520703911979323, "learning_rate": 0.0004533988016974281, "loss": 3.028371810913086, "step": 8108, "token_acc": 0.29737944415007894 }, { "epoch": 4.753444737613603, "grad_norm": 0.2028047805188071, "learning_rate": 0.00045338471252711254, "loss": 3.048466682434082, "step": 8109, "token_acc": 0.2934945034275205 }, { "epoch": 4.754031075930812, "grad_norm": 0.21838439149603234, "learning_rate": 0.0004533706214462718, "loss": 3.054708480834961, "step": 8110, "token_acc": 0.29312563649168316 }, { "epoch": 4.754617414248021, "grad_norm": 0.21685423156794087, "learning_rate": 0.00045335652845503806, "loss": 3.051158905029297, "step": 8111, "token_acc": 0.29283435621705556 }, { "epoch": 4.75520375256523, "grad_norm": 0.20278054603934012, "learning_rate": 0.00045334243355354384, "loss": 3.0235133171081543, "step": 8112, "token_acc": 0.29648049470429577 }, { "epoch": 4.7557900908824395, "grad_norm": 0.1623626028787463, "learning_rate": 0.00045332833674192137, "loss": 3.0675442218780518, "step": 8113, "token_acc": 0.29116748702390904 }, { "epoch": 4.756376429199648, "grad_norm": 0.19889567768394695, "learning_rate": 0.00045331423802030325, "loss": 3.0920228958129883, "step": 8114, "token_acc": 0.2871832322306208 }, { "epoch": 4.756962767516857, "grad_norm": 0.19585973363158735, "learning_rate": 0.0004533001373888217, "loss": 3.024867534637451, "step": 8115, "token_acc": 0.29599368597147674 }, { "epoch": 4.757549105834066, "grad_norm": 0.24806897942540174, "learning_rate": 0.00045328603484760945, "loss": 3.022796869277954, "step": 8116, "token_acc": 0.29643810308585766 }, { "epoch": 4.758135444151275, "grad_norm": 0.22466475504730887, "learning_rate": 0.00045327193039679877, "loss": 3.0358481407165527, "step": 8117, "token_acc": 0.29646413815022776 }, { "epoch": 4.758721782468484, "grad_norm": 0.1693903426671957, "learning_rate": 0.0004532578240365222, "loss": 3.0672290325164795, "step": 8118, "token_acc": 0.2913440565657401 }, { "epoch": 4.759308120785693, "grad_norm": 0.2255760642157718, "learning_rate": 0.0004532437157669123, "loss": 3.0258865356445312, "step": 8119, "token_acc": 0.2964518186676649 }, { "epoch": 4.759894459102902, "grad_norm": 0.18568045481514533, "learning_rate": 0.00045322960558810156, "loss": 3.054309129714966, "step": 8120, "token_acc": 0.2926073344555795 }, { "epoch": 4.7604807974201115, "grad_norm": 0.1533671248873215, "learning_rate": 0.0004532154935002225, "loss": 3.072385311126709, "step": 8121, "token_acc": 0.2886014066282661 }, { "epoch": 4.761067135737321, "grad_norm": 0.18205365805874582, "learning_rate": 0.0004532013795034078, "loss": 3.0103516578674316, "step": 8122, "token_acc": 0.29778995522626006 }, { "epoch": 4.76165347405453, "grad_norm": 0.20593692336336708, "learning_rate": 0.00045318726359778983, "loss": 3.027998924255371, "step": 8123, "token_acc": 0.2978503300570159 }, { "epoch": 4.762239812371739, "grad_norm": 0.16898501082119435, "learning_rate": 0.00045317314578350145, "loss": 3.071122169494629, "step": 8124, "token_acc": 0.2890321379535035 }, { "epoch": 4.762826150688948, "grad_norm": 0.26975581748527555, "learning_rate": 0.00045315902606067506, "loss": 3.0170068740844727, "step": 8125, "token_acc": 0.29790003777594787 }, { "epoch": 4.763412489006156, "grad_norm": 0.18523025922802763, "learning_rate": 0.0004531449044294434, "loss": 3.067138195037842, "step": 8126, "token_acc": 0.2904073621079136 }, { "epoch": 4.763998827323365, "grad_norm": 0.26183581725698735, "learning_rate": 0.0004531307808899391, "loss": 3.0312914848327637, "step": 8127, "token_acc": 0.2961015665274432 }, { "epoch": 4.764585165640574, "grad_norm": 0.24413504086240792, "learning_rate": 0.00045311665544229483, "loss": 3.0271964073181152, "step": 8128, "token_acc": 0.2956888521952665 }, { "epoch": 4.7651715039577835, "grad_norm": 0.20036577418287554, "learning_rate": 0.0004531025280866433, "loss": 3.060777187347412, "step": 8129, "token_acc": 0.29321908655789536 }, { "epoch": 4.765757842274993, "grad_norm": 0.29147273222544895, "learning_rate": 0.0004530883988231172, "loss": 3.001120090484619, "step": 8130, "token_acc": 0.3002125899600417 }, { "epoch": 4.766344180592202, "grad_norm": 0.1809316619435025, "learning_rate": 0.00045307426765184923, "loss": 2.9764904975891113, "step": 8131, "token_acc": 0.3048796502497825 }, { "epoch": 4.766930518909411, "grad_norm": 0.24624316595138132, "learning_rate": 0.0004530601345729722, "loss": 3.014728307723999, "step": 8132, "token_acc": 0.2974370256297437 }, { "epoch": 4.76751685722662, "grad_norm": 0.2101937227298329, "learning_rate": 0.00045304599958661884, "loss": 3.0165584087371826, "step": 8133, "token_acc": 0.2975139579018655 }, { "epoch": 4.768103195543829, "grad_norm": 0.22798483227277452, "learning_rate": 0.0004530318626929219, "loss": 3.0670084953308105, "step": 8134, "token_acc": 0.2887177210893235 }, { "epoch": 4.768689533861037, "grad_norm": 0.2102260759242806, "learning_rate": 0.00045301772389201426, "loss": 3.062410593032837, "step": 8135, "token_acc": 0.29182172266765033 }, { "epoch": 4.7692758721782464, "grad_norm": 0.19254990672662414, "learning_rate": 0.0004530035831840286, "loss": 3.0415048599243164, "step": 8136, "token_acc": 0.2938247142478973 }, { "epoch": 4.769862210495456, "grad_norm": 0.21991333772621247, "learning_rate": 0.00045298944056909775, "loss": 3.1133790016174316, "step": 8137, "token_acc": 0.28484917526013415 }, { "epoch": 4.770448548812665, "grad_norm": 0.1867215356631029, "learning_rate": 0.00045297529604735475, "loss": 3.0927886962890625, "step": 8138, "token_acc": 0.2852247736920166 }, { "epoch": 4.771034887129874, "grad_norm": 0.21214681675154018, "learning_rate": 0.0004529611496189323, "loss": 3.014636993408203, "step": 8139, "token_acc": 0.299808054506345 }, { "epoch": 4.771621225447083, "grad_norm": 0.1975757320931099, "learning_rate": 0.0004529470012839634, "loss": 3.045872211456299, "step": 8140, "token_acc": 0.2944522074096454 }, { "epoch": 4.772207563764292, "grad_norm": 0.19785471692496498, "learning_rate": 0.00045293285104258094, "loss": 3.0008597373962402, "step": 8141, "token_acc": 0.29874825357226786 }, { "epoch": 4.772793902081501, "grad_norm": 0.18860934253743214, "learning_rate": 0.00045291869889491765, "loss": 3.0473556518554688, "step": 8142, "token_acc": 0.29156361566720135 }, { "epoch": 4.77338024039871, "grad_norm": 0.19997628761448896, "learning_rate": 0.00045290454484110676, "loss": 3.0097289085388184, "step": 8143, "token_acc": 0.2989769644897903 }, { "epoch": 4.773966578715919, "grad_norm": 0.17370910403847412, "learning_rate": 0.00045289038888128103, "loss": 3.0415279865264893, "step": 8144, "token_acc": 0.29445170513091407 }, { "epoch": 4.7745529170331285, "grad_norm": 0.2010236338999224, "learning_rate": 0.00045287623101557354, "loss": 3.0398483276367188, "step": 8145, "token_acc": 0.29441102971960237 }, { "epoch": 4.775139255350338, "grad_norm": 0.17060059987578335, "learning_rate": 0.00045286207124411716, "loss": 3.04191255569458, "step": 8146, "token_acc": 0.292796216751181 }, { "epoch": 4.775725593667546, "grad_norm": 0.20614815249002116, "learning_rate": 0.00045284790956704504, "loss": 3.0324440002441406, "step": 8147, "token_acc": 0.297024289242299 }, { "epoch": 4.776311931984755, "grad_norm": 0.2024526324918473, "learning_rate": 0.00045283374598449014, "loss": 3.0417675971984863, "step": 8148, "token_acc": 0.2946282303313265 }, { "epoch": 4.776898270301964, "grad_norm": 0.19714847562096544, "learning_rate": 0.00045281958049658545, "loss": 3.061729907989502, "step": 8149, "token_acc": 0.28997016224280475 }, { "epoch": 4.777484608619173, "grad_norm": 0.19341047965954822, "learning_rate": 0.00045280541310346417, "loss": 3.0443923473358154, "step": 8150, "token_acc": 0.29369112851159634 }, { "epoch": 4.778070946936382, "grad_norm": 0.16219366507663802, "learning_rate": 0.0004527912438052593, "loss": 3.0006027221679688, "step": 8151, "token_acc": 0.29967843039405645 }, { "epoch": 4.778657285253591, "grad_norm": 0.18374607898979095, "learning_rate": 0.000452777072602104, "loss": 3.0871877670288086, "step": 8152, "token_acc": 0.2887636686242276 }, { "epoch": 4.7792436235708005, "grad_norm": 0.1794223544060227, "learning_rate": 0.0004527628994941313, "loss": 2.9948267936706543, "step": 8153, "token_acc": 0.2999787293826006 }, { "epoch": 4.77982996188801, "grad_norm": 0.17348813035371666, "learning_rate": 0.00045274872448147443, "loss": 3.055471897125244, "step": 8154, "token_acc": 0.2917260285516402 }, { "epoch": 4.780416300205219, "grad_norm": 0.22187357998242838, "learning_rate": 0.0004527345475642665, "loss": 3.077023983001709, "step": 8155, "token_acc": 0.28943039249276187 }, { "epoch": 4.781002638522428, "grad_norm": 0.1798485718682848, "learning_rate": 0.00045272036874264063, "loss": 3.043170213699341, "step": 8156, "token_acc": 0.2944234564714655 }, { "epoch": 4.781588976839636, "grad_norm": 0.18884024698368765, "learning_rate": 0.00045270618801673005, "loss": 3.071423053741455, "step": 8157, "token_acc": 0.28846863371388215 }, { "epoch": 4.782175315156845, "grad_norm": 0.17262680775682385, "learning_rate": 0.00045269200538666804, "loss": 3.0400078296661377, "step": 8158, "token_acc": 0.2937985813849268 }, { "epoch": 4.782761653474054, "grad_norm": 0.19535359249509412, "learning_rate": 0.00045267782085258774, "loss": 3.061572551727295, "step": 8159, "token_acc": 0.291996396044333 }, { "epoch": 4.783347991791263, "grad_norm": 0.20809406727162685, "learning_rate": 0.00045266363441462247, "loss": 3.092970371246338, "step": 8160, "token_acc": 0.2865207239058092 }, { "epoch": 4.7839343301084725, "grad_norm": 0.22206959342920374, "learning_rate": 0.00045264944607290535, "loss": 3.0564486980438232, "step": 8161, "token_acc": 0.2924214652415752 }, { "epoch": 4.784520668425682, "grad_norm": 0.22461891425871752, "learning_rate": 0.00045263525582756985, "loss": 3.0112791061401367, "step": 8162, "token_acc": 0.2982998579178507 }, { "epoch": 4.785107006742891, "grad_norm": 0.18417368350067845, "learning_rate": 0.0004526210636787492, "loss": 3.0508201122283936, "step": 8163, "token_acc": 0.29390767822064173 }, { "epoch": 4.7856933450601, "grad_norm": 0.18823157095266424, "learning_rate": 0.0004526068696265766, "loss": 3.050769329071045, "step": 8164, "token_acc": 0.28980519530776355 }, { "epoch": 4.786279683377309, "grad_norm": 0.2190995218699739, "learning_rate": 0.0004525926736711855, "loss": 3.0141258239746094, "step": 8165, "token_acc": 0.2951894153097384 }, { "epoch": 4.786866021694518, "grad_norm": 0.15959001488573019, "learning_rate": 0.0004525784758127093, "loss": 3.0545763969421387, "step": 8166, "token_acc": 0.29337849360994445 }, { "epoch": 4.787452360011727, "grad_norm": 0.18967654705886594, "learning_rate": 0.00045256427605128125, "loss": 3.002711296081543, "step": 8167, "token_acc": 0.30008689075900685 }, { "epoch": 4.788038698328936, "grad_norm": 0.18138179995080322, "learning_rate": 0.00045255007438703475, "loss": 3.016913414001465, "step": 8168, "token_acc": 0.2981850876629632 }, { "epoch": 4.7886250366461445, "grad_norm": 0.16774213377523908, "learning_rate": 0.00045253587082010326, "loss": 3.0596976280212402, "step": 8169, "token_acc": 0.29020961351875807 }, { "epoch": 4.789211374963354, "grad_norm": 0.2369984921873028, "learning_rate": 0.00045252166535062025, "loss": 2.9944701194763184, "step": 8170, "token_acc": 0.30022385158773773 }, { "epoch": 4.789797713280563, "grad_norm": 0.2788138952444477, "learning_rate": 0.00045250745797871896, "loss": 3.0271573066711426, "step": 8171, "token_acc": 0.29560932937105217 }, { "epoch": 4.790384051597772, "grad_norm": 0.18487157099135848, "learning_rate": 0.00045249324870453314, "loss": 2.9938201904296875, "step": 8172, "token_acc": 0.29896432976973686 }, { "epoch": 4.790970389914981, "grad_norm": 0.2744353114017672, "learning_rate": 0.000452479037528196, "loss": 3.0438570976257324, "step": 8173, "token_acc": 0.2941330925517578 }, { "epoch": 4.79155672823219, "grad_norm": 0.18442693313922134, "learning_rate": 0.00045246482444984116, "loss": 3.039293050765991, "step": 8174, "token_acc": 0.2922290914434461 }, { "epoch": 4.792143066549399, "grad_norm": 0.27110192332513094, "learning_rate": 0.00045245060946960214, "loss": 3.028707504272461, "step": 8175, "token_acc": 0.29495353375092176 }, { "epoch": 4.792729404866608, "grad_norm": 0.26498931255971836, "learning_rate": 0.0004524363925876125, "loss": 3.061530590057373, "step": 8176, "token_acc": 0.29229383563955014 }, { "epoch": 4.793315743183817, "grad_norm": 0.19185896721747667, "learning_rate": 0.00045242217380400565, "loss": 3.0260987281799316, "step": 8177, "token_acc": 0.29495470831948367 }, { "epoch": 4.7939020815010265, "grad_norm": 0.21726649549631039, "learning_rate": 0.00045240795311891533, "loss": 3.035027265548706, "step": 8178, "token_acc": 0.2940701044514113 }, { "epoch": 4.794488419818235, "grad_norm": 0.17469648816618108, "learning_rate": 0.00045239373053247494, "loss": 3.041036367416382, "step": 8179, "token_acc": 0.2952447544866687 }, { "epoch": 4.795074758135444, "grad_norm": 0.2127138688452311, "learning_rate": 0.00045237950604481823, "loss": 3.0151727199554443, "step": 8180, "token_acc": 0.2955632617675557 }, { "epoch": 4.795661096452653, "grad_norm": 0.17420725086065458, "learning_rate": 0.00045236527965607877, "loss": 3.1032662391662598, "step": 8181, "token_acc": 0.2856022240051512 }, { "epoch": 4.796247434769862, "grad_norm": 0.20271027198464547, "learning_rate": 0.00045235105136639023, "loss": 3.096384286880493, "step": 8182, "token_acc": 0.28719787774976274 }, { "epoch": 4.796833773087071, "grad_norm": 0.19099116246628006, "learning_rate": 0.0004523368211758862, "loss": 3.09938383102417, "step": 8183, "token_acc": 0.28452029854242306 }, { "epoch": 4.79742011140428, "grad_norm": 0.21006969439211812, "learning_rate": 0.00045232258908470036, "loss": 3.039801597595215, "step": 8184, "token_acc": 0.2937149077575341 }, { "epoch": 4.798006449721489, "grad_norm": 0.20888067473454164, "learning_rate": 0.00045230835509296654, "loss": 3.06699275970459, "step": 8185, "token_acc": 0.29119142094635925 }, { "epoch": 4.7985927880386985, "grad_norm": 0.21996725334814787, "learning_rate": 0.0004522941192008182, "loss": 3.061077356338501, "step": 8186, "token_acc": 0.29046982128196347 }, { "epoch": 4.799179126355908, "grad_norm": 0.17691418287945837, "learning_rate": 0.0004522798814083893, "loss": 3.0206832885742188, "step": 8187, "token_acc": 0.29874474556937963 }, { "epoch": 4.799765464673117, "grad_norm": 0.21226660149099696, "learning_rate": 0.0004522656417158134, "loss": 3.036076068878174, "step": 8188, "token_acc": 0.29257932699185596 }, { "epoch": 4.800351802990326, "grad_norm": 0.21309178625271893, "learning_rate": 0.0004522514001232244, "loss": 3.027238368988037, "step": 8189, "token_acc": 0.296584487673151 }, { "epoch": 4.800938141307535, "grad_norm": 0.18131793264879323, "learning_rate": 0.0004522371566307561, "loss": 2.99778151512146, "step": 8190, "token_acc": 0.2997679873867176 }, { "epoch": 4.801524479624743, "grad_norm": 0.1901875517328491, "learning_rate": 0.00045222291123854215, "loss": 2.9859070777893066, "step": 8191, "token_acc": 0.30013573720413744 }, { "epoch": 4.802110817941952, "grad_norm": 0.1819594070891484, "learning_rate": 0.00045220866394671646, "loss": 3.0240635871887207, "step": 8192, "token_acc": 0.296548807394612 }, { "epoch": 4.802697156259161, "grad_norm": 0.2343654360576861, "learning_rate": 0.0004521944147554129, "loss": 3.055288314819336, "step": 8193, "token_acc": 0.2927331309443154 }, { "epoch": 4.8032834945763705, "grad_norm": 0.20147362533730825, "learning_rate": 0.0004521801636647652, "loss": 3.0612642765045166, "step": 8194, "token_acc": 0.2904154286584595 }, { "epoch": 4.80386983289358, "grad_norm": 0.19282735560067807, "learning_rate": 0.0004521659106749073, "loss": 3.074296712875366, "step": 8195, "token_acc": 0.28973154136786927 }, { "epoch": 4.804456171210789, "grad_norm": 0.17765576316857848, "learning_rate": 0.0004521516557859732, "loss": 3.0530242919921875, "step": 8196, "token_acc": 0.29366405731145684 }, { "epoch": 4.805042509527998, "grad_norm": 0.18331584291285877, "learning_rate": 0.0004521373989980966, "loss": 3.002938747406006, "step": 8197, "token_acc": 0.29919208429089356 }, { "epoch": 4.805628847845207, "grad_norm": 0.2152110024918458, "learning_rate": 0.00045212314031141155, "loss": 3.0662336349487305, "step": 8198, "token_acc": 0.28877844414166015 }, { "epoch": 4.806215186162416, "grad_norm": 0.1759259362179971, "learning_rate": 0.000452108879726052, "loss": 3.037829637527466, "step": 8199, "token_acc": 0.2949736690473957 }, { "epoch": 4.806801524479624, "grad_norm": 0.18484937935006487, "learning_rate": 0.0004520946172421518, "loss": 3.087880849838257, "step": 8200, "token_acc": 0.2887245850842565 }, { "epoch": 4.807387862796833, "grad_norm": 0.20921574199576717, "learning_rate": 0.000452080352859845, "loss": 3.0339581966400146, "step": 8201, "token_acc": 0.2952201231450813 }, { "epoch": 4.8079742011140425, "grad_norm": 0.172132749850909, "learning_rate": 0.0004520660865792656, "loss": 3.0557827949523926, "step": 8202, "token_acc": 0.2917493454503807 }, { "epoch": 4.808560539431252, "grad_norm": 0.2081584948715618, "learning_rate": 0.00045205181840054766, "loss": 3.0303049087524414, "step": 8203, "token_acc": 0.29475954662797393 }, { "epoch": 4.809146877748461, "grad_norm": 0.20778819285555394, "learning_rate": 0.0004520375483238251, "loss": 3.0470879077911377, "step": 8204, "token_acc": 0.2916244336789122 }, { "epoch": 4.80973321606567, "grad_norm": 0.1669762076257186, "learning_rate": 0.00045202327634923204, "loss": 3.0396885871887207, "step": 8205, "token_acc": 0.2959067814184985 }, { "epoch": 4.810319554382879, "grad_norm": 0.18624737149698814, "learning_rate": 0.0004520090024769025, "loss": 3.0731754302978516, "step": 8206, "token_acc": 0.2907396876997167 }, { "epoch": 4.810905892700088, "grad_norm": 0.18949841536672962, "learning_rate": 0.00045199472670697064, "loss": 3.0350866317749023, "step": 8207, "token_acc": 0.29409631936533875 }, { "epoch": 4.811492231017297, "grad_norm": 0.1847181395678015, "learning_rate": 0.0004519804490395705, "loss": 3.0365662574768066, "step": 8208, "token_acc": 0.29595431090420327 }, { "epoch": 4.812078569334506, "grad_norm": 0.20350569033264362, "learning_rate": 0.00045196616947483617, "loss": 3.030475378036499, "step": 8209, "token_acc": 0.29753115177148304 }, { "epoch": 4.812664907651715, "grad_norm": 0.17775173772891112, "learning_rate": 0.0004519518880129019, "loss": 3.04473876953125, "step": 8210, "token_acc": 0.2923811557549764 }, { "epoch": 4.8132512459689245, "grad_norm": 0.22266242487145857, "learning_rate": 0.00045193760465390173, "loss": 3.0540578365325928, "step": 8211, "token_acc": 0.2926404643951535 }, { "epoch": 4.813837584286133, "grad_norm": 0.1816731747013646, "learning_rate": 0.0004519233193979699, "loss": 3.0376217365264893, "step": 8212, "token_acc": 0.2953960178380755 }, { "epoch": 4.814423922603342, "grad_norm": 0.18542101540997544, "learning_rate": 0.0004519090322452405, "loss": 3.030982255935669, "step": 8213, "token_acc": 0.29635619332916263 }, { "epoch": 4.815010260920551, "grad_norm": 0.24811290343673947, "learning_rate": 0.00045189474319584796, "loss": 3.081815719604492, "step": 8214, "token_acc": 0.28862605648319933 }, { "epoch": 4.81559659923776, "grad_norm": 0.2081511746404867, "learning_rate": 0.00045188045224992625, "loss": 3.0511112213134766, "step": 8215, "token_acc": 0.29156705513014475 }, { "epoch": 4.816182937554969, "grad_norm": 0.1963137924701037, "learning_rate": 0.00045186615940760977, "loss": 3.0530755519866943, "step": 8216, "token_acc": 0.2918859626267574 }, { "epoch": 4.816769275872178, "grad_norm": 0.19799720595699524, "learning_rate": 0.00045185186466903274, "loss": 3.0408077239990234, "step": 8217, "token_acc": 0.29368820130730666 }, { "epoch": 4.817355614189387, "grad_norm": 0.23603834475105998, "learning_rate": 0.0004518375680343294, "loss": 3.0679879188537598, "step": 8218, "token_acc": 0.2906354215566295 }, { "epoch": 4.8179419525065965, "grad_norm": 0.28441550401993465, "learning_rate": 0.00045182326950363416, "loss": 3.0513529777526855, "step": 8219, "token_acc": 0.2924095771777891 }, { "epoch": 4.818528290823806, "grad_norm": 0.1913758323227508, "learning_rate": 0.00045180896907708127, "loss": 3.0455126762390137, "step": 8220, "token_acc": 0.2927460269462084 }, { "epoch": 4.819114629141015, "grad_norm": 0.21072786446966812, "learning_rate": 0.00045179466675480495, "loss": 3.0480964183807373, "step": 8221, "token_acc": 0.29236188975812266 }, { "epoch": 4.819700967458223, "grad_norm": 0.24379155056108015, "learning_rate": 0.00045178036253693975, "loss": 3.042239189147949, "step": 8222, "token_acc": 0.2947335000611567 }, { "epoch": 4.820287305775432, "grad_norm": 0.19739827143635144, "learning_rate": 0.0004517660564236199, "loss": 3.0239672660827637, "step": 8223, "token_acc": 0.2958702041857692 }, { "epoch": 4.820873644092641, "grad_norm": 0.2555757429696557, "learning_rate": 0.0004517517484149799, "loss": 3.031019687652588, "step": 8224, "token_acc": 0.2956024582303159 }, { "epoch": 4.82145998240985, "grad_norm": 0.22245925970434688, "learning_rate": 0.000451737438511154, "loss": 3.0054047107696533, "step": 8225, "token_acc": 0.29983288814032133 }, { "epoch": 4.822046320727059, "grad_norm": 0.2032970500041507, "learning_rate": 0.00045172312671227675, "loss": 3.075723886489868, "step": 8226, "token_acc": 0.28908207181551504 }, { "epoch": 4.8226326590442685, "grad_norm": 0.21796064979669263, "learning_rate": 0.0004517088130184826, "loss": 3.0939009189605713, "step": 8227, "token_acc": 0.28656539883891496 }, { "epoch": 4.823218997361478, "grad_norm": 0.16445290626758913, "learning_rate": 0.00045169449742990585, "loss": 3.0636043548583984, "step": 8228, "token_acc": 0.2899118587863573 }, { "epoch": 4.823805335678687, "grad_norm": 0.19598423088415046, "learning_rate": 0.0004516801799466812, "loss": 3.0252432823181152, "step": 8229, "token_acc": 0.2951720127818749 }, { "epoch": 4.824391673995896, "grad_norm": 0.1687893885374334, "learning_rate": 0.000451665860568943, "loss": 3.0518088340759277, "step": 8230, "token_acc": 0.2929648631840796 }, { "epoch": 4.824978012313105, "grad_norm": 0.20259312588275857, "learning_rate": 0.00045165153929682575, "loss": 3.0011117458343506, "step": 8231, "token_acc": 0.29926516548221216 }, { "epoch": 4.825564350630314, "grad_norm": 0.1672952011168371, "learning_rate": 0.000451637216130464, "loss": 3.0376741886138916, "step": 8232, "token_acc": 0.29391056535769666 }, { "epoch": 4.826150688947523, "grad_norm": 0.18548770455887906, "learning_rate": 0.0004516228910699923, "loss": 3.0299410820007324, "step": 8233, "token_acc": 0.2949940735617498 }, { "epoch": 4.826737027264731, "grad_norm": 0.18770707113638524, "learning_rate": 0.0004516085641155453, "loss": 3.0248372554779053, "step": 8234, "token_acc": 0.29551460814720215 }, { "epoch": 4.8273233655819405, "grad_norm": 0.2653974354565007, "learning_rate": 0.0004515942352672575, "loss": 3.05552339553833, "step": 8235, "token_acc": 0.2907236212942302 }, { "epoch": 4.82790970389915, "grad_norm": 0.198611981178459, "learning_rate": 0.0004515799045252634, "loss": 3.047046184539795, "step": 8236, "token_acc": 0.2924780460395236 }, { "epoch": 4.828496042216359, "grad_norm": 0.18607962794455024, "learning_rate": 0.0004515655718896979, "loss": 3.046444892883301, "step": 8237, "token_acc": 0.2921292344643189 }, { "epoch": 4.829082380533568, "grad_norm": 0.20294213635802263, "learning_rate": 0.00045155123736069535, "loss": 3.0549964904785156, "step": 8238, "token_acc": 0.29127074720799656 }, { "epoch": 4.829668718850777, "grad_norm": 0.1861743916212333, "learning_rate": 0.0004515369009383906, "loss": 3.0308034420013428, "step": 8239, "token_acc": 0.29709506341276964 }, { "epoch": 4.830255057167986, "grad_norm": 0.21265727641343649, "learning_rate": 0.00045152256262291813, "loss": 3.055305004119873, "step": 8240, "token_acc": 0.29209571352257446 }, { "epoch": 4.830841395485195, "grad_norm": 0.18001984024110576, "learning_rate": 0.00045150822241441283, "loss": 3.064025402069092, "step": 8241, "token_acc": 0.29079793030437157 }, { "epoch": 4.831427733802404, "grad_norm": 0.20289555218635544, "learning_rate": 0.0004514938803130093, "loss": 3.0849294662475586, "step": 8242, "token_acc": 0.285881336648266 }, { "epoch": 4.8320140721196125, "grad_norm": 0.1642261230291842, "learning_rate": 0.0004514795363188423, "loss": 3.041956663131714, "step": 8243, "token_acc": 0.29481787835838813 }, { "epoch": 4.832600410436822, "grad_norm": 0.1960141560385876, "learning_rate": 0.0004514651904320465, "loss": 3.0624804496765137, "step": 8244, "token_acc": 0.2904554927695547 }, { "epoch": 4.833186748754031, "grad_norm": 0.19378282849160142, "learning_rate": 0.0004514508426527567, "loss": 3.0687248706817627, "step": 8245, "token_acc": 0.28916678115851663 }, { "epoch": 4.83377308707124, "grad_norm": 0.17614857788405613, "learning_rate": 0.0004514364929811078, "loss": 3.0405640602111816, "step": 8246, "token_acc": 0.2956237891775696 }, { "epoch": 4.834359425388449, "grad_norm": 0.21554557132842408, "learning_rate": 0.00045142214141723436, "loss": 3.0461223125457764, "step": 8247, "token_acc": 0.2941286401456058 }, { "epoch": 4.834945763705658, "grad_norm": 0.1848103303675837, "learning_rate": 0.00045140778796127134, "loss": 3.0603389739990234, "step": 8248, "token_acc": 0.29088755197324107 }, { "epoch": 4.835532102022867, "grad_norm": 0.16538575200036382, "learning_rate": 0.00045139343261335363, "loss": 3.0047850608825684, "step": 8249, "token_acc": 0.29904514327908316 }, { "epoch": 4.836118440340076, "grad_norm": 0.18695626308268848, "learning_rate": 0.00045137907537361595, "loss": 3.053900957107544, "step": 8250, "token_acc": 0.2920105026256564 }, { "epoch": 4.836704778657285, "grad_norm": 0.17796180889392935, "learning_rate": 0.0004513647162421932, "loss": 3.0795178413391113, "step": 8251, "token_acc": 0.28806730407916165 }, { "epoch": 4.8372911169744945, "grad_norm": 0.21399315031831678, "learning_rate": 0.0004513503552192203, "loss": 3.0447440147399902, "step": 8252, "token_acc": 0.2921939170213454 }, { "epoch": 4.837877455291704, "grad_norm": 0.23217207060786355, "learning_rate": 0.00045133599230483224, "loss": 3.0184664726257324, "step": 8253, "token_acc": 0.2983459732372046 }, { "epoch": 4.838463793608913, "grad_norm": 0.1860159529148665, "learning_rate": 0.0004513216274991637, "loss": 3.050820827484131, "step": 8254, "token_acc": 0.2931625750591988 }, { "epoch": 4.839050131926121, "grad_norm": 0.2258048963594445, "learning_rate": 0.0004513072608023498, "loss": 3.0180134773254395, "step": 8255, "token_acc": 0.29864361089767966 }, { "epoch": 4.83963647024333, "grad_norm": 0.24163014506769456, "learning_rate": 0.00045129289221452546, "loss": 3.06150484085083, "step": 8256, "token_acc": 0.29170875787767997 }, { "epoch": 4.840222808560539, "grad_norm": 0.17968296213181534, "learning_rate": 0.0004512785217358256, "loss": 2.997995376586914, "step": 8257, "token_acc": 0.2969716894589765 }, { "epoch": 4.840809146877748, "grad_norm": 0.20175018047072732, "learning_rate": 0.0004512641493663853, "loss": 3.022812843322754, "step": 8258, "token_acc": 0.2954193298469882 }, { "epoch": 4.841395485194957, "grad_norm": 0.20540612165688657, "learning_rate": 0.0004512497751063395, "loss": 3.0291857719421387, "step": 8259, "token_acc": 0.29613671258352614 }, { "epoch": 4.8419818235121665, "grad_norm": 0.18518194037587807, "learning_rate": 0.00045123539895582326, "loss": 3.036161422729492, "step": 8260, "token_acc": 0.29468225239890333 }, { "epoch": 4.842568161829376, "grad_norm": 0.19798072263496472, "learning_rate": 0.00045122102091497165, "loss": 3.037208080291748, "step": 8261, "token_acc": 0.29427519162946864 }, { "epoch": 4.843154500146585, "grad_norm": 0.23858512454360034, "learning_rate": 0.0004512066409839196, "loss": 3.0512466430664062, "step": 8262, "token_acc": 0.29195662904161895 }, { "epoch": 4.843740838463794, "grad_norm": 0.1775303186718838, "learning_rate": 0.0004511922591628024, "loss": 3.0311052799224854, "step": 8263, "token_acc": 0.29518913333837354 }, { "epoch": 4.844327176781003, "grad_norm": 0.1686940828604769, "learning_rate": 0.0004511778754517549, "loss": 3.0203659534454346, "step": 8264, "token_acc": 0.29614443662785206 }, { "epoch": 4.844913515098211, "grad_norm": 0.21986746919202957, "learning_rate": 0.00045116348985091247, "loss": 3.0562288761138916, "step": 8265, "token_acc": 0.2913219780338924 }, { "epoch": 4.84549985341542, "grad_norm": 0.1658507770588668, "learning_rate": 0.0004511491023604101, "loss": 3.034461498260498, "step": 8266, "token_acc": 0.294979401094509 }, { "epoch": 4.8460861917326294, "grad_norm": 0.2854573165424869, "learning_rate": 0.000451134712980383, "loss": 3.0482699871063232, "step": 8267, "token_acc": 0.29244876028520106 }, { "epoch": 4.846672530049839, "grad_norm": 0.40747935345055003, "learning_rate": 0.00045112032171096625, "loss": 3.0340206623077393, "step": 8268, "token_acc": 0.2935023397209433 }, { "epoch": 4.847258868367048, "grad_norm": 0.1714339875190739, "learning_rate": 0.00045110592855229504, "loss": 3.0818023681640625, "step": 8269, "token_acc": 0.2880880466488184 }, { "epoch": 4.847845206684257, "grad_norm": 0.28611751791994827, "learning_rate": 0.0004510915335045047, "loss": 3.025582790374756, "step": 8270, "token_acc": 0.29641714266276975 }, { "epoch": 4.848431545001466, "grad_norm": 0.20687179007412346, "learning_rate": 0.0004510771365677304, "loss": 3.0459704399108887, "step": 8271, "token_acc": 0.29262902906861776 }, { "epoch": 4.849017883318675, "grad_norm": 0.2540211393347662, "learning_rate": 0.00045106273774210727, "loss": 3.122156858444214, "step": 8272, "token_acc": 0.283641586611018 }, { "epoch": 4.849604221635884, "grad_norm": 0.16729101340677663, "learning_rate": 0.0004510483370277707, "loss": 3.0336146354675293, "step": 8273, "token_acc": 0.29466767231136415 }, { "epoch": 4.850190559953093, "grad_norm": 0.22966833244692123, "learning_rate": 0.00045103393442485595, "loss": 3.016200304031372, "step": 8274, "token_acc": 0.29795679616238346 }, { "epoch": 4.850776898270302, "grad_norm": 0.19149761144065783, "learning_rate": 0.0004510195299334983, "loss": 3.0628151893615723, "step": 8275, "token_acc": 0.29037685588834056 }, { "epoch": 4.8513632365875115, "grad_norm": 0.19339022669337116, "learning_rate": 0.00045100512355383295, "loss": 3.0466232299804688, "step": 8276, "token_acc": 0.2924061979095684 }, { "epoch": 4.85194957490472, "grad_norm": 0.254629534085646, "learning_rate": 0.00045099071528599533, "loss": 3.0473294258117676, "step": 8277, "token_acc": 0.29009173832361934 }, { "epoch": 4.852535913221929, "grad_norm": 0.1771371554988384, "learning_rate": 0.0004509763051301209, "loss": 3.0144152641296387, "step": 8278, "token_acc": 0.2977145651515818 }, { "epoch": 4.853122251539138, "grad_norm": 0.1982112277988596, "learning_rate": 0.0004509618930863448, "loss": 3.0239410400390625, "step": 8279, "token_acc": 0.29586634309841986 }, { "epoch": 4.853708589856347, "grad_norm": 0.1711864941313512, "learning_rate": 0.0004509474791548025, "loss": 3.039302110671997, "step": 8280, "token_acc": 0.2947339220740047 }, { "epoch": 4.854294928173556, "grad_norm": 0.20694308895413283, "learning_rate": 0.0004509330633356294, "loss": 3.0800561904907227, "step": 8281, "token_acc": 0.2875939316602864 }, { "epoch": 4.854881266490765, "grad_norm": 0.16532086307565366, "learning_rate": 0.000450918645628961, "loss": 3.0677409172058105, "step": 8282, "token_acc": 0.29062026791683954 }, { "epoch": 4.855467604807974, "grad_norm": 0.1937538227513767, "learning_rate": 0.0004509042260349327, "loss": 3.0369300842285156, "step": 8283, "token_acc": 0.2940584247452165 }, { "epoch": 4.8560539431251835, "grad_norm": 0.1627952674649333, "learning_rate": 0.00045088980455367986, "loss": 3.0220439434051514, "step": 8284, "token_acc": 0.29744786746600166 }, { "epoch": 4.856640281442393, "grad_norm": 0.21819419703387286, "learning_rate": 0.000450875381185338, "loss": 3.028167247772217, "step": 8285, "token_acc": 0.2945527040079206 }, { "epoch": 4.857226619759601, "grad_norm": 0.16456155947307877, "learning_rate": 0.0004508609559300426, "loss": 3.105139970779419, "step": 8286, "token_acc": 0.28367108064300733 }, { "epoch": 4.85781295807681, "grad_norm": 0.2268339870359941, "learning_rate": 0.0004508465287879292, "loss": 3.05922269821167, "step": 8287, "token_acc": 0.29059741904667347 }, { "epoch": 4.858399296394019, "grad_norm": 0.1781345172138699, "learning_rate": 0.0004508320997591334, "loss": 3.049931049346924, "step": 8288, "token_acc": 0.292901339728101 }, { "epoch": 4.858985634711228, "grad_norm": 0.18781316937135573, "learning_rate": 0.0004508176688437905, "loss": 3.0567965507507324, "step": 8289, "token_acc": 0.2920211536775897 }, { "epoch": 4.859571973028437, "grad_norm": 0.18221711413082994, "learning_rate": 0.00045080323604203634, "loss": 3.0802602767944336, "step": 8290, "token_acc": 0.2882619069753029 }, { "epoch": 4.860158311345646, "grad_norm": 0.18969375410013278, "learning_rate": 0.00045078880135400625, "loss": 3.0400662422180176, "step": 8291, "token_acc": 0.2929359782490714 }, { "epoch": 4.8607446496628555, "grad_norm": 0.21968750432244005, "learning_rate": 0.000450774364779836, "loss": 3.0454132556915283, "step": 8292, "token_acc": 0.2932408477438989 }, { "epoch": 4.861330987980065, "grad_norm": 0.16812389983138806, "learning_rate": 0.0004507599263196611, "loss": 3.0248780250549316, "step": 8293, "token_acc": 0.2972627222774831 }, { "epoch": 4.861917326297274, "grad_norm": 0.19806776583358124, "learning_rate": 0.0004507454859736172, "loss": 3.021618366241455, "step": 8294, "token_acc": 0.29810110859236716 }, { "epoch": 4.862503664614483, "grad_norm": 0.1587022058730712, "learning_rate": 0.00045073104374184, "loss": 2.9967684745788574, "step": 8295, "token_acc": 0.30186555629826706 }, { "epoch": 4.863090002931692, "grad_norm": 0.18583149112933373, "learning_rate": 0.00045071659962446514, "loss": 3.0586111545562744, "step": 8296, "token_acc": 0.29162186839844617 }, { "epoch": 4.863676341248901, "grad_norm": 0.1426706138827495, "learning_rate": 0.0004507021536216283, "loss": 3.0647144317626953, "step": 8297, "token_acc": 0.2904473311963871 }, { "epoch": 4.86426267956611, "grad_norm": 0.19014343454350513, "learning_rate": 0.0004506877057334652, "loss": 3.049382209777832, "step": 8298, "token_acc": 0.2905761107219427 }, { "epoch": 4.864849017883318, "grad_norm": 0.17736289521929513, "learning_rate": 0.0004506732559601115, "loss": 3.039126396179199, "step": 8299, "token_acc": 0.2933060522586501 }, { "epoch": 4.8654353562005275, "grad_norm": 0.15575164779609152, "learning_rate": 0.000450658804301703, "loss": 3.022524356842041, "step": 8300, "token_acc": 0.2986661016106712 }, { "epoch": 4.866021694517737, "grad_norm": 0.17094831537689123, "learning_rate": 0.0004506443507583754, "loss": 3.058741569519043, "step": 8301, "token_acc": 0.291617371682872 }, { "epoch": 4.866608032834946, "grad_norm": 0.20711076560870667, "learning_rate": 0.00045062989533026443, "loss": 3.028104305267334, "step": 8302, "token_acc": 0.29600004277457265 }, { "epoch": 4.867194371152155, "grad_norm": 0.16370778000011313, "learning_rate": 0.000450615438017506, "loss": 3.0587964057922363, "step": 8303, "token_acc": 0.2905246305546748 }, { "epoch": 4.867780709469364, "grad_norm": 0.18332750971283526, "learning_rate": 0.0004506009788202359, "loss": 3.0489614009857178, "step": 8304, "token_acc": 0.29359181851909605 }, { "epoch": 4.868367047786573, "grad_norm": 0.2004335696244491, "learning_rate": 0.00045058651773858995, "loss": 3.041551113128662, "step": 8305, "token_acc": 0.2934721051436565 }, { "epoch": 4.868953386103782, "grad_norm": 0.16675080067893025, "learning_rate": 0.0004505720547727039, "loss": 3.0671846866607666, "step": 8306, "token_acc": 0.28879350490033023 }, { "epoch": 4.869539724420991, "grad_norm": 0.1827681620566002, "learning_rate": 0.0004505575899227137, "loss": 3.073871612548828, "step": 8307, "token_acc": 0.2885374478980741 }, { "epoch": 4.8701260627381995, "grad_norm": 0.2854804878055513, "learning_rate": 0.0004505431231887551, "loss": 3.0441880226135254, "step": 8308, "token_acc": 0.2926706482689183 }, { "epoch": 4.870712401055409, "grad_norm": 0.26288764627398586, "learning_rate": 0.00045052865457096417, "loss": 3.0122766494750977, "step": 8309, "token_acc": 0.298260049007634 }, { "epoch": 4.871298739372618, "grad_norm": 0.1667566720385298, "learning_rate": 0.00045051418406947673, "loss": 3.0150389671325684, "step": 8310, "token_acc": 0.2979929896400228 }, { "epoch": 4.871885077689827, "grad_norm": 0.1901123541683121, "learning_rate": 0.0004504997116844287, "loss": 3.0392978191375732, "step": 8311, "token_acc": 0.29520058136473265 }, { "epoch": 4.872471416007036, "grad_norm": 0.17376535300026733, "learning_rate": 0.0004504852374159561, "loss": 3.0811309814453125, "step": 8312, "token_acc": 0.28645390165508594 }, { "epoch": 4.873057754324245, "grad_norm": 0.18689811189213562, "learning_rate": 0.0004504707612641948, "loss": 3.0977230072021484, "step": 8313, "token_acc": 0.28651914148230284 }, { "epoch": 4.873644092641454, "grad_norm": 0.2114725174105723, "learning_rate": 0.00045045628322928097, "loss": 3.05631685256958, "step": 8314, "token_acc": 0.2919886294330096 }, { "epoch": 4.874230430958663, "grad_norm": 0.16258329420094583, "learning_rate": 0.00045044180331135043, "loss": 3.0620105266571045, "step": 8315, "token_acc": 0.2900593272663821 }, { "epoch": 4.874816769275872, "grad_norm": 0.187609521637445, "learning_rate": 0.0004504273215105391, "loss": 2.9953415393829346, "step": 8316, "token_acc": 0.3011473400867997 }, { "epoch": 4.8754031075930815, "grad_norm": 0.2081663268046037, "learning_rate": 0.0004504128378269833, "loss": 3.0131211280822754, "step": 8317, "token_acc": 0.2985325034434905 }, { "epoch": 4.875989445910291, "grad_norm": 0.24492182453415076, "learning_rate": 0.00045039835226081895, "loss": 3.065298080444336, "step": 8318, "token_acc": 0.2905076163640601 }, { "epoch": 4.8765757842275, "grad_norm": 0.25909647115920226, "learning_rate": 0.0004503838648121821, "loss": 3.0742697715759277, "step": 8319, "token_acc": 0.28982599762172484 }, { "epoch": 4.877162122544708, "grad_norm": 0.19140658749870088, "learning_rate": 0.00045036937548120884, "loss": 3.087031841278076, "step": 8320, "token_acc": 0.2890294795713615 }, { "epoch": 4.877748460861917, "grad_norm": 0.19781086809894965, "learning_rate": 0.0004503548842680353, "loss": 3.0446617603302, "step": 8321, "token_acc": 0.2913714036571465 }, { "epoch": 4.878334799179126, "grad_norm": 0.2470620086619544, "learning_rate": 0.0004503403911727976, "loss": 2.9955687522888184, "step": 8322, "token_acc": 0.3003205216558866 }, { "epoch": 4.878921137496335, "grad_norm": 0.1884856833319998, "learning_rate": 0.00045032589619563193, "loss": 3.050199508666992, "step": 8323, "token_acc": 0.2936966449884616 }, { "epoch": 4.879507475813544, "grad_norm": 0.18945612134356593, "learning_rate": 0.0004503113993366743, "loss": 3.0228309631347656, "step": 8324, "token_acc": 0.29616642594484444 }, { "epoch": 4.8800938141307535, "grad_norm": 0.22093690226448756, "learning_rate": 0.0004502969005960611, "loss": 3.0488386154174805, "step": 8325, "token_acc": 0.29358465454939375 }, { "epoch": 4.880680152447963, "grad_norm": 0.19358001192148847, "learning_rate": 0.0004502823999739284, "loss": 3.105501890182495, "step": 8326, "token_acc": 0.28564545272033726 }, { "epoch": 4.881266490765172, "grad_norm": 0.19056204384865041, "learning_rate": 0.0004502678974704124, "loss": 3.0377073287963867, "step": 8327, "token_acc": 0.2954767761237633 }, { "epoch": 4.881852829082381, "grad_norm": 0.18688258208672415, "learning_rate": 0.0004502533930856494, "loss": 3.05466890335083, "step": 8328, "token_acc": 0.29204103689126193 }, { "epoch": 4.88243916739959, "grad_norm": 0.1938227048346595, "learning_rate": 0.0004502388868197756, "loss": 3.0520758628845215, "step": 8329, "token_acc": 0.29202354020255644 }, { "epoch": 4.883025505716798, "grad_norm": 0.20166338096370262, "learning_rate": 0.0004502243786729273, "loss": 3.0213375091552734, "step": 8330, "token_acc": 0.29698114413891213 }, { "epoch": 4.883611844034007, "grad_norm": 0.19640365465814036, "learning_rate": 0.0004502098686452408, "loss": 3.0327935218811035, "step": 8331, "token_acc": 0.2969744925960857 }, { "epoch": 4.884198182351216, "grad_norm": 0.17461322311797992, "learning_rate": 0.0004501953567368523, "loss": 3.060469627380371, "step": 8332, "token_acc": 0.29185001257229065 }, { "epoch": 4.8847845206684255, "grad_norm": 0.17606568801465003, "learning_rate": 0.00045018084294789817, "loss": 2.9896316528320312, "step": 8333, "token_acc": 0.30154963004327795 }, { "epoch": 4.885370858985635, "grad_norm": 0.19634284557164672, "learning_rate": 0.0004501663272785149, "loss": 3.009902238845825, "step": 8334, "token_acc": 0.2979915670897055 }, { "epoch": 4.885957197302844, "grad_norm": 0.20945982070195124, "learning_rate": 0.00045015180972883865, "loss": 3.0668418407440186, "step": 8335, "token_acc": 0.2903726263454505 }, { "epoch": 4.886543535620053, "grad_norm": 0.22550718491910798, "learning_rate": 0.0004501372902990058, "loss": 3.0472984313964844, "step": 8336, "token_acc": 0.29208705804450485 }, { "epoch": 4.887129873937262, "grad_norm": 0.16972176005510667, "learning_rate": 0.00045012276898915286, "loss": 3.057155132293701, "step": 8337, "token_acc": 0.29188087765878906 }, { "epoch": 4.887716212254471, "grad_norm": 0.20458552388360932, "learning_rate": 0.0004501082457994161, "loss": 3.046921730041504, "step": 8338, "token_acc": 0.29151475310358776 }, { "epoch": 4.88830255057168, "grad_norm": 0.25106052448466787, "learning_rate": 0.0004500937207299321, "loss": 3.018035411834717, "step": 8339, "token_acc": 0.2961233446745001 }, { "epoch": 4.888888888888889, "grad_norm": 0.22364060941116343, "learning_rate": 0.0004500791937808372, "loss": 3.0083279609680176, "step": 8340, "token_acc": 0.2989196800991874 }, { "epoch": 4.889475227206098, "grad_norm": 0.17354286492597026, "learning_rate": 0.00045006466495226786, "loss": 2.984035015106201, "step": 8341, "token_acc": 0.30236009855767304 }, { "epoch": 4.890061565523307, "grad_norm": 0.23114899301911834, "learning_rate": 0.0004500501342443606, "loss": 3.058978796005249, "step": 8342, "token_acc": 0.29212389774067904 }, { "epoch": 4.890647903840516, "grad_norm": 0.2026893390288088, "learning_rate": 0.0004500356016572519, "loss": 3.0550312995910645, "step": 8343, "token_acc": 0.29288573987251754 }, { "epoch": 4.891234242157725, "grad_norm": 0.17149250991408108, "learning_rate": 0.00045002106719107827, "loss": 3.052339553833008, "step": 8344, "token_acc": 0.29255295666603476 }, { "epoch": 4.891820580474934, "grad_norm": 0.2197789867818995, "learning_rate": 0.00045000653084597625, "loss": 3.0772218704223633, "step": 8345, "token_acc": 0.29062587930494427 }, { "epoch": 4.892406918792143, "grad_norm": 0.18326869940463486, "learning_rate": 0.0004499919926220824, "loss": 2.9842095375061035, "step": 8346, "token_acc": 0.3022970780309227 }, { "epoch": 4.892993257109352, "grad_norm": 0.20745186260908338, "learning_rate": 0.00044997745251953324, "loss": 3.100586414337158, "step": 8347, "token_acc": 0.28470819275378667 }, { "epoch": 4.893579595426561, "grad_norm": 0.1883665072814455, "learning_rate": 0.0004499629105384654, "loss": 3.0712223052978516, "step": 8348, "token_acc": 0.2903380147645806 }, { "epoch": 4.89416593374377, "grad_norm": 0.18348760315422052, "learning_rate": 0.00044994836667901553, "loss": 3.09987735748291, "step": 8349, "token_acc": 0.2860487974098057 }, { "epoch": 4.8947522720609795, "grad_norm": 0.2027696144139854, "learning_rate": 0.0004499338209413201, "loss": 3.0382657051086426, "step": 8350, "token_acc": 0.2935398679532758 }, { "epoch": 4.895338610378188, "grad_norm": 0.19965863286683658, "learning_rate": 0.0004499192733255159, "loss": 3.0535008907318115, "step": 8351, "token_acc": 0.29194557053518116 }, { "epoch": 4.895924948695397, "grad_norm": 0.18190603494018373, "learning_rate": 0.00044990472383173944, "loss": 3.0735087394714355, "step": 8352, "token_acc": 0.2896419852061718 }, { "epoch": 4.896511287012606, "grad_norm": 0.17736298587099053, "learning_rate": 0.00044989017246012755, "loss": 3.0627965927124023, "step": 8353, "token_acc": 0.2904934873222063 }, { "epoch": 4.897097625329815, "grad_norm": 0.17534962681540694, "learning_rate": 0.0004498756192108168, "loss": 3.0565590858459473, "step": 8354, "token_acc": 0.29150343653689925 }, { "epoch": 4.897683963647024, "grad_norm": 0.17658781455656447, "learning_rate": 0.00044986106408394396, "loss": 3.021911382675171, "step": 8355, "token_acc": 0.2955750776141438 }, { "epoch": 4.898270301964233, "grad_norm": 0.18073190375119705, "learning_rate": 0.00044984650707964574, "loss": 3.048750400543213, "step": 8356, "token_acc": 0.29211815124743906 }, { "epoch": 4.898856640281442, "grad_norm": 0.21931290289951014, "learning_rate": 0.0004498319481980589, "loss": 3.0696911811828613, "step": 8357, "token_acc": 0.29000563867668755 }, { "epoch": 4.8994429785986515, "grad_norm": 0.20681385333499622, "learning_rate": 0.00044981738743932013, "loss": 3.043977737426758, "step": 8358, "token_acc": 0.2936123307162976 }, { "epoch": 4.900029316915861, "grad_norm": 0.2195677624572636, "learning_rate": 0.0004498028248035664, "loss": 2.9817733764648438, "step": 8359, "token_acc": 0.3024633979321101 }, { "epoch": 4.90061565523307, "grad_norm": 0.19002182913900476, "learning_rate": 0.0004497882602909342, "loss": 3.0093250274658203, "step": 8360, "token_acc": 0.29686161575288117 }, { "epoch": 4.901201993550279, "grad_norm": 0.18070873848864266, "learning_rate": 0.00044977369390156055, "loss": 3.0158705711364746, "step": 8361, "token_acc": 0.2980548084044751 }, { "epoch": 4.901788331867488, "grad_norm": 0.2158714788937025, "learning_rate": 0.0004497591256355823, "loss": 3.0400524139404297, "step": 8362, "token_acc": 0.2940378935503338 }, { "epoch": 4.902374670184696, "grad_norm": 0.1853061038186991, "learning_rate": 0.0004497445554931362, "loss": 3.075366973876953, "step": 8363, "token_acc": 0.2903067615468671 }, { "epoch": 4.902961008501905, "grad_norm": 0.1776333558850975, "learning_rate": 0.00044972998347435925, "loss": 3.1197237968444824, "step": 8364, "token_acc": 0.28254274144194585 }, { "epoch": 4.903547346819114, "grad_norm": 0.24706654707522493, "learning_rate": 0.0004497154095793881, "loss": 3.059683322906494, "step": 8365, "token_acc": 0.29093332403665395 }, { "epoch": 4.9041336851363235, "grad_norm": 0.21923055812986283, "learning_rate": 0.0004497008338083599, "loss": 3.052083969116211, "step": 8366, "token_acc": 0.2920365006695942 }, { "epoch": 4.904720023453533, "grad_norm": 0.1762778878808655, "learning_rate": 0.00044968625616141145, "loss": 3.047804355621338, "step": 8367, "token_acc": 0.2921642116272989 }, { "epoch": 4.905306361770742, "grad_norm": 0.23502957404280328, "learning_rate": 0.00044967167663867967, "loss": 3.0428833961486816, "step": 8368, "token_acc": 0.2928335959117207 }, { "epoch": 4.905892700087951, "grad_norm": 0.18944999126709933, "learning_rate": 0.00044965709524030155, "loss": 3.050354480743408, "step": 8369, "token_acc": 0.2933137078958278 }, { "epoch": 4.90647903840516, "grad_norm": 0.190684901994884, "learning_rate": 0.0004496425119664141, "loss": 3.0448107719421387, "step": 8370, "token_acc": 0.294128662991586 }, { "epoch": 4.907065376722369, "grad_norm": 0.20525728708595328, "learning_rate": 0.00044962792681715424, "loss": 3.0587708950042725, "step": 8371, "token_acc": 0.29055325261325404 }, { "epoch": 4.907651715039578, "grad_norm": 0.17082799780944988, "learning_rate": 0.00044961333979265904, "loss": 3.040635347366333, "step": 8372, "token_acc": 0.2939021096695786 }, { "epoch": 4.908238053356786, "grad_norm": 0.2253900685153574, "learning_rate": 0.00044959875089306545, "loss": 3.011045455932617, "step": 8373, "token_acc": 0.29766916818406675 }, { "epoch": 4.9088243916739955, "grad_norm": 0.18480579918933207, "learning_rate": 0.0004495841601185106, "loss": 3.058290958404541, "step": 8374, "token_acc": 0.2927882877296129 }, { "epoch": 4.909410729991205, "grad_norm": 0.1855846789344752, "learning_rate": 0.00044956956746913145, "loss": 3.056124210357666, "step": 8375, "token_acc": 0.2904010920685734 }, { "epoch": 4.909997068308414, "grad_norm": 0.18212893314110834, "learning_rate": 0.00044955497294506524, "loss": 3.076988935470581, "step": 8376, "token_acc": 0.28899212854012823 }, { "epoch": 4.910583406625623, "grad_norm": 0.16780978357273071, "learning_rate": 0.00044954037654644887, "loss": 3.0520377159118652, "step": 8377, "token_acc": 0.2937051469267592 }, { "epoch": 4.911169744942832, "grad_norm": 0.18441248686934283, "learning_rate": 0.0004495257782734196, "loss": 3.0441391468048096, "step": 8378, "token_acc": 0.2921616187820222 }, { "epoch": 4.911756083260041, "grad_norm": 0.18279474749233285, "learning_rate": 0.00044951117812611454, "loss": 3.0296130180358887, "step": 8379, "token_acc": 0.29467209180789455 }, { "epoch": 4.91234242157725, "grad_norm": 0.20790273114563643, "learning_rate": 0.0004494965761046707, "loss": 3.0382368564605713, "step": 8380, "token_acc": 0.29467800489408685 }, { "epoch": 4.912928759894459, "grad_norm": 0.23227007734565808, "learning_rate": 0.00044948197220922545, "loss": 3.037198066711426, "step": 8381, "token_acc": 0.29350065177542617 }, { "epoch": 4.913515098211668, "grad_norm": 0.20031712333540394, "learning_rate": 0.0004494673664399158, "loss": 3.0394139289855957, "step": 8382, "token_acc": 0.29347569713032123 }, { "epoch": 4.9141014365288775, "grad_norm": 0.24428980594366806, "learning_rate": 0.0004494527587968791, "loss": 3.0533525943756104, "step": 8383, "token_acc": 0.2946481249547153 }, { "epoch": 4.914687774846087, "grad_norm": 0.2562895142625386, "learning_rate": 0.0004494381492802524, "loss": 3.0395450592041016, "step": 8384, "token_acc": 0.2949973395546824 }, { "epoch": 4.915274113163295, "grad_norm": 0.18126657386303482, "learning_rate": 0.0004494235378901731, "loss": 2.9943675994873047, "step": 8385, "token_acc": 0.30025745604186144 }, { "epoch": 4.915860451480504, "grad_norm": 0.22415670999480147, "learning_rate": 0.0004494089246267784, "loss": 3.038001537322998, "step": 8386, "token_acc": 0.29297859603427995 }, { "epoch": 4.916446789797713, "grad_norm": 0.20437596850740655, "learning_rate": 0.00044939430949020553, "loss": 3.0979714393615723, "step": 8387, "token_acc": 0.2866406809408699 }, { "epoch": 4.917033128114922, "grad_norm": 0.16243876416894382, "learning_rate": 0.0004493796924805918, "loss": 3.051877975463867, "step": 8388, "token_acc": 0.2939679664889969 }, { "epoch": 4.917619466432131, "grad_norm": 0.1787584852061425, "learning_rate": 0.00044936507359807454, "loss": 3.0205438137054443, "step": 8389, "token_acc": 0.29548352830124186 }, { "epoch": 4.91820580474934, "grad_norm": 0.18996676743273985, "learning_rate": 0.0004493504528427911, "loss": 3.0381569862365723, "step": 8390, "token_acc": 0.2957881999232377 }, { "epoch": 4.9187921430665495, "grad_norm": 0.18363858065242186, "learning_rate": 0.00044933583021487876, "loss": 3.0685129165649414, "step": 8391, "token_acc": 0.2898426901129856 }, { "epoch": 4.919378481383759, "grad_norm": 0.17598128747187944, "learning_rate": 0.0004493212057144749, "loss": 3.0886130332946777, "step": 8392, "token_acc": 0.2866092542711534 }, { "epoch": 4.919964819700968, "grad_norm": 0.21282002634211616, "learning_rate": 0.0004493065793417169, "loss": 3.0419044494628906, "step": 8393, "token_acc": 0.2938258699316599 }, { "epoch": 4.920551158018176, "grad_norm": 0.18309837619247443, "learning_rate": 0.00044929195109674215, "loss": 3.040395736694336, "step": 8394, "token_acc": 0.2934880668164144 }, { "epoch": 4.921137496335385, "grad_norm": 0.21511746839746823, "learning_rate": 0.0004492773209796881, "loss": 2.9878604412078857, "step": 8395, "token_acc": 0.3020455307762728 }, { "epoch": 4.921723834652594, "grad_norm": 0.19210670608248406, "learning_rate": 0.0004492626889906921, "loss": 3.0452492237091064, "step": 8396, "token_acc": 0.2952712776891632 }, { "epoch": 4.922310172969803, "grad_norm": 0.17184419021105332, "learning_rate": 0.00044924805512989167, "loss": 3.051713228225708, "step": 8397, "token_acc": 0.29180905871542584 }, { "epoch": 4.9228965112870124, "grad_norm": 0.17294304880120165, "learning_rate": 0.00044923341939742423, "loss": 3.0094029903411865, "step": 8398, "token_acc": 0.300253817120162 }, { "epoch": 4.923482849604222, "grad_norm": 0.20626087898336617, "learning_rate": 0.0004492187817934273, "loss": 3.0717616081237793, "step": 8399, "token_acc": 0.2888186792016645 }, { "epoch": 4.924069187921431, "grad_norm": 0.2585863341558831, "learning_rate": 0.00044920414231803835, "loss": 2.993663787841797, "step": 8400, "token_acc": 0.299494367352537 }, { "epoch": 4.92465552623864, "grad_norm": 0.23513481016981097, "learning_rate": 0.0004491895009713949, "loss": 3.086247444152832, "step": 8401, "token_acc": 0.28693371163960973 }, { "epoch": 4.925241864555849, "grad_norm": 0.18735763527652263, "learning_rate": 0.0004491748577536346, "loss": 3.0475821495056152, "step": 8402, "token_acc": 0.2915089632978876 }, { "epoch": 4.925828202873058, "grad_norm": 0.1943066655460094, "learning_rate": 0.0004491602126648948, "loss": 3.042724609375, "step": 8403, "token_acc": 0.2942265611849857 }, { "epoch": 4.926414541190267, "grad_norm": 0.24815254749000878, "learning_rate": 0.00044914556570531324, "loss": 3.0480923652648926, "step": 8404, "token_acc": 0.2925051872042762 }, { "epoch": 4.927000879507476, "grad_norm": 0.20154569368434377, "learning_rate": 0.0004491309168750274, "loss": 3.031181573867798, "step": 8405, "token_acc": 0.2967442472201976 }, { "epoch": 4.927587217824685, "grad_norm": 0.16863884816826494, "learning_rate": 0.00044911626617417493, "loss": 3.032581329345703, "step": 8406, "token_acc": 0.2944937481614154 }, { "epoch": 4.928173556141894, "grad_norm": 0.19524799470391807, "learning_rate": 0.00044910161360289347, "loss": 3.026651382446289, "step": 8407, "token_acc": 0.29686870220216816 }, { "epoch": 4.928759894459103, "grad_norm": 0.17926796040222812, "learning_rate": 0.0004490869591613207, "loss": 3.0227315425872803, "step": 8408, "token_acc": 0.296401677615864 }, { "epoch": 4.929346232776312, "grad_norm": 0.1980439797916102, "learning_rate": 0.0004490723028495941, "loss": 3.0682120323181152, "step": 8409, "token_acc": 0.29048740104516996 }, { "epoch": 4.929932571093521, "grad_norm": 0.2049980627739991, "learning_rate": 0.0004490576446678515, "loss": 3.073478937149048, "step": 8410, "token_acc": 0.28903153158865136 }, { "epoch": 4.93051890941073, "grad_norm": 0.1725936879650742, "learning_rate": 0.0004490429846162306, "loss": 3.0452475547790527, "step": 8411, "token_acc": 0.29442753053056475 }, { "epoch": 4.931105247727939, "grad_norm": 0.20511734453037678, "learning_rate": 0.00044902832269486906, "loss": 3.0509352684020996, "step": 8412, "token_acc": 0.2913506236053026 }, { "epoch": 4.931691586045148, "grad_norm": 0.19243723442620878, "learning_rate": 0.00044901365890390464, "loss": 3.032176971435547, "step": 8413, "token_acc": 0.2955386243829845 }, { "epoch": 4.932277924362357, "grad_norm": 0.17401087709542093, "learning_rate": 0.00044899899324347503, "loss": 3.02199649810791, "step": 8414, "token_acc": 0.2973910120494389 }, { "epoch": 4.9328642626795665, "grad_norm": 0.17387788225458967, "learning_rate": 0.0004489843257137181, "loss": 3.0637059211730957, "step": 8415, "token_acc": 0.2921510978688968 }, { "epoch": 4.933450600996775, "grad_norm": 0.15565234178560305, "learning_rate": 0.0004489696563147715, "loss": 3.0710649490356445, "step": 8416, "token_acc": 0.28911078527812323 }, { "epoch": 4.934036939313984, "grad_norm": 0.21407191867385925, "learning_rate": 0.0004489549850467731, "loss": 3.0545597076416016, "step": 8417, "token_acc": 0.29166995782057153 }, { "epoch": 4.934623277631193, "grad_norm": 0.18146528789328698, "learning_rate": 0.0004489403119098607, "loss": 3.027956962585449, "step": 8418, "token_acc": 0.29537296803898144 }, { "epoch": 4.935209615948402, "grad_norm": 0.1909178526414011, "learning_rate": 0.0004489256369041722, "loss": 3.0069451332092285, "step": 8419, "token_acc": 0.2975427972516595 }, { "epoch": 4.935795954265611, "grad_norm": 0.2680027144177886, "learning_rate": 0.00044891096002984534, "loss": 3.0462937355041504, "step": 8420, "token_acc": 0.2944131534789339 }, { "epoch": 4.93638229258282, "grad_norm": 0.18486298008565794, "learning_rate": 0.0004488962812870181, "loss": 3.0483810901641846, "step": 8421, "token_acc": 0.29285166267288854 }, { "epoch": 4.936968630900029, "grad_norm": 0.21398703856565707, "learning_rate": 0.0004488816006758283, "loss": 3.0646841526031494, "step": 8422, "token_acc": 0.29133070211546686 }, { "epoch": 4.9375549692172385, "grad_norm": 0.25834315137136266, "learning_rate": 0.0004488669181964138, "loss": 3.059004783630371, "step": 8423, "token_acc": 0.29103705860500606 }, { "epoch": 4.938141307534448, "grad_norm": 0.1938012713376633, "learning_rate": 0.0004488522338489126, "loss": 3.0573654174804688, "step": 8424, "token_acc": 0.2920598909903723 }, { "epoch": 4.938727645851657, "grad_norm": 0.27086501898896626, "learning_rate": 0.00044883754763346267, "loss": 3.051037311553955, "step": 8425, "token_acc": 0.2919254658385093 }, { "epoch": 4.939313984168866, "grad_norm": 0.1750272176683943, "learning_rate": 0.00044882285955020194, "loss": 3.026841163635254, "step": 8426, "token_acc": 0.29719359025237846 }, { "epoch": 4.939900322486075, "grad_norm": 0.21844577399094703, "learning_rate": 0.0004488081695992683, "loss": 3.041978597640991, "step": 8427, "token_acc": 0.2940970953961473 }, { "epoch": 4.940486660803283, "grad_norm": 0.21203858606336096, "learning_rate": 0.0004487934777807998, "loss": 3.0423355102539062, "step": 8428, "token_acc": 0.29265449095991763 }, { "epoch": 4.941072999120492, "grad_norm": 0.22009538051523297, "learning_rate": 0.0004487787840949344, "loss": 3.0473384857177734, "step": 8429, "token_acc": 0.29266988527228366 }, { "epoch": 4.941659337437701, "grad_norm": 0.16780131165031184, "learning_rate": 0.00044876408854181026, "loss": 3.0276124477386475, "step": 8430, "token_acc": 0.2964890381335818 }, { "epoch": 4.9422456757549105, "grad_norm": 0.22262253304883894, "learning_rate": 0.0004487493911215654, "loss": 3.0023674964904785, "step": 8431, "token_acc": 0.2978106923567745 }, { "epoch": 4.94283201407212, "grad_norm": 0.2524767640831319, "learning_rate": 0.00044873469183433776, "loss": 3.076324224472046, "step": 8432, "token_acc": 0.2874964148861678 }, { "epoch": 4.943418352389329, "grad_norm": 0.19902012390108612, "learning_rate": 0.00044871999068026545, "loss": 3.091171979904175, "step": 8433, "token_acc": 0.28730561742648714 }, { "epoch": 4.944004690706538, "grad_norm": 0.22287508172812212, "learning_rate": 0.0004487052876594867, "loss": 3.052544355392456, "step": 8434, "token_acc": 0.29219458412603183 }, { "epoch": 4.944591029023747, "grad_norm": 0.19229753311911305, "learning_rate": 0.0004486905827721395, "loss": 3.0671579837799072, "step": 8435, "token_acc": 0.29092119648288334 }, { "epoch": 4.945177367340956, "grad_norm": 0.19403177690163473, "learning_rate": 0.00044867587601836196, "loss": 3.0288949012756348, "step": 8436, "token_acc": 0.296413758206598 }, { "epoch": 4.945763705658165, "grad_norm": 0.16584018438217044, "learning_rate": 0.0004486611673982923, "loss": 2.999239444732666, "step": 8437, "token_acc": 0.2976825803127993 }, { "epoch": 4.946350043975373, "grad_norm": 0.17676784923185568, "learning_rate": 0.00044864645691206875, "loss": 3.0527517795562744, "step": 8438, "token_acc": 0.2905328926517861 }, { "epoch": 4.9469363822925825, "grad_norm": 0.17727265723119953, "learning_rate": 0.0004486317445598293, "loss": 3.0688486099243164, "step": 8439, "token_acc": 0.2888766490233483 }, { "epoch": 4.947522720609792, "grad_norm": 0.19900594804450059, "learning_rate": 0.00044861703034171233, "loss": 3.031952142715454, "step": 8440, "token_acc": 0.2955611812617907 }, { "epoch": 4.948109058927001, "grad_norm": 0.23370636817737342, "learning_rate": 0.00044860231425785605, "loss": 3.0551018714904785, "step": 8441, "token_acc": 0.29185408371892824 }, { "epoch": 4.94869539724421, "grad_norm": 0.17542344362997406, "learning_rate": 0.0004485875963083985, "loss": 3.0284972190856934, "step": 8442, "token_acc": 0.2941579600054223 }, { "epoch": 4.949281735561419, "grad_norm": 0.2165878522868259, "learning_rate": 0.0004485728764934782, "loss": 3.058781147003174, "step": 8443, "token_acc": 0.29124957260461326 }, { "epoch": 4.949868073878628, "grad_norm": 0.2795911496589915, "learning_rate": 0.0004485581548132333, "loss": 3.048804521560669, "step": 8444, "token_acc": 0.29366312420669494 }, { "epoch": 4.950454412195837, "grad_norm": 0.2341740046727812, "learning_rate": 0.00044854343126780205, "loss": 3.023752212524414, "step": 8445, "token_acc": 0.2948938825535808 }, { "epoch": 4.951040750513046, "grad_norm": 0.17603102858021652, "learning_rate": 0.00044852870585732285, "loss": 3.08232045173645, "step": 8446, "token_acc": 0.2891856876520713 }, { "epoch": 4.951627088830255, "grad_norm": 0.22981735979831966, "learning_rate": 0.000448513978581934, "loss": 3.035311222076416, "step": 8447, "token_acc": 0.2942813590740689 }, { "epoch": 4.9522134271474645, "grad_norm": 0.1935709290079015, "learning_rate": 0.00044849924944177376, "loss": 3.0725202560424805, "step": 8448, "token_acc": 0.2883824070325297 }, { "epoch": 4.952799765464674, "grad_norm": 0.19138312580724745, "learning_rate": 0.00044848451843698054, "loss": 3.099198579788208, "step": 8449, "token_acc": 0.28619535197200535 }, { "epoch": 4.953386103781882, "grad_norm": 0.1954819700702093, "learning_rate": 0.0004484697855676928, "loss": 3.038224458694458, "step": 8450, "token_acc": 0.2943162460930906 }, { "epoch": 4.953972442099091, "grad_norm": 0.18478407726279455, "learning_rate": 0.00044845505083404883, "loss": 3.0504660606384277, "step": 8451, "token_acc": 0.2914338553036333 }, { "epoch": 4.9545587804163, "grad_norm": 0.21982256431040356, "learning_rate": 0.0004484403142361871, "loss": 3.073812484741211, "step": 8452, "token_acc": 0.28880761549036393 }, { "epoch": 4.955145118733509, "grad_norm": 0.15150478860054567, "learning_rate": 0.000448425575774246, "loss": 3.0459184646606445, "step": 8453, "token_acc": 0.29251594992805025 }, { "epoch": 4.955731457050718, "grad_norm": 0.25279749122182954, "learning_rate": 0.000448410835448364, "loss": 3.0496554374694824, "step": 8454, "token_acc": 0.29421970768505423 }, { "epoch": 4.956317795367927, "grad_norm": 0.185920640670047, "learning_rate": 0.0004483960932586796, "loss": 3.043316125869751, "step": 8455, "token_acc": 0.2925231327408219 }, { "epoch": 4.9569041336851365, "grad_norm": 0.21280155707736984, "learning_rate": 0.00044838134920533113, "loss": 3.0720698833465576, "step": 8456, "token_acc": 0.2875225577555225 }, { "epoch": 4.957490472002346, "grad_norm": 0.1635273802547219, "learning_rate": 0.00044836660328845734, "loss": 3.001539468765259, "step": 8457, "token_acc": 0.3003304689305863 }, { "epoch": 4.958076810319555, "grad_norm": 0.23599160678515302, "learning_rate": 0.00044835185550819656, "loss": 3.0260205268859863, "step": 8458, "token_acc": 0.2951222933377665 }, { "epoch": 4.958663148636763, "grad_norm": 0.17374224714152364, "learning_rate": 0.00044833710586468734, "loss": 3.0334410667419434, "step": 8459, "token_acc": 0.29562775250132745 }, { "epoch": 4.959249486953972, "grad_norm": 0.18994841096833417, "learning_rate": 0.00044832235435806836, "loss": 3.0350823402404785, "step": 8460, "token_acc": 0.2939990953544578 }, { "epoch": 4.959835825271181, "grad_norm": 0.20373880902726077, "learning_rate": 0.000448307600988478, "loss": 3.0292789936065674, "step": 8461, "token_acc": 0.29658730675699146 }, { "epoch": 4.96042216358839, "grad_norm": 0.19160635095151796, "learning_rate": 0.000448292845756055, "loss": 3.0338363647460938, "step": 8462, "token_acc": 0.29387773853033056 }, { "epoch": 4.961008501905599, "grad_norm": 0.18814175247275405, "learning_rate": 0.00044827808866093795, "loss": 3.0172781944274902, "step": 8463, "token_acc": 0.2958845948978906 }, { "epoch": 4.9615948402228085, "grad_norm": 0.15790413504583187, "learning_rate": 0.00044826332970326546, "loss": 3.082944631576538, "step": 8464, "token_acc": 0.29010316783380147 }, { "epoch": 4.962181178540018, "grad_norm": 0.19753814863712896, "learning_rate": 0.0004482485688831761, "loss": 3.0133426189422607, "step": 8465, "token_acc": 0.2969017040230418 }, { "epoch": 4.962767516857227, "grad_norm": 0.1783821299043102, "learning_rate": 0.00044823380620080856, "loss": 3.0817551612854004, "step": 8466, "token_acc": 0.2869654310574562 }, { "epoch": 4.963353855174436, "grad_norm": 0.154776354743497, "learning_rate": 0.0004482190416563016, "loss": 3.047630786895752, "step": 8467, "token_acc": 0.292778070365666 }, { "epoch": 4.963940193491645, "grad_norm": 0.18721079148633007, "learning_rate": 0.0004482042752497937, "loss": 3.0521769523620605, "step": 8468, "token_acc": 0.2925576606157613 }, { "epoch": 4.964526531808854, "grad_norm": 0.16812589410650702, "learning_rate": 0.00044818950698142384, "loss": 3.0386338233947754, "step": 8469, "token_acc": 0.29484617263153695 }, { "epoch": 4.965112870126063, "grad_norm": 0.16126588716704487, "learning_rate": 0.00044817473685133057, "loss": 3.016763925552368, "step": 8470, "token_acc": 0.29731926951977494 }, { "epoch": 4.965699208443271, "grad_norm": 0.1861073486753079, "learning_rate": 0.0004481599648596528, "loss": 3.029355764389038, "step": 8471, "token_acc": 0.2959844679997769 }, { "epoch": 4.9662855467604805, "grad_norm": 0.15954852221429763, "learning_rate": 0.00044814519100652906, "loss": 2.9987335205078125, "step": 8472, "token_acc": 0.2990952068524661 }, { "epoch": 4.96687188507769, "grad_norm": 0.1508549938328051, "learning_rate": 0.0004481304152920983, "loss": 3.091033935546875, "step": 8473, "token_acc": 0.2885059395234034 }, { "epoch": 4.967458223394899, "grad_norm": 0.1940092176792674, "learning_rate": 0.0004481156377164993, "loss": 3.0040957927703857, "step": 8474, "token_acc": 0.2998250481654207 }, { "epoch": 4.968044561712108, "grad_norm": 0.15989476810568515, "learning_rate": 0.00044810085827987084, "loss": 3.0267391204833984, "step": 8475, "token_acc": 0.2978285471043834 }, { "epoch": 4.968630900029317, "grad_norm": 0.1607073915134726, "learning_rate": 0.00044808607698235175, "loss": 3.027008295059204, "step": 8476, "token_acc": 0.29731806510860803 }, { "epoch": 4.969217238346526, "grad_norm": 0.18141320557326815, "learning_rate": 0.0004480712938240809, "loss": 3.039196491241455, "step": 8477, "token_acc": 0.29435533621221466 }, { "epoch": 4.969803576663735, "grad_norm": 0.16751714163675135, "learning_rate": 0.0004480565088051971, "loss": 3.0556774139404297, "step": 8478, "token_acc": 0.2915108168875951 }, { "epoch": 4.970389914980944, "grad_norm": 0.19943968867100728, "learning_rate": 0.00044804172192583936, "loss": 3.064584732055664, "step": 8479, "token_acc": 0.2908790668259434 }, { "epoch": 4.970976253298153, "grad_norm": 0.20134586599079454, "learning_rate": 0.00044802693318614644, "loss": 3.0471487045288086, "step": 8480, "token_acc": 0.2918769141776977 }, { "epoch": 4.971562591615362, "grad_norm": 0.22889246757065373, "learning_rate": 0.0004480121425862574, "loss": 3.017061710357666, "step": 8481, "token_acc": 0.29851934322518886 }, { "epoch": 4.972148929932571, "grad_norm": 0.251719543799185, "learning_rate": 0.00044799735012631103, "loss": 3.0218594074249268, "step": 8482, "token_acc": 0.29698731737039635 }, { "epoch": 4.97273526824978, "grad_norm": 0.31610246255507385, "learning_rate": 0.0004479825558064464, "loss": 3.0540881156921387, "step": 8483, "token_acc": 0.2905552298281024 }, { "epoch": 4.973321606566989, "grad_norm": 0.31995209516591117, "learning_rate": 0.00044796775962680245, "loss": 3.0387189388275146, "step": 8484, "token_acc": 0.29417174238121785 }, { "epoch": 4.973907944884198, "grad_norm": 0.19001130653655088, "learning_rate": 0.00044795296158751816, "loss": 3.0949974060058594, "step": 8485, "token_acc": 0.28550962762369864 }, { "epoch": 4.974494283201407, "grad_norm": 0.338893150316457, "learning_rate": 0.0004479381616887325, "loss": 3.0726876258850098, "step": 8486, "token_acc": 0.2897717698601412 }, { "epoch": 4.975080621518616, "grad_norm": 0.2488511223537283, "learning_rate": 0.0004479233599305846, "loss": 3.054105758666992, "step": 8487, "token_acc": 0.2928103917294209 }, { "epoch": 4.975666959835825, "grad_norm": 0.2031860193183852, "learning_rate": 0.0004479085563132134, "loss": 3.0231683254241943, "step": 8488, "token_acc": 0.2981127383255043 }, { "epoch": 4.9762532981530345, "grad_norm": 0.19638629244786235, "learning_rate": 0.000447893750836758, "loss": 3.010496139526367, "step": 8489, "token_acc": 0.29813276712670955 }, { "epoch": 4.976839636470244, "grad_norm": 0.20089546929108917, "learning_rate": 0.00044787894350135747, "loss": 3.0292534828186035, "step": 8490, "token_acc": 0.29662090238766775 }, { "epoch": 4.977425974787453, "grad_norm": 0.1738929127512271, "learning_rate": 0.00044786413430715087, "loss": 3.0659685134887695, "step": 8491, "token_acc": 0.29235555749119435 }, { "epoch": 4.978012313104662, "grad_norm": 0.23466258092596798, "learning_rate": 0.0004478493232542774, "loss": 2.99967098236084, "step": 8492, "token_acc": 0.2996582449056971 }, { "epoch": 4.97859865142187, "grad_norm": 0.1971653290746943, "learning_rate": 0.0004478345103428761, "loss": 3.011901617050171, "step": 8493, "token_acc": 0.2988665145870601 }, { "epoch": 4.979184989739079, "grad_norm": 0.19890948915571374, "learning_rate": 0.00044781969557308634, "loss": 3.0285441875457764, "step": 8494, "token_acc": 0.29512861079922426 }, { "epoch": 4.979771328056288, "grad_norm": 0.18631944461211597, "learning_rate": 0.00044780487894504695, "loss": 2.9871339797973633, "step": 8495, "token_acc": 0.3008657274693057 }, { "epoch": 4.980357666373497, "grad_norm": 0.19414259449493296, "learning_rate": 0.00044779006045889727, "loss": 3.06040358543396, "step": 8496, "token_acc": 0.29125743828827144 }, { "epoch": 4.9809440046907065, "grad_norm": 0.18315458347144067, "learning_rate": 0.0004477752401147765, "loss": 3.048372983932495, "step": 8497, "token_acc": 0.29278189782679964 }, { "epoch": 4.981530343007916, "grad_norm": 0.19955835358100418, "learning_rate": 0.0004477604179128238, "loss": 3.068040370941162, "step": 8498, "token_acc": 0.29015434546668895 }, { "epoch": 4.982116681325125, "grad_norm": 0.20553149716816185, "learning_rate": 0.0004477455938531786, "loss": 3.0309255123138428, "step": 8499, "token_acc": 0.29595542198847985 }, { "epoch": 4.982703019642334, "grad_norm": 0.2061509054318814, "learning_rate": 0.00044773076793597997, "loss": 3.1177492141723633, "step": 8500, "token_acc": 0.28390821851157877 }, { "epoch": 4.983289357959543, "grad_norm": 0.1966155042669467, "learning_rate": 0.00044771594016136717, "loss": 3.032310724258423, "step": 8501, "token_acc": 0.2932850585318934 }, { "epoch": 4.983875696276751, "grad_norm": 0.19039385888055835, "learning_rate": 0.0004477011105294796, "loss": 3.033277988433838, "step": 8502, "token_acc": 0.2946482450294638 }, { "epoch": 4.98446203459396, "grad_norm": 0.22163328495939572, "learning_rate": 0.00044768627904045647, "loss": 3.1107263565063477, "step": 8503, "token_acc": 0.2842375717017208 }, { "epoch": 4.985048372911169, "grad_norm": 0.16008311321290122, "learning_rate": 0.00044767144569443705, "loss": 3.102465867996216, "step": 8504, "token_acc": 0.28646643658371573 }, { "epoch": 4.9856347112283785, "grad_norm": 0.1812685556242146, "learning_rate": 0.0004476566104915609, "loss": 3.0310678482055664, "step": 8505, "token_acc": 0.29558499180972775 }, { "epoch": 4.986221049545588, "grad_norm": 0.1606980837395255, "learning_rate": 0.00044764177343196716, "loss": 3.022752523422241, "step": 8506, "token_acc": 0.2974833614882055 }, { "epoch": 4.986807387862797, "grad_norm": 0.1745925139035418, "learning_rate": 0.0004476269345157953, "loss": 3.089613437652588, "step": 8507, "token_acc": 0.2873136485215372 }, { "epoch": 4.987393726180006, "grad_norm": 0.17011879919816955, "learning_rate": 0.00044761209374318467, "loss": 3.005892515182495, "step": 8508, "token_acc": 0.298244565739805 }, { "epoch": 4.987980064497215, "grad_norm": 0.17569017120956915, "learning_rate": 0.00044759725111427476, "loss": 3.094682455062866, "step": 8509, "token_acc": 0.28639099286793285 }, { "epoch": 4.988566402814424, "grad_norm": 0.21673600884941047, "learning_rate": 0.0004475824066292049, "loss": 3.0469343662261963, "step": 8510, "token_acc": 0.2944642465525396 }, { "epoch": 4.989152741131633, "grad_norm": 0.21086889051491386, "learning_rate": 0.00044756756028811463, "loss": 3.049027919769287, "step": 8511, "token_acc": 0.29565028250635705 }, { "epoch": 4.989739079448842, "grad_norm": 0.18365389741113794, "learning_rate": 0.00044755271209114336, "loss": 3.021958827972412, "step": 8512, "token_acc": 0.2965382455006425 }, { "epoch": 4.990325417766051, "grad_norm": 0.23840216935179523, "learning_rate": 0.0004475378620384305, "loss": 3.031259059906006, "step": 8513, "token_acc": 0.29362008254136535 }, { "epoch": 4.99091175608326, "grad_norm": 0.21317469902881198, "learning_rate": 0.00044752301013011557, "loss": 3.0104594230651855, "step": 8514, "token_acc": 0.2964538236458325 }, { "epoch": 4.991498094400469, "grad_norm": 0.16443007184496214, "learning_rate": 0.0004475081563663382, "loss": 3.0685951709747314, "step": 8515, "token_acc": 0.2900460483228155 }, { "epoch": 4.992084432717678, "grad_norm": 0.24284295559354518, "learning_rate": 0.00044749330074723786, "loss": 3.0139474868774414, "step": 8516, "token_acc": 0.2965865043971158 }, { "epoch": 4.992670771034887, "grad_norm": 0.22586884255268, "learning_rate": 0.00044747844327295406, "loss": 3.054715871810913, "step": 8517, "token_acc": 0.29274067784155394 }, { "epoch": 4.993257109352096, "grad_norm": 0.17839396651015887, "learning_rate": 0.0004474635839436264, "loss": 3.0711936950683594, "step": 8518, "token_acc": 0.29162238867032214 }, { "epoch": 4.993843447669305, "grad_norm": 0.24230121325702328, "learning_rate": 0.0004474487227593944, "loss": 3.076457977294922, "step": 8519, "token_acc": 0.2886245466571637 }, { "epoch": 4.994429785986514, "grad_norm": 0.26819486543111354, "learning_rate": 0.00044743385972039786, "loss": 3.0565733909606934, "step": 8520, "token_acc": 0.29193786090030366 }, { "epoch": 4.995016124303723, "grad_norm": 0.23128099413474382, "learning_rate": 0.0004474189948267761, "loss": 3.0763425827026367, "step": 8521, "token_acc": 0.2890938324007975 }, { "epoch": 4.9956024626209325, "grad_norm": 0.18968856147067403, "learning_rate": 0.00044740412807866897, "loss": 3.008232831954956, "step": 8522, "token_acc": 0.2992816946993663 }, { "epoch": 4.996188800938142, "grad_norm": 0.21347237809077374, "learning_rate": 0.00044738925947621603, "loss": 3.0376553535461426, "step": 8523, "token_acc": 0.29335778002516966 }, { "epoch": 4.99677513925535, "grad_norm": 0.22796372417114907, "learning_rate": 0.0004473743890195571, "loss": 3.0593647956848145, "step": 8524, "token_acc": 0.2915933351974604 }, { "epoch": 4.997361477572559, "grad_norm": 0.18002496742290655, "learning_rate": 0.0004473595167088316, "loss": 3.0697476863861084, "step": 8525, "token_acc": 0.2898295616768251 }, { "epoch": 4.997947815889768, "grad_norm": 0.18725066759491407, "learning_rate": 0.00044734464254417945, "loss": 3.061734676361084, "step": 8526, "token_acc": 0.2913460570047336 }, { "epoch": 4.998534154206977, "grad_norm": 0.20795528649445905, "learning_rate": 0.0004473297665257403, "loss": 3.073535442352295, "step": 8527, "token_acc": 0.28885737470338124 }, { "epoch": 4.999120492524186, "grad_norm": 0.1965688890500879, "learning_rate": 0.0004473148886536539, "loss": 3.06752872467041, "step": 8528, "token_acc": 0.29109355184860014 }, { "epoch": 4.999706830841395, "grad_norm": 0.21026564864050137, "learning_rate": 0.00044730000892806, "loss": 3.078951597213745, "step": 8529, "token_acc": 0.2876585085842509 }, { "epoch": 5.0, "grad_norm": 0.2651592235412727, "learning_rate": 0.00044728512734909845, "loss": 3.0628275871276855, "step": 8530, "token_acc": 0.2918714146552657 }, { "epoch": 5.0, "eval_loss": 3.0703072547912598, "eval_runtime": 6.5296, "eval_samples_per_second": 39.206, "eval_steps_per_second": 4.901, "eval_token_acc": 0.29058256050797504, "step": 8530 }, { "epoch": 5.000586338317209, "grad_norm": 0.21651653435947588, "learning_rate": 0.00044727024391690885, "loss": 2.9531311988830566, "step": 8531, "token_acc": 0.3049516339869281 }, { "epoch": 5.001172676634418, "grad_norm": 0.22572002856123086, "learning_rate": 0.00044725535863163125, "loss": 2.9991421699523926, "step": 8532, "token_acc": 0.2985009804482495 }, { "epoch": 5.001759014951627, "grad_norm": 0.24281064474440098, "learning_rate": 0.0004472404714934053, "loss": 3.0120253562927246, "step": 8533, "token_acc": 0.29733722155780185 }, { "epoch": 5.0023453532688364, "grad_norm": 0.21846299797166016, "learning_rate": 0.00044722558250237087, "loss": 3.0014803409576416, "step": 8534, "token_acc": 0.2964582164818813 }, { "epoch": 5.002931691586046, "grad_norm": 0.26891261717815335, "learning_rate": 0.0004472106916586679, "loss": 3.0168392658233643, "step": 8535, "token_acc": 0.29485815513858293 }, { "epoch": 5.003518029903254, "grad_norm": 0.29370236936690214, "learning_rate": 0.00044719579896243625, "loss": 2.9636363983154297, "step": 8536, "token_acc": 0.30344725248297355 }, { "epoch": 5.004104368220463, "grad_norm": 0.23663267126185183, "learning_rate": 0.00044718090441381574, "loss": 2.9611024856567383, "step": 8537, "token_acc": 0.3056797550680636 }, { "epoch": 5.004690706537672, "grad_norm": 0.23502856030757432, "learning_rate": 0.00044716600801294635, "loss": 2.9888916015625, "step": 8538, "token_acc": 0.2995657480357863 }, { "epoch": 5.005277044854881, "grad_norm": 0.2785406970987534, "learning_rate": 0.0004471511097599681, "loss": 2.949319839477539, "step": 8539, "token_acc": 0.30699204811367703 }, { "epoch": 5.00586338317209, "grad_norm": 0.2104951929700274, "learning_rate": 0.0004471362096550207, "loss": 2.9499707221984863, "step": 8540, "token_acc": 0.30632604347393666 }, { "epoch": 5.006449721489299, "grad_norm": 0.23910648249618885, "learning_rate": 0.0004471213076982443, "loss": 3.025766611099243, "step": 8541, "token_acc": 0.29633677725319163 }, { "epoch": 5.0070360598065085, "grad_norm": 0.18387754474718215, "learning_rate": 0.0004471064038897789, "loss": 2.986368417739868, "step": 8542, "token_acc": 0.2996799440574487 }, { "epoch": 5.007622398123718, "grad_norm": 0.2511605384709285, "learning_rate": 0.00044709149822976435, "loss": 2.977336883544922, "step": 8543, "token_acc": 0.30226935850791464 }, { "epoch": 5.008208736440927, "grad_norm": 0.17679463789528796, "learning_rate": 0.00044707659071834086, "loss": 2.9798312187194824, "step": 8544, "token_acc": 0.3011056879708022 }, { "epoch": 5.008795074758136, "grad_norm": 0.23623273149663992, "learning_rate": 0.0004470616813556483, "loss": 2.9449620246887207, "step": 8545, "token_acc": 0.30611298525260144 }, { "epoch": 5.009381413075345, "grad_norm": 0.1718111370840815, "learning_rate": 0.00044704677014182676, "loss": 2.9174575805664062, "step": 8546, "token_acc": 0.3086948525982212 }, { "epoch": 5.009967751392553, "grad_norm": 0.20379763075645269, "learning_rate": 0.00044703185707701637, "loss": 2.99887752532959, "step": 8547, "token_acc": 0.2984795981054149 }, { "epoch": 5.010554089709762, "grad_norm": 0.1741586693323542, "learning_rate": 0.0004470169421613572, "loss": 2.9578797817230225, "step": 8548, "token_acc": 0.3043419157340528 }, { "epoch": 5.011140428026971, "grad_norm": 0.19970138004436996, "learning_rate": 0.00044700202539498933, "loss": 2.9282913208007812, "step": 8549, "token_acc": 0.30906297369479363 }, { "epoch": 5.0117267663441805, "grad_norm": 0.1728795553689899, "learning_rate": 0.00044698710677805285, "loss": 2.9607439041137695, "step": 8550, "token_acc": 0.30377471854248167 }, { "epoch": 5.01231310466139, "grad_norm": 0.21480135240567783, "learning_rate": 0.00044697218631068803, "loss": 3.0034103393554688, "step": 8551, "token_acc": 0.2972327044025157 }, { "epoch": 5.012899442978599, "grad_norm": 0.1783247378388419, "learning_rate": 0.0004469572639930349, "loss": 3.0158700942993164, "step": 8552, "token_acc": 0.2955437111047521 }, { "epoch": 5.013485781295808, "grad_norm": 0.20283435136031588, "learning_rate": 0.0004469423398252337, "loss": 2.9621500968933105, "step": 8553, "token_acc": 0.3042521271294611 }, { "epoch": 5.014072119613017, "grad_norm": 0.19167515732003332, "learning_rate": 0.00044692741380742454, "loss": 2.9783835411071777, "step": 8554, "token_acc": 0.3015620935613269 }, { "epoch": 5.014658457930226, "grad_norm": 0.26863259409179513, "learning_rate": 0.00044691248593974774, "loss": 2.9493565559387207, "step": 8555, "token_acc": 0.30656807751147375 }, { "epoch": 5.015244796247435, "grad_norm": 0.2375637162298047, "learning_rate": 0.00044689755622234344, "loss": 2.97379732131958, "step": 8556, "token_acc": 0.301923826855583 }, { "epoch": 5.015831134564644, "grad_norm": 0.1840828937392791, "learning_rate": 0.000446882624655352, "loss": 2.9432685375213623, "step": 8557, "token_acc": 0.3075826990368313 }, { "epoch": 5.0164174728818525, "grad_norm": 0.2436454050862875, "learning_rate": 0.00044686769123891354, "loss": 3.0069971084594727, "step": 8558, "token_acc": 0.29715413054960077 }, { "epoch": 5.017003811199062, "grad_norm": 0.1661001629265361, "learning_rate": 0.0004468527559731684, "loss": 2.9651999473571777, "step": 8559, "token_acc": 0.3025444503214257 }, { "epoch": 5.017590149516271, "grad_norm": 0.1949425722001224, "learning_rate": 0.0004468378188582569, "loss": 2.979250907897949, "step": 8560, "token_acc": 0.30091147694872367 }, { "epoch": 5.01817648783348, "grad_norm": 0.1913962085446768, "learning_rate": 0.00044682287989431934, "loss": 2.978869915008545, "step": 8561, "token_acc": 0.3041833384290215 }, { "epoch": 5.018762826150689, "grad_norm": 0.19531430551856582, "learning_rate": 0.00044680793908149596, "loss": 3.003464698791504, "step": 8562, "token_acc": 0.2973874303408747 }, { "epoch": 5.019349164467898, "grad_norm": 0.20629626729860764, "learning_rate": 0.0004467929964199273, "loss": 2.987518310546875, "step": 8563, "token_acc": 0.2981450631628971 }, { "epoch": 5.019935502785107, "grad_norm": 0.20383607435168705, "learning_rate": 0.0004467780519097536, "loss": 2.96308970451355, "step": 8564, "token_acc": 0.3037341846218325 }, { "epoch": 5.020521841102316, "grad_norm": 0.17526559712721163, "learning_rate": 0.00044676310555111524, "loss": 2.9877824783325195, "step": 8565, "token_acc": 0.29959882726096576 }, { "epoch": 5.021108179419525, "grad_norm": 0.19633770639790632, "learning_rate": 0.0004467481573441527, "loss": 2.978888750076294, "step": 8566, "token_acc": 0.3016042345069544 }, { "epoch": 5.0216945177367345, "grad_norm": 0.1774046697756081, "learning_rate": 0.0004467332072890062, "loss": 2.9549670219421387, "step": 8567, "token_acc": 0.30421705034151275 }, { "epoch": 5.022280856053943, "grad_norm": 0.16027953312480472, "learning_rate": 0.0004467182553858165, "loss": 3.004926919937134, "step": 8568, "token_acc": 0.29784870559935295 }, { "epoch": 5.022867194371152, "grad_norm": 0.18253541399362658, "learning_rate": 0.0004467033016347238, "loss": 2.9257240295410156, "step": 8569, "token_acc": 0.3101169147820566 }, { "epoch": 5.023453532688361, "grad_norm": 0.16859814205712167, "learning_rate": 0.0004466883460358686, "loss": 2.9686279296875, "step": 8570, "token_acc": 0.30419109774129405 }, { "epoch": 5.02403987100557, "grad_norm": 0.19645762569258512, "learning_rate": 0.0004466733885893915, "loss": 2.975919723510742, "step": 8571, "token_acc": 0.3015831839971624 }, { "epoch": 5.024626209322779, "grad_norm": 0.2081157755620637, "learning_rate": 0.00044665842929543287, "loss": 2.998379707336426, "step": 8572, "token_acc": 0.29943394340486196 }, { "epoch": 5.025212547639988, "grad_norm": 0.20566448724773456, "learning_rate": 0.0004466434681541334, "loss": 2.9780659675598145, "step": 8573, "token_acc": 0.3004716055249646 }, { "epoch": 5.025798885957197, "grad_norm": 0.2012513031776449, "learning_rate": 0.0004466285051656334, "loss": 2.9635045528411865, "step": 8574, "token_acc": 0.3028517599995959 }, { "epoch": 5.0263852242744065, "grad_norm": 0.1864288736354299, "learning_rate": 0.0004466135403300736, "loss": 2.9872422218322754, "step": 8575, "token_acc": 0.30066036079759606 }, { "epoch": 5.026971562591616, "grad_norm": 0.19095984973709618, "learning_rate": 0.0004465985736475946, "loss": 2.9934439659118652, "step": 8576, "token_acc": 0.29861516313556624 }, { "epoch": 5.027557900908825, "grad_norm": 0.25724294608878046, "learning_rate": 0.0004465836051183368, "loss": 2.990786552429199, "step": 8577, "token_acc": 0.3000929178590243 }, { "epoch": 5.028144239226034, "grad_norm": 0.24779323184876134, "learning_rate": 0.000446568634742441, "loss": 2.9668638706207275, "step": 8578, "token_acc": 0.3024170853757125 }, { "epoch": 5.028730577543242, "grad_norm": 0.21021481836487746, "learning_rate": 0.00044655366252004775, "loss": 2.95998477935791, "step": 8579, "token_acc": 0.30392259771068086 }, { "epoch": 5.029316915860451, "grad_norm": 0.18225132934618143, "learning_rate": 0.00044653868845129767, "loss": 2.9551820755004883, "step": 8580, "token_acc": 0.303647634650001 }, { "epoch": 5.02990325417766, "grad_norm": 0.23513417208296597, "learning_rate": 0.0004465237125363315, "loss": 2.8932971954345703, "step": 8581, "token_acc": 0.315813628445965 }, { "epoch": 5.030489592494869, "grad_norm": 0.18365637324019077, "learning_rate": 0.00044650873477528985, "loss": 3.0181431770324707, "step": 8582, "token_acc": 0.29653990699712335 }, { "epoch": 5.0310759308120785, "grad_norm": 0.23236298813488257, "learning_rate": 0.0004464937551683134, "loss": 2.992175579071045, "step": 8583, "token_acc": 0.30067313073048946 }, { "epoch": 5.031662269129288, "grad_norm": 0.21459267018536887, "learning_rate": 0.0004464787737155429, "loss": 2.9586470127105713, "step": 8584, "token_acc": 0.30397193004605555 }, { "epoch": 5.032248607446497, "grad_norm": 0.19500355535004943, "learning_rate": 0.00044646379041711915, "loss": 2.9836082458496094, "step": 8585, "token_acc": 0.29986052261752555 }, { "epoch": 5.032834945763706, "grad_norm": 0.2225562938573435, "learning_rate": 0.0004464488052731828, "loss": 2.9796080589294434, "step": 8586, "token_acc": 0.30089126745311995 }, { "epoch": 5.033421284080915, "grad_norm": 0.16930396836052575, "learning_rate": 0.0004464338182838746, "loss": 2.990751266479492, "step": 8587, "token_acc": 0.2982945003545349 }, { "epoch": 5.034007622398124, "grad_norm": 0.20601011127730254, "learning_rate": 0.0004464188294493354, "loss": 2.994412899017334, "step": 8588, "token_acc": 0.29891394845418046 }, { "epoch": 5.034593960715333, "grad_norm": 0.18553739779654535, "learning_rate": 0.000446403838769706, "loss": 2.961716651916504, "step": 8589, "token_acc": 0.30247082671341696 }, { "epoch": 5.035180299032541, "grad_norm": 0.16914231102705346, "learning_rate": 0.00044638884624512723, "loss": 2.9398536682128906, "step": 8590, "token_acc": 0.30648004064911555 }, { "epoch": 5.0357666373497505, "grad_norm": 0.1904742495773429, "learning_rate": 0.0004463738518757398, "loss": 2.9772722721099854, "step": 8591, "token_acc": 0.3009807586760951 }, { "epoch": 5.03635297566696, "grad_norm": 0.16086845018883977, "learning_rate": 0.0004463588556616847, "loss": 2.981649398803711, "step": 8592, "token_acc": 0.2995121369542 }, { "epoch": 5.036939313984169, "grad_norm": 0.19569017217664667, "learning_rate": 0.0004463438576031027, "loss": 2.9715065956115723, "step": 8593, "token_acc": 0.3019302854957473 }, { "epoch": 5.037525652301378, "grad_norm": 0.18410594082517043, "learning_rate": 0.0004463288577001347, "loss": 3.00132417678833, "step": 8594, "token_acc": 0.29803745560725103 }, { "epoch": 5.038111990618587, "grad_norm": 0.16008511298314984, "learning_rate": 0.0004463138559529217, "loss": 2.993732452392578, "step": 8595, "token_acc": 0.2983107098316894 }, { "epoch": 5.038698328935796, "grad_norm": 0.19574003776501467, "learning_rate": 0.0004462988523616046, "loss": 2.980804920196533, "step": 8596, "token_acc": 0.2987962090813361 }, { "epoch": 5.039284667253005, "grad_norm": 0.18464562336370913, "learning_rate": 0.0004462838469263242, "loss": 2.982194423675537, "step": 8597, "token_acc": 0.2996981998824849 }, { "epoch": 5.039871005570214, "grad_norm": 0.1571993673061543, "learning_rate": 0.00044626883964722164, "loss": 2.926562786102295, "step": 8598, "token_acc": 0.30940352355891787 }, { "epoch": 5.040457343887423, "grad_norm": 0.1944283114064994, "learning_rate": 0.0004462538305244378, "loss": 2.9606873989105225, "step": 8599, "token_acc": 0.3037853127984594 }, { "epoch": 5.0410436822046325, "grad_norm": 0.2092103508225096, "learning_rate": 0.00044623881955811365, "loss": 2.959688186645508, "step": 8600, "token_acc": 0.3035425635982235 }, { "epoch": 5.041630020521841, "grad_norm": 0.17326912832992516, "learning_rate": 0.00044622380674839025, "loss": 2.9784727096557617, "step": 8601, "token_acc": 0.3003358461886079 }, { "epoch": 5.04221635883905, "grad_norm": 0.19295960945849924, "learning_rate": 0.00044620879209540855, "loss": 2.9652857780456543, "step": 8602, "token_acc": 0.3036766155582984 }, { "epoch": 5.042802697156259, "grad_norm": 0.20865610294849113, "learning_rate": 0.0004461937755993096, "loss": 2.995967388153076, "step": 8603, "token_acc": 0.29896122206856574 }, { "epoch": 5.043389035473468, "grad_norm": 0.20856190984697506, "learning_rate": 0.0004461787572602346, "loss": 2.997234344482422, "step": 8604, "token_acc": 0.298446230426761 }, { "epoch": 5.043975373790677, "grad_norm": 0.1700081823953103, "learning_rate": 0.00044616373707832455, "loss": 2.971435546875, "step": 8605, "token_acc": 0.3016519742203126 }, { "epoch": 5.044561712107886, "grad_norm": 0.16944168943075683, "learning_rate": 0.00044614871505372044, "loss": 2.975215196609497, "step": 8606, "token_acc": 0.30201850944806785 }, { "epoch": 5.045148050425095, "grad_norm": 0.20181168820853396, "learning_rate": 0.00044613369118656353, "loss": 2.976470947265625, "step": 8607, "token_acc": 0.3008271029725142 }, { "epoch": 5.0457343887423045, "grad_norm": 0.2714293895545918, "learning_rate": 0.00044611866547699487, "loss": 3.0086488723754883, "step": 8608, "token_acc": 0.2963525163185124 }, { "epoch": 5.046320727059514, "grad_norm": 0.22079845886327976, "learning_rate": 0.0004461036379251556, "loss": 2.9424355030059814, "step": 8609, "token_acc": 0.30691079574252406 }, { "epoch": 5.046907065376723, "grad_norm": 0.1842420869462325, "learning_rate": 0.00044608860853118695, "loss": 2.938812017440796, "step": 8610, "token_acc": 0.3072638157999543 }, { "epoch": 5.047493403693931, "grad_norm": 0.18522440045263966, "learning_rate": 0.00044607357729522997, "loss": 2.9694409370422363, "step": 8611, "token_acc": 0.3023615559217686 }, { "epoch": 5.04807974201114, "grad_norm": 0.23819309698712512, "learning_rate": 0.000446058544217426, "loss": 2.9925127029418945, "step": 8612, "token_acc": 0.29953019161161826 }, { "epoch": 5.048666080328349, "grad_norm": 0.22080586186445192, "learning_rate": 0.0004460435092979162, "loss": 2.9974308013916016, "step": 8613, "token_acc": 0.2993419714827196 }, { "epoch": 5.049252418645558, "grad_norm": 0.19473636781924344, "learning_rate": 0.00044602847253684175, "loss": 2.9720683097839355, "step": 8614, "token_acc": 0.3031263901676995 }, { "epoch": 5.049838756962767, "grad_norm": 0.2770266735643184, "learning_rate": 0.000446013433934344, "loss": 2.958238124847412, "step": 8615, "token_acc": 0.3034786927370683 }, { "epoch": 5.0504250952799765, "grad_norm": 0.16672914554842258, "learning_rate": 0.0004459983934905642, "loss": 2.960379123687744, "step": 8616, "token_acc": 0.3038039766604258 }, { "epoch": 5.051011433597186, "grad_norm": 0.219613956351533, "learning_rate": 0.0004459833512056436, "loss": 2.944220781326294, "step": 8617, "token_acc": 0.3077087648911027 }, { "epoch": 5.051597771914395, "grad_norm": 0.20154021212726264, "learning_rate": 0.00044596830707972345, "loss": 2.962651491165161, "step": 8618, "token_acc": 0.3034662593922553 }, { "epoch": 5.052184110231604, "grad_norm": 0.22355544683121295, "learning_rate": 0.00044595326111294514, "loss": 2.9767870903015137, "step": 8619, "token_acc": 0.30070841073947596 }, { "epoch": 5.052770448548813, "grad_norm": 0.23896038261285166, "learning_rate": 0.00044593821330545, "loss": 2.9746227264404297, "step": 8620, "token_acc": 0.3017857050106737 }, { "epoch": 5.053356786866022, "grad_norm": 0.17937357659973965, "learning_rate": 0.0004459231636573794, "loss": 2.9359049797058105, "step": 8621, "token_acc": 0.30662470039498685 }, { "epoch": 5.05394312518323, "grad_norm": 0.21005904788769492, "learning_rate": 0.0004459081121688747, "loss": 2.97896146774292, "step": 8622, "token_acc": 0.3007855594263292 }, { "epoch": 5.054529463500439, "grad_norm": 0.1868839888522517, "learning_rate": 0.00044589305884007723, "loss": 2.9834413528442383, "step": 8623, "token_acc": 0.29919687476739143 }, { "epoch": 5.0551158018176485, "grad_norm": 0.2396666052058184, "learning_rate": 0.0004458780036711285, "loss": 3.0094852447509766, "step": 8624, "token_acc": 0.2980510122628723 }, { "epoch": 5.055702140134858, "grad_norm": 0.17911013713198776, "learning_rate": 0.00044586294666216976, "loss": 3.035132884979248, "step": 8625, "token_acc": 0.2944158102275903 }, { "epoch": 5.056288478452067, "grad_norm": 0.2439051766555579, "learning_rate": 0.0004458478878133426, "loss": 2.957828998565674, "step": 8626, "token_acc": 0.30341192585532517 }, { "epoch": 5.056874816769276, "grad_norm": 0.17113305704283974, "learning_rate": 0.00044583282712478854, "loss": 2.998318672180176, "step": 8627, "token_acc": 0.2997714996863221 }, { "epoch": 5.057461155086485, "grad_norm": 0.24652215906221567, "learning_rate": 0.00044581776459664884, "loss": 2.976487636566162, "step": 8628, "token_acc": 0.30149553300775755 }, { "epoch": 5.058047493403694, "grad_norm": 0.17612599844843735, "learning_rate": 0.00044580270022906524, "loss": 2.991102933883667, "step": 8629, "token_acc": 0.2987881999270417 }, { "epoch": 5.058633831720903, "grad_norm": 0.21805217067953936, "learning_rate": 0.00044578763402217906, "loss": 2.986628532409668, "step": 8630, "token_acc": 0.2996015498337501 }, { "epoch": 5.059220170038112, "grad_norm": 0.1829586784045076, "learning_rate": 0.0004457725659761318, "loss": 2.9965732097625732, "step": 8631, "token_acc": 0.29890647935364 }, { "epoch": 5.059806508355321, "grad_norm": 0.18973636372920044, "learning_rate": 0.0004457574960910652, "loss": 2.953490734100342, "step": 8632, "token_acc": 0.30494309777131856 }, { "epoch": 5.06039284667253, "grad_norm": 0.2078350464978439, "learning_rate": 0.00044574242436712066, "loss": 2.955385446548462, "step": 8633, "token_acc": 0.30463890353189244 }, { "epoch": 5.060979184989739, "grad_norm": 0.18312587414734494, "learning_rate": 0.00044572735080443984, "loss": 2.985891103744507, "step": 8634, "token_acc": 0.2997699762434481 }, { "epoch": 5.061565523306948, "grad_norm": 0.18014611302782282, "learning_rate": 0.00044571227540316427, "loss": 2.977421760559082, "step": 8635, "token_acc": 0.3014093044400667 }, { "epoch": 5.062151861624157, "grad_norm": 0.15619999693082243, "learning_rate": 0.0004456971981634356, "loss": 2.956023693084717, "step": 8636, "token_acc": 0.3049728543525121 }, { "epoch": 5.062738199941366, "grad_norm": 0.2371325572934786, "learning_rate": 0.00044568211908539544, "loss": 2.952258825302124, "step": 8637, "token_acc": 0.30422057877647385 }, { "epoch": 5.063324538258575, "grad_norm": 0.1941031941816454, "learning_rate": 0.00044566703816918555, "loss": 2.9754738807678223, "step": 8638, "token_acc": 0.30187768446343644 }, { "epoch": 5.063910876575784, "grad_norm": 0.1588576861776656, "learning_rate": 0.0004456519554149474, "loss": 2.9642601013183594, "step": 8639, "token_acc": 0.3041995697475097 }, { "epoch": 5.064497214892993, "grad_norm": 0.20147915381564294, "learning_rate": 0.0004456368708228228, "loss": 3.0021042823791504, "step": 8640, "token_acc": 0.29708337053747286 }, { "epoch": 5.0650835532102025, "grad_norm": 0.17684925628371811, "learning_rate": 0.0004456217843929534, "loss": 3.0264391899108887, "step": 8641, "token_acc": 0.2945059279993948 }, { "epoch": 5.065669891527412, "grad_norm": 0.2139818903243028, "learning_rate": 0.000445606696125481, "loss": 3.0014138221740723, "step": 8642, "token_acc": 0.2979231801643147 }, { "epoch": 5.066256229844621, "grad_norm": 0.17499910993485562, "learning_rate": 0.0004455916060205473, "loss": 2.952077627182007, "step": 8643, "token_acc": 0.30543145118250714 }, { "epoch": 5.066842568161829, "grad_norm": 0.17815844920170426, "learning_rate": 0.0004455765140782939, "loss": 2.9972472190856934, "step": 8644, "token_acc": 0.2997598832615728 }, { "epoch": 5.067428906479038, "grad_norm": 0.18917003560753806, "learning_rate": 0.0004455614202988628, "loss": 2.95031476020813, "step": 8645, "token_acc": 0.30563063950510266 }, { "epoch": 5.068015244796247, "grad_norm": 0.1620532305910785, "learning_rate": 0.00044554632468239567, "loss": 2.9436497688293457, "step": 8646, "token_acc": 0.3063058509293967 }, { "epoch": 5.068601583113456, "grad_norm": 0.19191543765156602, "learning_rate": 0.0004455312272290343, "loss": 3.000683307647705, "step": 8647, "token_acc": 0.29723079065184327 }, { "epoch": 5.069187921430665, "grad_norm": 0.18859357856291722, "learning_rate": 0.0004455161279389205, "loss": 2.9740800857543945, "step": 8648, "token_acc": 0.3018820250328866 }, { "epoch": 5.0697742597478745, "grad_norm": 0.18570935491209128, "learning_rate": 0.00044550102681219613, "loss": 2.968301296234131, "step": 8649, "token_acc": 0.3021979305957651 }, { "epoch": 5.070360598065084, "grad_norm": 0.18548660058762137, "learning_rate": 0.0004454859238490031, "loss": 2.9676902294158936, "step": 8650, "token_acc": 0.30386588143474014 }, { "epoch": 5.070946936382293, "grad_norm": 0.24655278539567257, "learning_rate": 0.00044547081904948324, "loss": 2.969587802886963, "step": 8651, "token_acc": 0.302777904044793 }, { "epoch": 5.071533274699502, "grad_norm": 0.20740986710794485, "learning_rate": 0.00044545571241377834, "loss": 2.9880752563476562, "step": 8652, "token_acc": 0.2991293450473766 }, { "epoch": 5.072119613016711, "grad_norm": 0.17033005067289558, "learning_rate": 0.0004454406039420305, "loss": 2.950756788253784, "step": 8653, "token_acc": 0.30603406466361344 }, { "epoch": 5.07270595133392, "grad_norm": 0.2327896766457918, "learning_rate": 0.0004454254936343816, "loss": 2.9522461891174316, "step": 8654, "token_acc": 0.3041601602017047 }, { "epoch": 5.073292289651128, "grad_norm": 0.20808401365117496, "learning_rate": 0.0004454103814909734, "loss": 3.0387561321258545, "step": 8655, "token_acc": 0.2925784306797631 }, { "epoch": 5.073878627968337, "grad_norm": 0.17229705792926722, "learning_rate": 0.00044539526751194805, "loss": 2.992068290710449, "step": 8656, "token_acc": 0.299857431696822 }, { "epoch": 5.0744649662855466, "grad_norm": 0.20329672513826672, "learning_rate": 0.00044538015169744746, "loss": 2.9810991287231445, "step": 8657, "token_acc": 0.30157830519148265 }, { "epoch": 5.075051304602756, "grad_norm": 0.16240154419798736, "learning_rate": 0.0004453650340476136, "loss": 2.9683871269226074, "step": 8658, "token_acc": 0.30217312252559553 }, { "epoch": 5.075637642919965, "grad_norm": 0.20096924901561874, "learning_rate": 0.0004453499145625885, "loss": 3.014518976211548, "step": 8659, "token_acc": 0.29682944558938856 }, { "epoch": 5.076223981237174, "grad_norm": 0.17393855382729279, "learning_rate": 0.0004453347932425142, "loss": 3.005685329437256, "step": 8660, "token_acc": 0.2980601592202827 }, { "epoch": 5.076810319554383, "grad_norm": 0.2158725285054191, "learning_rate": 0.0004453196700875327, "loss": 2.9822983741760254, "step": 8661, "token_acc": 0.3007618468352555 }, { "epoch": 5.077396657871592, "grad_norm": 0.21044087792938376, "learning_rate": 0.0004453045450977862, "loss": 2.981567621231079, "step": 8662, "token_acc": 0.3014756010726135 }, { "epoch": 5.077982996188801, "grad_norm": 0.17054576406253677, "learning_rate": 0.0004452894182734166, "loss": 2.9881153106689453, "step": 8663, "token_acc": 0.2996795216484202 }, { "epoch": 5.07856933450601, "grad_norm": 0.19032653784141515, "learning_rate": 0.00044527428961456606, "loss": 2.939809560775757, "step": 8664, "token_acc": 0.30707649111507507 }, { "epoch": 5.0791556728232194, "grad_norm": 0.1923725972280376, "learning_rate": 0.0004452591591213767, "loss": 2.9823546409606934, "step": 8665, "token_acc": 0.30085225796393494 }, { "epoch": 5.079742011140428, "grad_norm": 0.1631994160241602, "learning_rate": 0.00044524402679399066, "loss": 2.9885072708129883, "step": 8666, "token_acc": 0.30099487084223453 }, { "epoch": 5.080328349457637, "grad_norm": 0.19004242145009445, "learning_rate": 0.00044522889263255016, "loss": 2.9761857986450195, "step": 8667, "token_acc": 0.29988298440640027 }, { "epoch": 5.080914687774846, "grad_norm": 0.1672695325002176, "learning_rate": 0.0004452137566371972, "loss": 3.0061936378479004, "step": 8668, "token_acc": 0.29717524317956284 }, { "epoch": 5.081501026092055, "grad_norm": 0.16657383551583727, "learning_rate": 0.0004451986188080741, "loss": 2.9993398189544678, "step": 8669, "token_acc": 0.2985622849571517 }, { "epoch": 5.082087364409264, "grad_norm": 0.18469211377288927, "learning_rate": 0.00044518347914532296, "loss": 2.9781854152679443, "step": 8670, "token_acc": 0.3016966476967222 }, { "epoch": 5.082673702726473, "grad_norm": 0.1831007185012431, "learning_rate": 0.0004451683376490861, "loss": 2.979910135269165, "step": 8671, "token_acc": 0.3009580999932528 }, { "epoch": 5.083260041043682, "grad_norm": 0.16628204293285825, "learning_rate": 0.0004451531943195057, "loss": 2.9878482818603516, "step": 8672, "token_acc": 0.29845324222247377 }, { "epoch": 5.0838463793608915, "grad_norm": 0.19004530046751614, "learning_rate": 0.00044513804915672397, "loss": 2.947545289993286, "step": 8673, "token_acc": 0.3059995131233139 }, { "epoch": 5.084432717678101, "grad_norm": 0.2495446346866189, "learning_rate": 0.00044512290216088327, "loss": 3.015436887741089, "step": 8674, "token_acc": 0.2961701676214725 }, { "epoch": 5.08501905599531, "grad_norm": 0.2917824388042082, "learning_rate": 0.0004451077533321257, "loss": 2.9743292331695557, "step": 8675, "token_acc": 0.3021603858956939 }, { "epoch": 5.085605394312518, "grad_norm": 0.190728223341427, "learning_rate": 0.0004450926026705939, "loss": 3.047908067703247, "step": 8676, "token_acc": 0.29266282379769903 }, { "epoch": 5.086191732629727, "grad_norm": 0.25614514439328046, "learning_rate": 0.0004450774501764299, "loss": 3.012049674987793, "step": 8677, "token_acc": 0.2975385315731287 }, { "epoch": 5.086778070946936, "grad_norm": 0.2555025238293874, "learning_rate": 0.0004450622958497761, "loss": 2.9370622634887695, "step": 8678, "token_acc": 0.30546119141648453 }, { "epoch": 5.087364409264145, "grad_norm": 0.1895178296392627, "learning_rate": 0.00044504713969077485, "loss": 2.9910037517547607, "step": 8679, "token_acc": 0.29987682113579406 }, { "epoch": 5.087950747581354, "grad_norm": 0.30336301251298026, "learning_rate": 0.0004450319816995686, "loss": 3.0123772621154785, "step": 8680, "token_acc": 0.2978076880056672 }, { "epoch": 5.0885370858985635, "grad_norm": 0.17058034493622912, "learning_rate": 0.0004450168218762997, "loss": 2.972095251083374, "step": 8681, "token_acc": 0.3031841932505351 }, { "epoch": 5.089123424215773, "grad_norm": 0.2590703595789448, "learning_rate": 0.0004450016602211106, "loss": 2.9533019065856934, "step": 8682, "token_acc": 0.30543957173147 }, { "epoch": 5.089709762532982, "grad_norm": 0.16398296780915148, "learning_rate": 0.0004449864967341436, "loss": 2.998281955718994, "step": 8683, "token_acc": 0.29859552042160736 }, { "epoch": 5.090296100850191, "grad_norm": 0.2832100719416877, "learning_rate": 0.0004449713314155412, "loss": 2.966888189315796, "step": 8684, "token_acc": 0.30374322361337475 }, { "epoch": 5.0908824391674, "grad_norm": 0.17066370113299584, "learning_rate": 0.00044495616426544585, "loss": 2.9454736709594727, "step": 8685, "token_acc": 0.3054903367751262 }, { "epoch": 5.091468777484609, "grad_norm": 0.21428016867978947, "learning_rate": 0.00044494099528400013, "loss": 3.000596761703491, "step": 8686, "token_acc": 0.2964813930342949 }, { "epoch": 5.092055115801817, "grad_norm": 0.1786485249308624, "learning_rate": 0.0004449258244713463, "loss": 2.9337856769561768, "step": 8687, "token_acc": 0.30664656474336427 }, { "epoch": 5.092641454119026, "grad_norm": 0.18949454231162494, "learning_rate": 0.0004449106518276272, "loss": 2.9337942600250244, "step": 8688, "token_acc": 0.30810018435137426 }, { "epoch": 5.0932277924362355, "grad_norm": 0.17691387774158146, "learning_rate": 0.0004448954773529851, "loss": 3.0088260173797607, "step": 8689, "token_acc": 0.2970852939363429 }, { "epoch": 5.093814130753445, "grad_norm": 0.1813394671176994, "learning_rate": 0.0004448803010475625, "loss": 2.963430404663086, "step": 8690, "token_acc": 0.30339260577018107 }, { "epoch": 5.094400469070654, "grad_norm": 0.18306315964868977, "learning_rate": 0.00044486512291150223, "loss": 2.9731225967407227, "step": 8691, "token_acc": 0.30617767057895307 }, { "epoch": 5.094986807387863, "grad_norm": 0.166233084742953, "learning_rate": 0.0004448499429449466, "loss": 3.001830816268921, "step": 8692, "token_acc": 0.2983365934282552 }, { "epoch": 5.095573145705072, "grad_norm": 0.19167196129571717, "learning_rate": 0.00044483476114803844, "loss": 2.974398374557495, "step": 8693, "token_acc": 0.30307335754339954 }, { "epoch": 5.096159484022281, "grad_norm": 0.16214186091038377, "learning_rate": 0.0004448195775209202, "loss": 2.9722065925598145, "step": 8694, "token_acc": 0.3036610642586429 }, { "epoch": 5.09674582233949, "grad_norm": 0.18606379590160094, "learning_rate": 0.0004448043920637345, "loss": 2.9871020317077637, "step": 8695, "token_acc": 0.29935455616612616 }, { "epoch": 5.097332160656699, "grad_norm": 0.182354034894133, "learning_rate": 0.0004447892047766241, "loss": 2.9713146686553955, "step": 8696, "token_acc": 0.301366809097906 }, { "epoch": 5.097918498973908, "grad_norm": 0.1799178693649738, "learning_rate": 0.00044477401565973154, "loss": 3.0149006843566895, "step": 8697, "token_acc": 0.2964379991745855 }, { "epoch": 5.098504837291117, "grad_norm": 0.1782470615321636, "learning_rate": 0.0004447588247131996, "loss": 2.9822301864624023, "step": 8698, "token_acc": 0.3016054597618781 }, { "epoch": 5.099091175608326, "grad_norm": 0.17662041488022265, "learning_rate": 0.00044474363193717093, "loss": 2.9736571311950684, "step": 8699, "token_acc": 0.3024353593450438 }, { "epoch": 5.099677513925535, "grad_norm": 0.1751931352238748, "learning_rate": 0.00044472843733178827, "loss": 3.0043129920959473, "step": 8700, "token_acc": 0.29886601743586405 }, { "epoch": 5.100263852242744, "grad_norm": 0.16441625263120016, "learning_rate": 0.00044471324089719435, "loss": 3.0036861896514893, "step": 8701, "token_acc": 0.2967620787926918 }, { "epoch": 5.100850190559953, "grad_norm": 0.17447517813921465, "learning_rate": 0.00044469804263353184, "loss": 2.9923291206359863, "step": 8702, "token_acc": 0.301533872009827 }, { "epoch": 5.101436528877162, "grad_norm": 0.17112936552337116, "learning_rate": 0.0004446828425409437, "loss": 2.9982962608337402, "step": 8703, "token_acc": 0.2995148130394313 }, { "epoch": 5.102022867194371, "grad_norm": 0.22457709184497177, "learning_rate": 0.00044466764061957257, "loss": 2.9788668155670166, "step": 8704, "token_acc": 0.3018351275563241 }, { "epoch": 5.10260920551158, "grad_norm": 0.21002594504636185, "learning_rate": 0.0004446524368695611, "loss": 3.007920265197754, "step": 8705, "token_acc": 0.2978220953171503 }, { "epoch": 5.1031955438287895, "grad_norm": 0.1656793860678139, "learning_rate": 0.0004446372312910525, "loss": 2.9555277824401855, "step": 8706, "token_acc": 0.3039694082823599 }, { "epoch": 5.103781882145999, "grad_norm": 0.2062391618323717, "learning_rate": 0.00044462202388418926, "loss": 3.0100295543670654, "step": 8707, "token_acc": 0.29648936225946554 }, { "epoch": 5.104368220463208, "grad_norm": 0.2098514263289546, "learning_rate": 0.00044460681464911445, "loss": 3.019043207168579, "step": 8708, "token_acc": 0.29410315766054484 }, { "epoch": 5.104954558780416, "grad_norm": 0.16575664721545047, "learning_rate": 0.00044459160358597085, "loss": 2.9941768646240234, "step": 8709, "token_acc": 0.30066963050415985 }, { "epoch": 5.105540897097625, "grad_norm": 0.2748136893708205, "learning_rate": 0.0004445763906949013, "loss": 2.967991590499878, "step": 8710, "token_acc": 0.30203086791791567 }, { "epoch": 5.106127235414834, "grad_norm": 0.3746450659734796, "learning_rate": 0.00044456117597604873, "loss": 3.0214946269989014, "step": 8711, "token_acc": 0.29649813831394567 }, { "epoch": 5.106713573732043, "grad_norm": 0.19248521139432512, "learning_rate": 0.0004445459594295561, "loss": 2.9820380210876465, "step": 8712, "token_acc": 0.3006762811389776 }, { "epoch": 5.107299912049252, "grad_norm": 0.21525298970307108, "learning_rate": 0.0004445307410555664, "loss": 2.984111785888672, "step": 8713, "token_acc": 0.3010132222084552 }, { "epoch": 5.1078862503664615, "grad_norm": 0.20275811644700503, "learning_rate": 0.0004445155208542224, "loss": 2.9863650798797607, "step": 8714, "token_acc": 0.30061141304347827 }, { "epoch": 5.108472588683671, "grad_norm": 0.20061241646584638, "learning_rate": 0.00044450029882566735, "loss": 2.9884612560272217, "step": 8715, "token_acc": 0.3009622214359632 }, { "epoch": 5.10905892700088, "grad_norm": 0.2219182911114369, "learning_rate": 0.000444485074970044, "loss": 2.932551383972168, "step": 8716, "token_acc": 0.30795195855247665 }, { "epoch": 5.109645265318089, "grad_norm": 0.1696554940962451, "learning_rate": 0.00044446984928749544, "loss": 2.975181818008423, "step": 8717, "token_acc": 0.3025253013028074 }, { "epoch": 5.110231603635298, "grad_norm": 0.1896026199259381, "learning_rate": 0.00044445462177816476, "loss": 2.984372615814209, "step": 8718, "token_acc": 0.30086946615089044 }, { "epoch": 5.110817941952506, "grad_norm": 0.19518374712631986, "learning_rate": 0.0004444393924421948, "loss": 2.985424041748047, "step": 8719, "token_acc": 0.30239633673179317 }, { "epoch": 5.111404280269715, "grad_norm": 0.19871318273017644, "learning_rate": 0.0004444241612797289, "loss": 2.965365171432495, "step": 8720, "token_acc": 0.30210786165637665 }, { "epoch": 5.111990618586924, "grad_norm": 0.17348778319980182, "learning_rate": 0.00044440892829090994, "loss": 2.9832210540771484, "step": 8721, "token_acc": 0.3014860509317962 }, { "epoch": 5.1125769569041335, "grad_norm": 0.20023844893961287, "learning_rate": 0.00044439369347588106, "loss": 2.9795336723327637, "step": 8722, "token_acc": 0.30259952008049584 }, { "epoch": 5.113163295221343, "grad_norm": 0.16923407908976956, "learning_rate": 0.0004443784568347854, "loss": 2.934471845626831, "step": 8723, "token_acc": 0.30796485961274817 }, { "epoch": 5.113749633538552, "grad_norm": 0.18246011096612696, "learning_rate": 0.00044436321836776606, "loss": 2.9944136142730713, "step": 8724, "token_acc": 0.297609207180037 }, { "epoch": 5.114335971855761, "grad_norm": 0.16949756168186397, "learning_rate": 0.00044434797807496617, "loss": 3.020205020904541, "step": 8725, "token_acc": 0.29478818956877423 }, { "epoch": 5.11492231017297, "grad_norm": 0.19699919015133432, "learning_rate": 0.0004443327359565289, "loss": 2.981968402862549, "step": 8726, "token_acc": 0.3023252321853579 }, { "epoch": 5.115508648490179, "grad_norm": 0.18498674873559284, "learning_rate": 0.0004443174920125975, "loss": 3.033045530319214, "step": 8727, "token_acc": 0.29232261126127057 }, { "epoch": 5.116094986807388, "grad_norm": 0.18455677225860928, "learning_rate": 0.00044430224624331506, "loss": 2.974199056625366, "step": 8728, "token_acc": 0.3019660968901706 }, { "epoch": 5.116681325124597, "grad_norm": 0.1759017776492813, "learning_rate": 0.00044428699864882483, "loss": 3.020569324493408, "step": 8729, "token_acc": 0.2958360552256663 }, { "epoch": 5.1172676634418055, "grad_norm": 0.18322128721256692, "learning_rate": 0.00044427174922927014, "loss": 2.961002826690674, "step": 8730, "token_acc": 0.30525251893761784 }, { "epoch": 5.117854001759015, "grad_norm": 0.2505976846360587, "learning_rate": 0.00044425649798479405, "loss": 2.9866740703582764, "step": 8731, "token_acc": 0.3010071466313694 }, { "epoch": 5.118440340076224, "grad_norm": 0.2115810775356059, "learning_rate": 0.00044424124491553995, "loss": 2.9912612438201904, "step": 8732, "token_acc": 0.2994038515239318 }, { "epoch": 5.119026678393433, "grad_norm": 0.1965034297047565, "learning_rate": 0.0004442259900216512, "loss": 2.9565072059631348, "step": 8733, "token_acc": 0.3062210525473758 }, { "epoch": 5.119613016710642, "grad_norm": 0.25963934167700076, "learning_rate": 0.00044421073330327087, "loss": 2.981520652770996, "step": 8734, "token_acc": 0.30099062082657485 }, { "epoch": 5.120199355027851, "grad_norm": 0.16998169640142108, "learning_rate": 0.00044419547476054244, "loss": 2.996492385864258, "step": 8735, "token_acc": 0.29904945311368475 }, { "epoch": 5.12078569334506, "grad_norm": 0.21578283434751916, "learning_rate": 0.0004441802143936092, "loss": 3.0027551651000977, "step": 8736, "token_acc": 0.296802316918217 }, { "epoch": 5.121372031662269, "grad_norm": 0.17813585143368707, "learning_rate": 0.00044416495220261454, "loss": 2.9835734367370605, "step": 8737, "token_acc": 0.3010338112477618 }, { "epoch": 5.121958369979478, "grad_norm": 0.19881562054319968, "learning_rate": 0.0004441496881877017, "loss": 3.0267527103424072, "step": 8738, "token_acc": 0.2962101504718184 }, { "epoch": 5.1225447082966875, "grad_norm": 0.20660763304233926, "learning_rate": 0.0004441344223490143, "loss": 2.977837085723877, "step": 8739, "token_acc": 0.3009801461513645 }, { "epoch": 5.123131046613897, "grad_norm": 0.17158880776176214, "learning_rate": 0.00044411915468669556, "loss": 2.996511697769165, "step": 8740, "token_acc": 0.2993599224637937 }, { "epoch": 5.123717384931105, "grad_norm": 0.21452063140028457, "learning_rate": 0.0004441038852008889, "loss": 2.9855563640594482, "step": 8741, "token_acc": 0.3006892202351457 }, { "epoch": 5.124303723248314, "grad_norm": 0.17079051588619126, "learning_rate": 0.0004440886138917378, "loss": 2.928725242614746, "step": 8742, "token_acc": 0.3088905440984022 }, { "epoch": 5.124890061565523, "grad_norm": 0.18292682922830103, "learning_rate": 0.00044407334075938573, "loss": 2.976896047592163, "step": 8743, "token_acc": 0.30177201386454805 }, { "epoch": 5.125476399882732, "grad_norm": 0.18563912676883831, "learning_rate": 0.00044405806580397614, "loss": 2.9855833053588867, "step": 8744, "token_acc": 0.29957146258233475 }, { "epoch": 5.126062738199941, "grad_norm": 0.18408442047945414, "learning_rate": 0.00044404278902565256, "loss": 2.965723991394043, "step": 8745, "token_acc": 0.3026603587109736 }, { "epoch": 5.12664907651715, "grad_norm": 0.18114602512018754, "learning_rate": 0.00044402751042455836, "loss": 2.9670424461364746, "step": 8746, "token_acc": 0.30280305114289313 }, { "epoch": 5.1272354148343595, "grad_norm": 0.17609532361280458, "learning_rate": 0.00044401223000083725, "loss": 2.9774763584136963, "step": 8747, "token_acc": 0.3030851253374998 }, { "epoch": 5.127821753151569, "grad_norm": 0.19086479124439165, "learning_rate": 0.0004439969477546326, "loss": 2.9790468215942383, "step": 8748, "token_acc": 0.2999583157491385 }, { "epoch": 5.128408091468778, "grad_norm": 0.1712440520593261, "learning_rate": 0.000443981663686088, "loss": 2.962512969970703, "step": 8749, "token_acc": 0.3022198925435723 }, { "epoch": 5.128994429785987, "grad_norm": 0.18097582819288932, "learning_rate": 0.00044396637779534716, "loss": 2.993630886077881, "step": 8750, "token_acc": 0.30078204517051677 }, { "epoch": 5.129580768103196, "grad_norm": 0.17713790717679545, "learning_rate": 0.0004439510900825535, "loss": 2.957152843475342, "step": 8751, "token_acc": 0.30460715075337297 }, { "epoch": 5.130167106420404, "grad_norm": 0.18507534220347635, "learning_rate": 0.0004439358005478508, "loss": 2.991044044494629, "step": 8752, "token_acc": 0.29998440763054773 }, { "epoch": 5.130753444737613, "grad_norm": 0.1941744986195138, "learning_rate": 0.0004439205091913825, "loss": 2.980870246887207, "step": 8753, "token_acc": 0.29917432582567416 }, { "epoch": 5.131339783054822, "grad_norm": 0.17934373854653354, "learning_rate": 0.00044390521601329235, "loss": 2.979238510131836, "step": 8754, "token_acc": 0.3008060295195227 }, { "epoch": 5.1319261213720315, "grad_norm": 0.18935686712937697, "learning_rate": 0.000443889921013724, "loss": 2.961998224258423, "step": 8755, "token_acc": 0.30451088600728166 }, { "epoch": 5.132512459689241, "grad_norm": 0.1635256496647054, "learning_rate": 0.0004438746241928211, "loss": 2.937725305557251, "step": 8756, "token_acc": 0.3063446571308372 }, { "epoch": 5.13309879800645, "grad_norm": 0.1805589924140241, "learning_rate": 0.0004438593255507274, "loss": 2.9922070503234863, "step": 8757, "token_acc": 0.29834255568218737 }, { "epoch": 5.133685136323659, "grad_norm": 0.2014278226288138, "learning_rate": 0.00044384402508758646, "loss": 3.0203442573547363, "step": 8758, "token_acc": 0.296373181581373 }, { "epoch": 5.134271474640868, "grad_norm": 0.19606642172806865, "learning_rate": 0.0004438287228035421, "loss": 2.9669575691223145, "step": 8759, "token_acc": 0.3038575572799706 }, { "epoch": 5.134857812958077, "grad_norm": 0.17193300018633415, "learning_rate": 0.00044381341869873827, "loss": 3.0092475414276123, "step": 8760, "token_acc": 0.2968296321646089 }, { "epoch": 5.135444151275286, "grad_norm": 0.19259356140749984, "learning_rate": 0.0004437981127733184, "loss": 2.958791971206665, "step": 8761, "token_acc": 0.30542714977250535 }, { "epoch": 5.136030489592494, "grad_norm": 0.1625306921850991, "learning_rate": 0.0004437828050274264, "loss": 3.019230842590332, "step": 8762, "token_acc": 0.29588822970439427 }, { "epoch": 5.1366168279097035, "grad_norm": 0.19129821411572573, "learning_rate": 0.0004437674954612061, "loss": 2.9478063583374023, "step": 8763, "token_acc": 0.3046184320266889 }, { "epoch": 5.137203166226913, "grad_norm": 0.19429898068825613, "learning_rate": 0.0004437521840748013, "loss": 2.954503059387207, "step": 8764, "token_acc": 0.3052622437459451 }, { "epoch": 5.137789504544122, "grad_norm": 0.29201981464710863, "learning_rate": 0.0004437368708683558, "loss": 2.9911608695983887, "step": 8765, "token_acc": 0.30020483913197493 }, { "epoch": 5.138375842861331, "grad_norm": 0.3240815323999452, "learning_rate": 0.0004437215558420135, "loss": 3.0255513191223145, "step": 8766, "token_acc": 0.29469124997363816 }, { "epoch": 5.13896218117854, "grad_norm": 0.33238639344521165, "learning_rate": 0.0004437062389959181, "loss": 2.995145559310913, "step": 8767, "token_acc": 0.2994174827692491 }, { "epoch": 5.139548519495749, "grad_norm": 0.2516811538929707, "learning_rate": 0.00044369092033021373, "loss": 2.9815473556518555, "step": 8768, "token_acc": 0.30134539073245536 }, { "epoch": 5.140134857812958, "grad_norm": 0.16500870732976872, "learning_rate": 0.0004436755998450441, "loss": 2.966063976287842, "step": 8769, "token_acc": 0.3051165689475931 }, { "epoch": 5.140721196130167, "grad_norm": 0.1824959402155693, "learning_rate": 0.0004436602775405533, "loss": 2.9898056983947754, "step": 8770, "token_acc": 0.3005512259034415 }, { "epoch": 5.141307534447376, "grad_norm": 0.18657288282759818, "learning_rate": 0.00044364495341688503, "loss": 3.0107436180114746, "step": 8771, "token_acc": 0.297682422593849 }, { "epoch": 5.1418938727645855, "grad_norm": 0.2033234731112678, "learning_rate": 0.0004436296274741834, "loss": 3.000121593475342, "step": 8772, "token_acc": 0.29768572334354854 }, { "epoch": 5.142480211081795, "grad_norm": 0.18308175038365282, "learning_rate": 0.0004436142997125923, "loss": 2.959010124206543, "step": 8773, "token_acc": 0.3038789398013062 }, { "epoch": 5.143066549399003, "grad_norm": 0.1695435572624618, "learning_rate": 0.0004435989701322558, "loss": 2.948486804962158, "step": 8774, "token_acc": 0.30577777320774896 }, { "epoch": 5.143652887716212, "grad_norm": 0.17129029861989012, "learning_rate": 0.00044358363873331786, "loss": 2.980088710784912, "step": 8775, "token_acc": 0.30152320207849737 }, { "epoch": 5.144239226033421, "grad_norm": 0.15855817207467682, "learning_rate": 0.00044356830551592254, "loss": 2.9855031967163086, "step": 8776, "token_acc": 0.30069962866711913 }, { "epoch": 5.14482556435063, "grad_norm": 0.17003999805084932, "learning_rate": 0.0004435529704802137, "loss": 3.011748790740967, "step": 8777, "token_acc": 0.29669308036285963 }, { "epoch": 5.145411902667839, "grad_norm": 0.17948421936435988, "learning_rate": 0.00044353763362633557, "loss": 2.9702816009521484, "step": 8778, "token_acc": 0.30203123697591067 }, { "epoch": 5.145998240985048, "grad_norm": 0.20017730236600856, "learning_rate": 0.00044352229495443226, "loss": 2.980778217315674, "step": 8779, "token_acc": 0.30084952127225484 }, { "epoch": 5.1465845793022575, "grad_norm": 0.19341715004494375, "learning_rate": 0.0004435069544646476, "loss": 3.001784324645996, "step": 8780, "token_acc": 0.29766664926658665 }, { "epoch": 5.147170917619467, "grad_norm": 0.1645253423288801, "learning_rate": 0.00044349161215712595, "loss": 2.9548749923706055, "step": 8781, "token_acc": 0.3050363481910589 }, { "epoch": 5.147757255936676, "grad_norm": 0.20518528355313262, "learning_rate": 0.00044347626803201135, "loss": 3.0156431198120117, "step": 8782, "token_acc": 0.2966124348270663 }, { "epoch": 5.148343594253885, "grad_norm": 0.23441674911064184, "learning_rate": 0.0004434609220894479, "loss": 2.993480682373047, "step": 8783, "token_acc": 0.299571681828181 }, { "epoch": 5.148929932571093, "grad_norm": 0.20569072116922216, "learning_rate": 0.0004434455743295798, "loss": 2.9530177116394043, "step": 8784, "token_acc": 0.3036298284585956 }, { "epoch": 5.149516270888302, "grad_norm": 0.1587091362918602, "learning_rate": 0.0004434302247525512, "loss": 3.0004758834838867, "step": 8785, "token_acc": 0.2969946980676645 }, { "epoch": 5.150102609205511, "grad_norm": 0.22384193720168497, "learning_rate": 0.0004434148733585063, "loss": 2.9857680797576904, "step": 8786, "token_acc": 0.2996169889031716 }, { "epoch": 5.15068894752272, "grad_norm": 0.24629592166074957, "learning_rate": 0.0004433995201475892, "loss": 3.0081186294555664, "step": 8787, "token_acc": 0.29775586097937995 }, { "epoch": 5.1512752858399296, "grad_norm": 0.18742951773209884, "learning_rate": 0.00044338416511994426, "loss": 2.9570837020874023, "step": 8788, "token_acc": 0.30387830131476945 }, { "epoch": 5.151861624157139, "grad_norm": 0.19646515708892104, "learning_rate": 0.0004433688082757158, "loss": 3.026297092437744, "step": 8789, "token_acc": 0.2940225329962323 }, { "epoch": 5.152447962474348, "grad_norm": 0.20051299088990934, "learning_rate": 0.0004433534496150478, "loss": 2.987358570098877, "step": 8790, "token_acc": 0.3001125246561379 }, { "epoch": 5.153034300791557, "grad_norm": 0.15457959234496235, "learning_rate": 0.00044333808913808476, "loss": 2.959099769592285, "step": 8791, "token_acc": 0.3042096669192431 }, { "epoch": 5.153620639108766, "grad_norm": 0.1838312972855188, "learning_rate": 0.0004433227268449709, "loss": 2.9745407104492188, "step": 8792, "token_acc": 0.3020439101868741 }, { "epoch": 5.154206977425975, "grad_norm": 0.17796718775698855, "learning_rate": 0.00044330736273585046, "loss": 2.971836566925049, "step": 8793, "token_acc": 0.3024766009788569 }, { "epoch": 5.154793315743184, "grad_norm": 0.224051263765041, "learning_rate": 0.00044329199681086796, "loss": 3.021592378616333, "step": 8794, "token_acc": 0.2952031088550839 }, { "epoch": 5.1553796540603924, "grad_norm": 0.1775266659273417, "learning_rate": 0.00044327662907016753, "loss": 2.9839346408843994, "step": 8795, "token_acc": 0.3005945614875003 }, { "epoch": 5.155965992377602, "grad_norm": 0.19104162559981613, "learning_rate": 0.0004432612595138936, "loss": 3.0073342323303223, "step": 8796, "token_acc": 0.29680753508342755 }, { "epoch": 5.156552330694811, "grad_norm": 0.19157912912753658, "learning_rate": 0.0004432458881421906, "loss": 2.9781274795532227, "step": 8797, "token_acc": 0.3018584978555794 }, { "epoch": 5.15713866901202, "grad_norm": 0.20015034902580847, "learning_rate": 0.00044323051495520285, "loss": 2.9905576705932617, "step": 8798, "token_acc": 0.300727431195736 }, { "epoch": 5.157725007329229, "grad_norm": 0.19837806833311086, "learning_rate": 0.0004432151399530748, "loss": 2.9781527519226074, "step": 8799, "token_acc": 0.3013042730788081 }, { "epoch": 5.158311345646438, "grad_norm": 0.19962525418409838, "learning_rate": 0.00044319976313595083, "loss": 2.9713144302368164, "step": 8800, "token_acc": 0.3012242292914294 }, { "epoch": 5.158897683963647, "grad_norm": 0.23789679591304785, "learning_rate": 0.0004431843845039755, "loss": 2.9699878692626953, "step": 8801, "token_acc": 0.3012037079739888 }, { "epoch": 5.159484022280856, "grad_norm": 0.2048395340654621, "learning_rate": 0.00044316900405729317, "loss": 2.9315085411071777, "step": 8802, "token_acc": 0.3082174919600675 }, { "epoch": 5.160070360598065, "grad_norm": 0.16372085563401884, "learning_rate": 0.00044315362179604836, "loss": 2.999645233154297, "step": 8803, "token_acc": 0.299346276164543 }, { "epoch": 5.1606566989152745, "grad_norm": 0.18957966346949431, "learning_rate": 0.0004431382377203855, "loss": 3.01366925239563, "step": 8804, "token_acc": 0.29710502564494096 }, { "epoch": 5.161243037232484, "grad_norm": 0.1976172691767416, "learning_rate": 0.0004431228518304492, "loss": 2.998316764831543, "step": 8805, "token_acc": 0.29986448195145987 }, { "epoch": 5.161829375549692, "grad_norm": 0.18530247330629188, "learning_rate": 0.0004431074641263839, "loss": 2.966491460800171, "step": 8806, "token_acc": 0.30217870244318124 }, { "epoch": 5.162415713866901, "grad_norm": 0.19169113547699163, "learning_rate": 0.00044309207460833423, "loss": 3.0257625579833984, "step": 8807, "token_acc": 0.2947127294036172 }, { "epoch": 5.16300205218411, "grad_norm": 0.17714026416745718, "learning_rate": 0.0004430766832764447, "loss": 2.9855637550354004, "step": 8808, "token_acc": 0.30169432113506095 }, { "epoch": 5.163588390501319, "grad_norm": 0.18939479033801662, "learning_rate": 0.00044306129013085995, "loss": 3.060751438140869, "step": 8809, "token_acc": 0.2904538324955869 }, { "epoch": 5.164174728818528, "grad_norm": 0.1720470249802947, "learning_rate": 0.00044304589517172444, "loss": 3.0255823135375977, "step": 8810, "token_acc": 0.29640227246762146 }, { "epoch": 5.164761067135737, "grad_norm": 0.16863651620345926, "learning_rate": 0.00044303049839918295, "loss": 2.9605231285095215, "step": 8811, "token_acc": 0.30332551932698065 }, { "epoch": 5.1653474054529465, "grad_norm": 0.17915145685804046, "learning_rate": 0.00044301509981338005, "loss": 3.0141773223876953, "step": 8812, "token_acc": 0.2975359092829027 }, { "epoch": 5.165933743770156, "grad_norm": 0.16283239190205318, "learning_rate": 0.00044299969941446034, "loss": 2.9851694107055664, "step": 8813, "token_acc": 0.3007235762451293 }, { "epoch": 5.166520082087365, "grad_norm": 0.18237965713611776, "learning_rate": 0.0004429842972025685, "loss": 2.9824442863464355, "step": 8814, "token_acc": 0.2993478860026853 }, { "epoch": 5.167106420404574, "grad_norm": 0.15731697620541296, "learning_rate": 0.0004429688931778493, "loss": 2.97575044631958, "step": 8815, "token_acc": 0.3009870621777124 }, { "epoch": 5.167692758721783, "grad_norm": 0.19106150923677062, "learning_rate": 0.0004429534873404474, "loss": 2.9616377353668213, "step": 8816, "token_acc": 0.30445221325381727 }, { "epoch": 5.168279097038991, "grad_norm": 0.19014711845616725, "learning_rate": 0.0004429380796905074, "loss": 2.982950448989868, "step": 8817, "token_acc": 0.3007281814475076 }, { "epoch": 5.1688654353562, "grad_norm": 0.17713231803324647, "learning_rate": 0.0004429226702281741, "loss": 3.018231153488159, "step": 8818, "token_acc": 0.2940174744818733 }, { "epoch": 5.169451773673409, "grad_norm": 0.16605211019691218, "learning_rate": 0.0004429072589535924, "loss": 3.0333194732666016, "step": 8819, "token_acc": 0.2949870093355757 }, { "epoch": 5.1700381119906185, "grad_norm": 0.22078888940285127, "learning_rate": 0.0004428918458669069, "loss": 2.9845995903015137, "step": 8820, "token_acc": 0.3003319458348701 }, { "epoch": 5.170624450307828, "grad_norm": 0.1980923427841761, "learning_rate": 0.00044287643096826247, "loss": 2.957256317138672, "step": 8821, "token_acc": 0.3030481030422556 }, { "epoch": 5.171210788625037, "grad_norm": 0.16410361144003952, "learning_rate": 0.0004428610142578038, "loss": 3.0132460594177246, "step": 8822, "token_acc": 0.295656487736714 }, { "epoch": 5.171797126942246, "grad_norm": 0.17138479508595136, "learning_rate": 0.00044284559573567583, "loss": 2.945361614227295, "step": 8823, "token_acc": 0.3072721001307518 }, { "epoch": 5.172383465259455, "grad_norm": 0.1995769737744185, "learning_rate": 0.0004428301754020233, "loss": 3.0178587436676025, "step": 8824, "token_acc": 0.29459409136047665 }, { "epoch": 5.172969803576664, "grad_norm": 0.23002664600489142, "learning_rate": 0.00044281475325699116, "loss": 2.949838638305664, "step": 8825, "token_acc": 0.3056479350384029 }, { "epoch": 5.173556141893873, "grad_norm": 0.21468769051597134, "learning_rate": 0.00044279932930072424, "loss": 2.96749210357666, "step": 8826, "token_acc": 0.3028843479021118 }, { "epoch": 5.174142480211081, "grad_norm": 0.17038943244702934, "learning_rate": 0.0004427839035333674, "loss": 2.9870846271514893, "step": 8827, "token_acc": 0.2995438255120174 }, { "epoch": 5.1747288185282905, "grad_norm": 0.21917612113587115, "learning_rate": 0.0004427684759550656, "loss": 2.9842967987060547, "step": 8828, "token_acc": 0.30077621305499996 }, { "epoch": 5.1753151568455, "grad_norm": 0.23661548471761087, "learning_rate": 0.0004427530465659637, "loss": 2.9789273738861084, "step": 8829, "token_acc": 0.2999345953517684 }, { "epoch": 5.175901495162709, "grad_norm": 0.17502682394592814, "learning_rate": 0.00044273761536620673, "loss": 2.9573769569396973, "step": 8830, "token_acc": 0.3050305909059106 }, { "epoch": 5.176487833479918, "grad_norm": 0.2327482210417244, "learning_rate": 0.0004427221823559395, "loss": 3.0415148735046387, "step": 8831, "token_acc": 0.2930228878810252 }, { "epoch": 5.177074171797127, "grad_norm": 0.25092826963169784, "learning_rate": 0.00044270674753530705, "loss": 2.9905500411987305, "step": 8832, "token_acc": 0.2994847393107199 }, { "epoch": 5.177660510114336, "grad_norm": 0.15462550598691166, "learning_rate": 0.0004426913109044544, "loss": 2.9697647094726562, "step": 8833, "token_acc": 0.3044459055652667 }, { "epoch": 5.178246848431545, "grad_norm": 0.20374395545410434, "learning_rate": 0.00044267587246352657, "loss": 2.9857914447784424, "step": 8834, "token_acc": 0.299605878770167 }, { "epoch": 5.178833186748754, "grad_norm": 0.18287376638783923, "learning_rate": 0.0004426604322126685, "loss": 2.984575033187866, "step": 8835, "token_acc": 0.3018724869021955 }, { "epoch": 5.179419525065963, "grad_norm": 0.1599217214744648, "learning_rate": 0.0004426449901520254, "loss": 2.9735639095306396, "step": 8836, "token_acc": 0.3014944119758023 }, { "epoch": 5.1800058633831725, "grad_norm": 0.18801290230886053, "learning_rate": 0.0004426295462817421, "loss": 2.996950149536133, "step": 8837, "token_acc": 0.2996473884201713 }, { "epoch": 5.180592201700381, "grad_norm": 0.16971393864212894, "learning_rate": 0.00044261410060196385, "loss": 3.0004491806030273, "step": 8838, "token_acc": 0.29815328080720005 }, { "epoch": 5.18117854001759, "grad_norm": 0.2110796833092154, "learning_rate": 0.0004425986531128356, "loss": 2.984823226928711, "step": 8839, "token_acc": 0.3024030106234238 }, { "epoch": 5.181764878334799, "grad_norm": 0.21010269020015432, "learning_rate": 0.0004425832038145026, "loss": 3.0057268142700195, "step": 8840, "token_acc": 0.2955416517408647 }, { "epoch": 5.182351216652008, "grad_norm": 0.2089935370215622, "learning_rate": 0.00044256775270710985, "loss": 2.954976797103882, "step": 8841, "token_acc": 0.30377144962627645 }, { "epoch": 5.182937554969217, "grad_norm": 0.17218165629041793, "learning_rate": 0.00044255229979080256, "loss": 3.0092296600341797, "step": 8842, "token_acc": 0.2978609045151622 }, { "epoch": 5.183523893286426, "grad_norm": 0.20326666788450598, "learning_rate": 0.0004425368450657259, "loss": 3.0203351974487305, "step": 8843, "token_acc": 0.29590390983188997 }, { "epoch": 5.184110231603635, "grad_norm": 0.18783590937165168, "learning_rate": 0.00044252138853202505, "loss": 3.0002493858337402, "step": 8844, "token_acc": 0.2984321888886888 }, { "epoch": 5.1846965699208445, "grad_norm": 0.20995669748011578, "learning_rate": 0.00044250593018984517, "loss": 3.0225157737731934, "step": 8845, "token_acc": 0.29629019268032714 }, { "epoch": 5.185282908238054, "grad_norm": 0.2770379681723397, "learning_rate": 0.00044249047003933154, "loss": 2.970541477203369, "step": 8846, "token_acc": 0.3037223453404951 }, { "epoch": 5.185869246555263, "grad_norm": 0.18594168630015998, "learning_rate": 0.0004424750080806292, "loss": 2.9690101146698, "step": 8847, "token_acc": 0.3037826960743005 }, { "epoch": 5.186455584872472, "grad_norm": 0.2621826405552439, "learning_rate": 0.0004424595443138836, "loss": 2.958822011947632, "step": 8848, "token_acc": 0.3035287414371366 }, { "epoch": 5.18704192318968, "grad_norm": 0.29547647906371594, "learning_rate": 0.00044244407873923985, "loss": 2.981954574584961, "step": 8849, "token_acc": 0.3022268224475623 }, { "epoch": 5.187628261506889, "grad_norm": 0.16182079894335188, "learning_rate": 0.00044242861135684343, "loss": 2.9978561401367188, "step": 8850, "token_acc": 0.2995199304416057 }, { "epoch": 5.188214599824098, "grad_norm": 0.26586322426428327, "learning_rate": 0.0004424131421668394, "loss": 2.974959135055542, "step": 8851, "token_acc": 0.30220256140567964 }, { "epoch": 5.188800938141307, "grad_norm": 0.18208648661996515, "learning_rate": 0.00044239767116937325, "loss": 2.978020191192627, "step": 8852, "token_acc": 0.3027106188788786 }, { "epoch": 5.1893872764585165, "grad_norm": 0.2195109012234581, "learning_rate": 0.00044238219836459017, "loss": 2.957524299621582, "step": 8853, "token_acc": 0.3047144459256438 }, { "epoch": 5.189973614775726, "grad_norm": 0.18567011613468803, "learning_rate": 0.00044236672375263565, "loss": 2.978147506713867, "step": 8854, "token_acc": 0.30193847276998786 }, { "epoch": 5.190559953092935, "grad_norm": 0.21921101979344748, "learning_rate": 0.000442351247333655, "loss": 3.0142345428466797, "step": 8855, "token_acc": 0.2960742541716494 }, { "epoch": 5.191146291410144, "grad_norm": 0.18643333502722184, "learning_rate": 0.0004423357691077934, "loss": 3.047701358795166, "step": 8856, "token_acc": 0.291195281824107 }, { "epoch": 5.191732629727353, "grad_norm": 0.19416886735007322, "learning_rate": 0.00044232028907519657, "loss": 2.9476370811462402, "step": 8857, "token_acc": 0.3083077483129947 }, { "epoch": 5.192318968044562, "grad_norm": 0.16444472799054644, "learning_rate": 0.0004423048072360097, "loss": 2.968719482421875, "step": 8858, "token_acc": 0.30315532387042554 }, { "epoch": 5.192905306361771, "grad_norm": 0.22440563144353334, "learning_rate": 0.00044228932359037846, "loss": 3.003300428390503, "step": 8859, "token_acc": 0.29841229592416213 }, { "epoch": 5.193491644678979, "grad_norm": 0.17034459721299242, "learning_rate": 0.00044227383813844794, "loss": 2.944145441055298, "step": 8860, "token_acc": 0.3054725770699171 }, { "epoch": 5.1940779829961885, "grad_norm": 0.2639884836063497, "learning_rate": 0.00044225835088036394, "loss": 2.998051166534424, "step": 8861, "token_acc": 0.298192902690213 }, { "epoch": 5.194664321313398, "grad_norm": 0.2555542330292339, "learning_rate": 0.0004422428618162717, "loss": 3.016000747680664, "step": 8862, "token_acc": 0.29538192109036915 }, { "epoch": 5.195250659630607, "grad_norm": 0.1750001256941667, "learning_rate": 0.0004422273709463169, "loss": 2.9487268924713135, "step": 8863, "token_acc": 0.3062791546876366 }, { "epoch": 5.195836997947816, "grad_norm": 0.20599213843054098, "learning_rate": 0.000442211878270645, "loss": 3.002913236618042, "step": 8864, "token_acc": 0.2964381339945955 }, { "epoch": 5.196423336265025, "grad_norm": 0.1716775549776159, "learning_rate": 0.0004421963837894014, "loss": 2.9811151027679443, "step": 8865, "token_acc": 0.3003942058004568 }, { "epoch": 5.197009674582234, "grad_norm": 0.17518677003637623, "learning_rate": 0.00044218088750273187, "loss": 2.995006561279297, "step": 8866, "token_acc": 0.2992382829080563 }, { "epoch": 5.197596012899443, "grad_norm": 0.19537416401865554, "learning_rate": 0.00044216538941078184, "loss": 3.0042848587036133, "step": 8867, "token_acc": 0.29889121427363435 }, { "epoch": 5.198182351216652, "grad_norm": 0.19672864276154467, "learning_rate": 0.00044214988951369694, "loss": 2.952193260192871, "step": 8868, "token_acc": 0.3071014224035176 }, { "epoch": 5.198768689533861, "grad_norm": 0.19838105460970937, "learning_rate": 0.00044213438781162266, "loss": 3.0012454986572266, "step": 8869, "token_acc": 0.29818024421597183 }, { "epoch": 5.19935502785107, "grad_norm": 0.19112045839496805, "learning_rate": 0.0004421188843047048, "loss": 2.9869096279144287, "step": 8870, "token_acc": 0.3008218522123387 }, { "epoch": 5.199941366168279, "grad_norm": 0.18063861123395328, "learning_rate": 0.00044210337899308887, "loss": 3.007112979888916, "step": 8871, "token_acc": 0.29686860601880327 }, { "epoch": 5.200527704485488, "grad_norm": 0.19092886981454182, "learning_rate": 0.00044208787187692057, "loss": 2.970839262008667, "step": 8872, "token_acc": 0.30168692311011064 }, { "epoch": 5.201114042802697, "grad_norm": 0.18618914737788142, "learning_rate": 0.00044207236295634554, "loss": 3.0074117183685303, "step": 8873, "token_acc": 0.2970973308405062 }, { "epoch": 5.201700381119906, "grad_norm": 0.17198400316198803, "learning_rate": 0.00044205685223150956, "loss": 2.963113307952881, "step": 8874, "token_acc": 0.30570938870798636 }, { "epoch": 5.202286719437115, "grad_norm": 0.16754572948298388, "learning_rate": 0.0004420413397025582, "loss": 3.021437644958496, "step": 8875, "token_acc": 0.29643082712176977 }, { "epoch": 5.202873057754324, "grad_norm": 0.182910722509467, "learning_rate": 0.0004420258253696372, "loss": 2.9806041717529297, "step": 8876, "token_acc": 0.29928805287867555 }, { "epoch": 5.203459396071533, "grad_norm": 0.1608751009547726, "learning_rate": 0.00044201030923289234, "loss": 2.9849867820739746, "step": 8877, "token_acc": 0.30099584124644246 }, { "epoch": 5.2040457343887425, "grad_norm": 0.17162172630345301, "learning_rate": 0.0004419947912924694, "loss": 2.9662232398986816, "step": 8878, "token_acc": 0.30181101250398756 }, { "epoch": 5.204632072705952, "grad_norm": 0.1812353648798643, "learning_rate": 0.00044197927154851404, "loss": 2.9623656272888184, "step": 8879, "token_acc": 0.3032062071085775 }, { "epoch": 5.205218411023161, "grad_norm": 0.1823312978791871, "learning_rate": 0.0004419637500011722, "loss": 2.986788511276245, "step": 8880, "token_acc": 0.30140712359573957 }, { "epoch": 5.205804749340369, "grad_norm": 0.1697103545320523, "learning_rate": 0.0004419482266505895, "loss": 2.9684510231018066, "step": 8881, "token_acc": 0.3030649594247828 }, { "epoch": 5.206391087657578, "grad_norm": 0.18503390846837403, "learning_rate": 0.000441932701496912, "loss": 2.9884328842163086, "step": 8882, "token_acc": 0.30068852200498014 }, { "epoch": 5.206977425974787, "grad_norm": 0.1838669596996058, "learning_rate": 0.00044191717454028533, "loss": 2.9981112480163574, "step": 8883, "token_acc": 0.298980003908031 }, { "epoch": 5.207563764291996, "grad_norm": 0.16383488848890274, "learning_rate": 0.0004419016457808554, "loss": 3.0084104537963867, "step": 8884, "token_acc": 0.29859226316994214 }, { "epoch": 5.208150102609205, "grad_norm": 0.1782335823502246, "learning_rate": 0.00044188611521876807, "loss": 3.0121350288391113, "step": 8885, "token_acc": 0.2961333942488869 }, { "epoch": 5.2087364409264145, "grad_norm": 0.1607014009834479, "learning_rate": 0.00044187058285416936, "loss": 2.967247486114502, "step": 8886, "token_acc": 0.30333722761212983 }, { "epoch": 5.209322779243624, "grad_norm": 0.17535083303147614, "learning_rate": 0.00044185504868720504, "loss": 3.0083847045898438, "step": 8887, "token_acc": 0.2994988005483208 }, { "epoch": 5.209909117560833, "grad_norm": 0.20255302695839197, "learning_rate": 0.00044183951271802106, "loss": 3.0134835243225098, "step": 8888, "token_acc": 0.2964723190250621 }, { "epoch": 5.210495455878042, "grad_norm": 0.16376752223380722, "learning_rate": 0.0004418239749467633, "loss": 2.950873374938965, "step": 8889, "token_acc": 0.3054603000153231 }, { "epoch": 5.211081794195251, "grad_norm": 0.19316398726621323, "learning_rate": 0.00044180843537357787, "loss": 3.043321132659912, "step": 8890, "token_acc": 0.2929329915781765 }, { "epoch": 5.21166813251246, "grad_norm": 0.19767689159108195, "learning_rate": 0.00044179289399861053, "loss": 2.9896275997161865, "step": 8891, "token_acc": 0.29901280984519296 }, { "epoch": 5.212254470829668, "grad_norm": 0.23145129203068807, "learning_rate": 0.00044177735082200753, "loss": 2.986607313156128, "step": 8892, "token_acc": 0.29773632546313844 }, { "epoch": 5.212840809146877, "grad_norm": 0.16424175028920202, "learning_rate": 0.0004417618058439147, "loss": 3.0072226524353027, "step": 8893, "token_acc": 0.29735308691700474 }, { "epoch": 5.2134271474640865, "grad_norm": 0.16600318774178754, "learning_rate": 0.00044174625906447806, "loss": 3.036065101623535, "step": 8894, "token_acc": 0.2934841558844038 }, { "epoch": 5.214013485781296, "grad_norm": 0.17662863720933394, "learning_rate": 0.0004417307104838438, "loss": 2.970824956893921, "step": 8895, "token_acc": 0.3025347094270194 }, { "epoch": 5.214599824098505, "grad_norm": 0.20370745434552467, "learning_rate": 0.0004417151601021578, "loss": 3.003786325454712, "step": 8896, "token_acc": 0.2956347967276588 }, { "epoch": 5.215186162415714, "grad_norm": 0.23450451072540401, "learning_rate": 0.0004416996079195662, "loss": 2.9558749198913574, "step": 8897, "token_acc": 0.30644454868462995 }, { "epoch": 5.215772500732923, "grad_norm": 0.21807072201181496, "learning_rate": 0.00044168405393621516, "loss": 2.9484782218933105, "step": 8898, "token_acc": 0.30556206208245074 }, { "epoch": 5.216358839050132, "grad_norm": 0.1645682979787322, "learning_rate": 0.00044166849815225066, "loss": 2.9944329261779785, "step": 8899, "token_acc": 0.2993786516310395 }, { "epoch": 5.216945177367341, "grad_norm": 0.25450867419561446, "learning_rate": 0.00044165294056781895, "loss": 2.9656882286071777, "step": 8900, "token_acc": 0.3034319047923187 }, { "epoch": 5.21753151568455, "grad_norm": 0.18298559766996206, "learning_rate": 0.00044163738118306606, "loss": 2.980564594268799, "step": 8901, "token_acc": 0.3022960717346534 }, { "epoch": 5.218117854001759, "grad_norm": 0.18654995936683463, "learning_rate": 0.00044162181999813835, "loss": 2.972099781036377, "step": 8902, "token_acc": 0.30253908712646577 }, { "epoch": 5.218704192318968, "grad_norm": 0.2230033722583745, "learning_rate": 0.00044160625701318167, "loss": 3.0208468437194824, "step": 8903, "token_acc": 0.29435814611378175 }, { "epoch": 5.219290530636177, "grad_norm": 0.1702957777606774, "learning_rate": 0.00044159069222834247, "loss": 2.9655423164367676, "step": 8904, "token_acc": 0.3044766679479735 }, { "epoch": 5.219876868953386, "grad_norm": 0.20527997158592426, "learning_rate": 0.0004415751256437669, "loss": 3.014284372329712, "step": 8905, "token_acc": 0.2965131200722733 }, { "epoch": 5.220463207270595, "grad_norm": 0.17342505245519793, "learning_rate": 0.00044155955725960115, "loss": 3.0179245471954346, "step": 8906, "token_acc": 0.2947802911083205 }, { "epoch": 5.221049545587804, "grad_norm": 0.1611146176638883, "learning_rate": 0.0004415439870759916, "loss": 2.997894287109375, "step": 8907, "token_acc": 0.29835908333355415 }, { "epoch": 5.221635883905013, "grad_norm": 0.17740540049367778, "learning_rate": 0.00044152841509308426, "loss": 2.993295431137085, "step": 8908, "token_acc": 0.2987822660098522 }, { "epoch": 5.222222222222222, "grad_norm": 0.15936008312579414, "learning_rate": 0.00044151284131102564, "loss": 3.0057215690612793, "step": 8909, "token_acc": 0.29852242572946447 }, { "epoch": 5.222808560539431, "grad_norm": 0.1802094448567103, "learning_rate": 0.0004414972657299619, "loss": 3.030078887939453, "step": 8910, "token_acc": 0.29605489386976774 }, { "epoch": 5.2233948988566405, "grad_norm": 0.18502441182507537, "learning_rate": 0.00044148168835003943, "loss": 2.9677929878234863, "step": 8911, "token_acc": 0.30523988434837734 }, { "epoch": 5.22398123717385, "grad_norm": 0.17013372774392807, "learning_rate": 0.00044146610917140445, "loss": 3.015394449234009, "step": 8912, "token_acc": 0.29783492404752837 }, { "epoch": 5.224567575491059, "grad_norm": 0.18967484607603383, "learning_rate": 0.0004414505281942034, "loss": 2.995748281478882, "step": 8913, "token_acc": 0.29927924578687354 }, { "epoch": 5.225153913808267, "grad_norm": 0.20999488452697718, "learning_rate": 0.00044143494541858266, "loss": 2.9627771377563477, "step": 8914, "token_acc": 0.30470339625799636 }, { "epoch": 5.225740252125476, "grad_norm": 0.1813279811064846, "learning_rate": 0.00044141936084468857, "loss": 3.0169663429260254, "step": 8915, "token_acc": 0.29530421848615357 }, { "epoch": 5.226326590442685, "grad_norm": 0.19978727939794774, "learning_rate": 0.0004414037744726675, "loss": 3.000596523284912, "step": 8916, "token_acc": 0.2980014149754277 }, { "epoch": 5.226912928759894, "grad_norm": 0.22008906941238965, "learning_rate": 0.00044138818630266586, "loss": 2.980915069580078, "step": 8917, "token_acc": 0.29976783598701584 }, { "epoch": 5.227499267077103, "grad_norm": 0.17925259503391702, "learning_rate": 0.0004413725963348302, "loss": 3.0163800716400146, "step": 8918, "token_acc": 0.29651659571646105 }, { "epoch": 5.2280856053943126, "grad_norm": 0.16288475389997062, "learning_rate": 0.0004413570045693068, "loss": 2.9983134269714355, "step": 8919, "token_acc": 0.29887425333129114 }, { "epoch": 5.228671943711522, "grad_norm": 0.16554900427244335, "learning_rate": 0.00044134141100624227, "loss": 3.030074119567871, "step": 8920, "token_acc": 0.2953712437499836 }, { "epoch": 5.229258282028731, "grad_norm": 0.19551230125111524, "learning_rate": 0.0004413258156457829, "loss": 2.982029438018799, "step": 8921, "token_acc": 0.30168336456402894 }, { "epoch": 5.22984462034594, "grad_norm": 0.17103674797047672, "learning_rate": 0.0004413102184880754, "loss": 3.018326759338379, "step": 8922, "token_acc": 0.2974741519088631 }, { "epoch": 5.230430958663149, "grad_norm": 0.1846554707296096, "learning_rate": 0.00044129461953326617, "loss": 2.9929158687591553, "step": 8923, "token_acc": 0.29943835167982974 }, { "epoch": 5.231017296980358, "grad_norm": 0.21596057627301538, "learning_rate": 0.00044127901878150173, "loss": 3.029737949371338, "step": 8924, "token_acc": 0.2959451934506281 }, { "epoch": 5.231603635297566, "grad_norm": 0.20478043341852908, "learning_rate": 0.0004412634162329287, "loss": 3.0275487899780273, "step": 8925, "token_acc": 0.2950249086682165 }, { "epoch": 5.2321899736147754, "grad_norm": 0.18603483725869288, "learning_rate": 0.0004412478118876936, "loss": 2.9790334701538086, "step": 8926, "token_acc": 0.3020004813992672 }, { "epoch": 5.232776311931985, "grad_norm": 0.21494013758739133, "learning_rate": 0.000441232205745943, "loss": 2.984231948852539, "step": 8927, "token_acc": 0.3004269037072689 }, { "epoch": 5.233362650249194, "grad_norm": 0.1984626802598579, "learning_rate": 0.0004412165978078235, "loss": 2.957655429840088, "step": 8928, "token_acc": 0.30387090621834706 }, { "epoch": 5.233948988566403, "grad_norm": 0.16402117223100196, "learning_rate": 0.00044120098807348175, "loss": 2.9990286827087402, "step": 8929, "token_acc": 0.29905224281170634 }, { "epoch": 5.234535326883612, "grad_norm": 0.20654602042874198, "learning_rate": 0.0004411853765430644, "loss": 2.9942474365234375, "step": 8930, "token_acc": 0.2988215302451835 }, { "epoch": 5.235121665200821, "grad_norm": 0.1756408942352806, "learning_rate": 0.00044116976321671806, "loss": 2.9616200923919678, "step": 8931, "token_acc": 0.3048046908728404 }, { "epoch": 5.23570800351803, "grad_norm": 0.2036433542364515, "learning_rate": 0.0004411541480945893, "loss": 2.9952173233032227, "step": 8932, "token_acc": 0.300897421569825 }, { "epoch": 5.236294341835239, "grad_norm": 0.178910159319396, "learning_rate": 0.000441138531176825, "loss": 3.0330865383148193, "step": 8933, "token_acc": 0.2943139031836964 }, { "epoch": 5.236880680152448, "grad_norm": 0.17842817647427806, "learning_rate": 0.0004411229124635717, "loss": 3.020102024078369, "step": 8934, "token_acc": 0.2958466112406758 }, { "epoch": 5.237467018469657, "grad_norm": 0.2439334544518674, "learning_rate": 0.00044110729195497624, "loss": 2.9836511611938477, "step": 8935, "token_acc": 0.3001236250673598 }, { "epoch": 5.238053356786866, "grad_norm": 0.20768452885111913, "learning_rate": 0.0004410916696511853, "loss": 2.9697375297546387, "step": 8936, "token_acc": 0.3034466440571023 }, { "epoch": 5.238639695104075, "grad_norm": 0.16610944527275565, "learning_rate": 0.0004410760455523456, "loss": 2.97253155708313, "step": 8937, "token_acc": 0.30274868831913676 }, { "epoch": 5.239226033421284, "grad_norm": 0.26550199089875925, "learning_rate": 0.0004410604196586039, "loss": 2.98433518409729, "step": 8938, "token_acc": 0.301486028734215 }, { "epoch": 5.239812371738493, "grad_norm": 0.23148763380201653, "learning_rate": 0.00044104479197010704, "loss": 3.0180928707122803, "step": 8939, "token_acc": 0.29548702087141016 }, { "epoch": 5.240398710055702, "grad_norm": 0.1689519885389431, "learning_rate": 0.0004410291624870018, "loss": 2.970151901245117, "step": 8940, "token_acc": 0.3029989658738366 }, { "epoch": 5.240985048372911, "grad_norm": 0.2566793729561637, "learning_rate": 0.000441013531209435, "loss": 2.960186243057251, "step": 8941, "token_acc": 0.3059482881643985 }, { "epoch": 5.24157138669012, "grad_norm": 0.1480607027245773, "learning_rate": 0.0004409978981375534, "loss": 2.9820432662963867, "step": 8942, "token_acc": 0.3007441695412346 }, { "epoch": 5.2421577250073295, "grad_norm": 0.2813427192433209, "learning_rate": 0.000440982263271504, "loss": 2.9836249351501465, "step": 8943, "token_acc": 0.30075547428497346 }, { "epoch": 5.242744063324539, "grad_norm": 0.18084629297810112, "learning_rate": 0.0004409666266114336, "loss": 3.0156726837158203, "step": 8944, "token_acc": 0.2977267806884607 }, { "epoch": 5.243330401641748, "grad_norm": 0.2590930547130295, "learning_rate": 0.000440950988157489, "loss": 3.019381523132324, "step": 8945, "token_acc": 0.29628158969835383 }, { "epoch": 5.243916739958956, "grad_norm": 0.21238467189924815, "learning_rate": 0.0004409353479098172, "loss": 2.9954915046691895, "step": 8946, "token_acc": 0.2999702526400782 }, { "epoch": 5.244503078276165, "grad_norm": 0.21909336429574225, "learning_rate": 0.0004409197058685651, "loss": 2.9572606086730957, "step": 8947, "token_acc": 0.3049026728272011 }, { "epoch": 5.245089416593374, "grad_norm": 0.18403618719413634, "learning_rate": 0.00044090406203387965, "loss": 3.008298873901367, "step": 8948, "token_acc": 0.2969490476238799 }, { "epoch": 5.245675754910583, "grad_norm": 0.20885422417421476, "learning_rate": 0.00044088841640590776, "loss": 2.993046283721924, "step": 8949, "token_acc": 0.29913778788304574 }, { "epoch": 5.246262093227792, "grad_norm": 0.16845572934840913, "learning_rate": 0.00044087276898479644, "loss": 2.9792304039001465, "step": 8950, "token_acc": 0.3001878658282922 }, { "epoch": 5.2468484315450015, "grad_norm": 0.25750556366653965, "learning_rate": 0.00044085711977069267, "loss": 2.963646650314331, "step": 8951, "token_acc": 0.3031053964543152 }, { "epoch": 5.247434769862211, "grad_norm": 0.1715417793121581, "learning_rate": 0.0004408414687637434, "loss": 3.00197696685791, "step": 8952, "token_acc": 0.2970641673483221 }, { "epoch": 5.24802110817942, "grad_norm": 0.21375651044166544, "learning_rate": 0.0004408258159640957, "loss": 3.0023531913757324, "step": 8953, "token_acc": 0.2989688588653051 }, { "epoch": 5.248607446496629, "grad_norm": 0.15817136100485737, "learning_rate": 0.00044081016137189667, "loss": 3.018862247467041, "step": 8954, "token_acc": 0.2944671701279631 }, { "epoch": 5.249193784813838, "grad_norm": 0.22573715221745091, "learning_rate": 0.00044079450498729324, "loss": 2.9969987869262695, "step": 8955, "token_acc": 0.29888204365606247 }, { "epoch": 5.249780123131047, "grad_norm": 0.15697446680234783, "learning_rate": 0.0004407788468104325, "loss": 3.039738655090332, "step": 8956, "token_acc": 0.29376139494715486 }, { "epoch": 5.250366461448255, "grad_norm": 0.19854052336317432, "learning_rate": 0.0004407631868414616, "loss": 2.9562487602233887, "step": 8957, "token_acc": 0.3041206195526397 }, { "epoch": 5.250952799765464, "grad_norm": 0.15438002685162322, "learning_rate": 0.00044074752508052763, "loss": 2.9413490295410156, "step": 8958, "token_acc": 0.3079357478216513 }, { "epoch": 5.2515391380826735, "grad_norm": 0.20102382666314142, "learning_rate": 0.00044073186152777764, "loss": 3.018157482147217, "step": 8959, "token_acc": 0.295324852375443 }, { "epoch": 5.252125476399883, "grad_norm": 0.17257851015975986, "learning_rate": 0.00044071619618335896, "loss": 2.957867383956909, "step": 8960, "token_acc": 0.30593616033640064 }, { "epoch": 5.252711814717092, "grad_norm": 0.2049432821868892, "learning_rate": 0.00044070052904741854, "loss": 2.9966557025909424, "step": 8961, "token_acc": 0.2991678138552783 }, { "epoch": 5.253298153034301, "grad_norm": 0.17512283238701976, "learning_rate": 0.00044068486012010356, "loss": 3.0010833740234375, "step": 8962, "token_acc": 0.2994248168663683 }, { "epoch": 5.25388449135151, "grad_norm": 0.23746866372384712, "learning_rate": 0.0004406691894015613, "loss": 2.9798941612243652, "step": 8963, "token_acc": 0.3015160022459292 }, { "epoch": 5.254470829668719, "grad_norm": 0.1783355414211252, "learning_rate": 0.000440653516891939, "loss": 3.007066249847412, "step": 8964, "token_acc": 0.2958862356432046 }, { "epoch": 5.255057167985928, "grad_norm": 0.20807832795203093, "learning_rate": 0.0004406378425913837, "loss": 2.976379156112671, "step": 8965, "token_acc": 0.30120299633536185 }, { "epoch": 5.255643506303137, "grad_norm": 0.16235502430411747, "learning_rate": 0.00044062216650004286, "loss": 2.9832100868225098, "step": 8966, "token_acc": 0.30056759615358736 }, { "epoch": 5.256229844620346, "grad_norm": 0.2473962715130966, "learning_rate": 0.0004406064886180636, "loss": 2.9909369945526123, "step": 8967, "token_acc": 0.2990887254991841 }, { "epoch": 5.256816182937555, "grad_norm": 0.20485010882088825, "learning_rate": 0.0004405908089455932, "loss": 2.9975104331970215, "step": 8968, "token_acc": 0.2990148296802106 }, { "epoch": 5.257402521254764, "grad_norm": 0.19310738514596032, "learning_rate": 0.00044057512748277907, "loss": 2.9998779296875, "step": 8969, "token_acc": 0.29703188175250217 }, { "epoch": 5.257988859571973, "grad_norm": 0.21991828785208, "learning_rate": 0.00044055944422976835, "loss": 3.00276255607605, "step": 8970, "token_acc": 0.2998272101819561 }, { "epoch": 5.258575197889182, "grad_norm": 0.17922257979645323, "learning_rate": 0.0004405437591867084, "loss": 3.0046751499176025, "step": 8971, "token_acc": 0.29758238295623185 }, { "epoch": 5.259161536206391, "grad_norm": 0.19963587343188294, "learning_rate": 0.0004405280723537467, "loss": 2.9805684089660645, "step": 8972, "token_acc": 0.30155046056612655 }, { "epoch": 5.2597478745236, "grad_norm": 0.1566887805441179, "learning_rate": 0.0004405123837310304, "loss": 3.0134499073028564, "step": 8973, "token_acc": 0.2961902849342508 }, { "epoch": 5.260334212840809, "grad_norm": 0.19050060204867159, "learning_rate": 0.000440496693318707, "loss": 3.0177907943725586, "step": 8974, "token_acc": 0.29542935062636944 }, { "epoch": 5.260920551158018, "grad_norm": 0.16947322314830734, "learning_rate": 0.0004404810011169238, "loss": 2.9758706092834473, "step": 8975, "token_acc": 0.3005742833378365 }, { "epoch": 5.2615068894752275, "grad_norm": 0.16374807215274723, "learning_rate": 0.00044046530712582837, "loss": 2.969801664352417, "step": 8976, "token_acc": 0.30163080044302293 }, { "epoch": 5.262093227792437, "grad_norm": 0.17665361265205137, "learning_rate": 0.00044044961134556807, "loss": 2.9777979850769043, "step": 8977, "token_acc": 0.30307320423883005 }, { "epoch": 5.262679566109645, "grad_norm": 0.18098293093167578, "learning_rate": 0.00044043391377629015, "loss": 2.998828411102295, "step": 8978, "token_acc": 0.2994901124017739 }, { "epoch": 5.263265904426854, "grad_norm": 0.1762123359405021, "learning_rate": 0.00044041821441814235, "loss": 2.9996912479400635, "step": 8979, "token_acc": 0.30041819710012313 }, { "epoch": 5.263852242744063, "grad_norm": 0.19713829118857354, "learning_rate": 0.00044040251327127195, "loss": 2.9943418502807617, "step": 8980, "token_acc": 0.29986187410060605 }, { "epoch": 5.264438581061272, "grad_norm": 0.17523977379726896, "learning_rate": 0.00044038681033582656, "loss": 3.0196452140808105, "step": 8981, "token_acc": 0.29542695799028923 }, { "epoch": 5.265024919378481, "grad_norm": 0.20990598627151094, "learning_rate": 0.0004403711056119536, "loss": 2.9773290157318115, "step": 8982, "token_acc": 0.3026007372985186 }, { "epoch": 5.26561125769569, "grad_norm": 0.1680517606360513, "learning_rate": 0.00044035539909980065, "loss": 2.9834506511688232, "step": 8983, "token_acc": 0.30214787332366083 }, { "epoch": 5.2661975960128995, "grad_norm": 0.1885221434714052, "learning_rate": 0.0004403396907995152, "loss": 2.9803786277770996, "step": 8984, "token_acc": 0.3029879139102925 }, { "epoch": 5.266783934330109, "grad_norm": 0.1614341785092233, "learning_rate": 0.0004403239807112449, "loss": 2.9949960708618164, "step": 8985, "token_acc": 0.2997863567920408 }, { "epoch": 5.267370272647318, "grad_norm": 0.19464696906147128, "learning_rate": 0.0004403082688351372, "loss": 3.0128018856048584, "step": 8986, "token_acc": 0.2963412745433046 }, { "epoch": 5.267956610964527, "grad_norm": 0.16907962032436916, "learning_rate": 0.00044029255517133984, "loss": 2.973231077194214, "step": 8987, "token_acc": 0.30173226475472714 }, { "epoch": 5.268542949281736, "grad_norm": 0.1751836547814023, "learning_rate": 0.00044027683972000026, "loss": 2.9882876873016357, "step": 8988, "token_acc": 0.2996053588119223 }, { "epoch": 5.269129287598945, "grad_norm": 0.16348007087086996, "learning_rate": 0.00044026112248126624, "loss": 2.986239194869995, "step": 8989, "token_acc": 0.30013280910991597 }, { "epoch": 5.269715625916153, "grad_norm": 0.16097578088024656, "learning_rate": 0.0004402454034552853, "loss": 2.9820144176483154, "step": 8990, "token_acc": 0.3003335074954705 }, { "epoch": 5.270301964233362, "grad_norm": 0.18900533189058852, "learning_rate": 0.0004402296826422052, "loss": 2.9793245792388916, "step": 8991, "token_acc": 0.3009920481259509 }, { "epoch": 5.2708883025505715, "grad_norm": 0.1606179658613837, "learning_rate": 0.00044021396004217353, "loss": 2.995755195617676, "step": 8992, "token_acc": 0.2992480379059865 }, { "epoch": 5.271474640867781, "grad_norm": 0.18573600184805467, "learning_rate": 0.000440198235655338, "loss": 2.9625377655029297, "step": 8993, "token_acc": 0.30563432052089895 }, { "epoch": 5.27206097918499, "grad_norm": 0.249366232030939, "learning_rate": 0.0004401825094818465, "loss": 3.0017154216766357, "step": 8994, "token_acc": 0.2982271864839338 }, { "epoch": 5.272647317502199, "grad_norm": 0.19587143004766586, "learning_rate": 0.0004401667815218464, "loss": 3.040799617767334, "step": 8995, "token_acc": 0.292670654364538 }, { "epoch": 5.273233655819408, "grad_norm": 0.17018425703692142, "learning_rate": 0.0004401510517754857, "loss": 3.045501232147217, "step": 8996, "token_acc": 0.2927826011648299 }, { "epoch": 5.273819994136617, "grad_norm": 0.19111976408633977, "learning_rate": 0.00044013532024291213, "loss": 2.998466730117798, "step": 8997, "token_acc": 0.2992639759516392 }, { "epoch": 5.274406332453826, "grad_norm": 0.19311184305396234, "learning_rate": 0.00044011958692427344, "loss": 3.0144405364990234, "step": 8998, "token_acc": 0.29683231344418404 }, { "epoch": 5.274992670771035, "grad_norm": 0.20402747957454556, "learning_rate": 0.00044010385181971737, "loss": 2.973325729370117, "step": 8999, "token_acc": 0.30287773596904977 }, { "epoch": 5.2755790090882435, "grad_norm": 0.21138347478060932, "learning_rate": 0.00044008811492939184, "loss": 2.994374990463257, "step": 9000, "token_acc": 0.3006901449683932 }, { "epoch": 5.276165347405453, "grad_norm": 0.17444842050541656, "learning_rate": 0.0004400723762534446, "loss": 2.9771082401275635, "step": 9001, "token_acc": 0.30027888144799214 }, { "epoch": 5.276751685722662, "grad_norm": 0.1793392358979344, "learning_rate": 0.0004400566357920235, "loss": 2.985520601272583, "step": 9002, "token_acc": 0.300737859041691 }, { "epoch": 5.277338024039871, "grad_norm": 0.23198400222942003, "learning_rate": 0.00044004089354527645, "loss": 3.0121638774871826, "step": 9003, "token_acc": 0.298374639892676 }, { "epoch": 5.27792436235708, "grad_norm": 0.19786139974896397, "learning_rate": 0.0004400251495133512, "loss": 2.980691909790039, "step": 9004, "token_acc": 0.3015544482546709 }, { "epoch": 5.278510700674289, "grad_norm": 0.16310231858834656, "learning_rate": 0.0004400094036963958, "loss": 3.0186262130737305, "step": 9005, "token_acc": 0.295912923391883 }, { "epoch": 5.279097038991498, "grad_norm": 0.2226138662969139, "learning_rate": 0.0004399936560945581, "loss": 3.0005500316619873, "step": 9006, "token_acc": 0.29972588308095144 }, { "epoch": 5.279683377308707, "grad_norm": 0.2086619740968379, "learning_rate": 0.00043997790670798596, "loss": 2.9874324798583984, "step": 9007, "token_acc": 0.30122326149264456 }, { "epoch": 5.280269715625916, "grad_norm": 0.2018533613990206, "learning_rate": 0.00043996215553682746, "loss": 3.01945161819458, "step": 9008, "token_acc": 0.29521037504527486 }, { "epoch": 5.2808560539431255, "grad_norm": 0.29030530268818694, "learning_rate": 0.00043994640258123043, "loss": 3.0150246620178223, "step": 9009, "token_acc": 0.294196137681103 }, { "epoch": 5.281442392260335, "grad_norm": 0.22786349535474545, "learning_rate": 0.00043993064784134296, "loss": 2.9889488220214844, "step": 9010, "token_acc": 0.29927417600066314 }, { "epoch": 5.282028730577543, "grad_norm": 0.184376233021802, "learning_rate": 0.0004399148913173129, "loss": 3.003593921661377, "step": 9011, "token_acc": 0.2980827694547884 }, { "epoch": 5.282615068894752, "grad_norm": 0.23196170676110875, "learning_rate": 0.0004398991330092884, "loss": 2.987874984741211, "step": 9012, "token_acc": 0.30077175970867276 }, { "epoch": 5.283201407211961, "grad_norm": 0.1678465415284779, "learning_rate": 0.00043988337291741745, "loss": 2.9983150959014893, "step": 9013, "token_acc": 0.29832164295316305 }, { "epoch": 5.28378774552917, "grad_norm": 0.26437958153503277, "learning_rate": 0.00043986761104184807, "loss": 3.0335121154785156, "step": 9014, "token_acc": 0.2949338983447906 }, { "epoch": 5.284374083846379, "grad_norm": 0.19310288010014848, "learning_rate": 0.00043985184738272833, "loss": 2.9948813915252686, "step": 9015, "token_acc": 0.30107556716997697 }, { "epoch": 5.284960422163588, "grad_norm": 0.21055216629100604, "learning_rate": 0.0004398360819402063, "loss": 3.0090439319610596, "step": 9016, "token_acc": 0.298008533003716 }, { "epoch": 5.2855467604807975, "grad_norm": 0.20108333680752324, "learning_rate": 0.00043982031471443014, "loss": 2.9878458976745605, "step": 9017, "token_acc": 0.300442637688266 }, { "epoch": 5.286133098798007, "grad_norm": 0.18783309491288122, "learning_rate": 0.0004398045457055478, "loss": 2.951472759246826, "step": 9018, "token_acc": 0.30635349120528127 }, { "epoch": 5.286719437115216, "grad_norm": 0.18916680988470463, "learning_rate": 0.0004397887749137076, "loss": 2.9853310585021973, "step": 9019, "token_acc": 0.30047829982710533 }, { "epoch": 5.287305775432425, "grad_norm": 0.16452639685793738, "learning_rate": 0.0004397730023390576, "loss": 2.9758191108703613, "step": 9020, "token_acc": 0.3014142738296317 }, { "epoch": 5.287892113749633, "grad_norm": 0.19148341471042932, "learning_rate": 0.000439757227981746, "loss": 2.998140811920166, "step": 9021, "token_acc": 0.299276188097431 }, { "epoch": 5.288478452066842, "grad_norm": 0.17677828606576196, "learning_rate": 0.0004397414518419209, "loss": 2.9652743339538574, "step": 9022, "token_acc": 0.3043837618762873 }, { "epoch": 5.289064790384051, "grad_norm": 0.18182722780218183, "learning_rate": 0.00043972567391973054, "loss": 2.9920711517333984, "step": 9023, "token_acc": 0.2978256366731837 }, { "epoch": 5.28965112870126, "grad_norm": 0.190047599058359, "learning_rate": 0.00043970989421532314, "loss": 3.048684597015381, "step": 9024, "token_acc": 0.2915355610453777 }, { "epoch": 5.2902374670184695, "grad_norm": 0.2128984464923535, "learning_rate": 0.00043969411272884695, "loss": 3.0161798000335693, "step": 9025, "token_acc": 0.2956715846200584 }, { "epoch": 5.290823805335679, "grad_norm": 0.2076837400338364, "learning_rate": 0.0004396783294604502, "loss": 2.9954628944396973, "step": 9026, "token_acc": 0.29907776197882346 }, { "epoch": 5.291410143652888, "grad_norm": 0.17352002175367803, "learning_rate": 0.0004396625444102811, "loss": 2.988490343093872, "step": 9027, "token_acc": 0.300507614213198 }, { "epoch": 5.291996481970097, "grad_norm": 0.16357906684042695, "learning_rate": 0.0004396467575784879, "loss": 3.040863513946533, "step": 9028, "token_acc": 0.29191863517745986 }, { "epoch": 5.292582820287306, "grad_norm": 0.16871807293084873, "learning_rate": 0.00043963096896521903, "loss": 2.991218090057373, "step": 9029, "token_acc": 0.29855056006618597 }, { "epoch": 5.293169158604515, "grad_norm": 0.15837612569619025, "learning_rate": 0.0004396151785706227, "loss": 3.007265329360962, "step": 9030, "token_acc": 0.2961740328211461 }, { "epoch": 5.293755496921724, "grad_norm": 0.1693331351156298, "learning_rate": 0.00043959938639484737, "loss": 2.9969210624694824, "step": 9031, "token_acc": 0.29905991868572585 }, { "epoch": 5.294341835238933, "grad_norm": 0.15884172461139925, "learning_rate": 0.00043958359243804123, "loss": 2.961029052734375, "step": 9032, "token_acc": 0.3057396982778822 }, { "epoch": 5.2949281735561415, "grad_norm": 0.1562626247102422, "learning_rate": 0.00043956779670035275, "loss": 3.016420364379883, "step": 9033, "token_acc": 0.2973867256727824 }, { "epoch": 5.295514511873351, "grad_norm": 0.16909214382619767, "learning_rate": 0.00043955199918193017, "loss": 2.964839458465576, "step": 9034, "token_acc": 0.3024468323609612 }, { "epoch": 5.29610085019056, "grad_norm": 0.16909519755189947, "learning_rate": 0.00043953619988292204, "loss": 2.970378875732422, "step": 9035, "token_acc": 0.3040509468571576 }, { "epoch": 5.296687188507769, "grad_norm": 0.16752595514924642, "learning_rate": 0.0004395203988034767, "loss": 2.984152317047119, "step": 9036, "token_acc": 0.30099869401551815 }, { "epoch": 5.297273526824978, "grad_norm": 0.17199017639625228, "learning_rate": 0.00043950459594374266, "loss": 2.991776466369629, "step": 9037, "token_acc": 0.29917025401297476 }, { "epoch": 5.297859865142187, "grad_norm": 0.16622895257561343, "learning_rate": 0.0004394887913038682, "loss": 2.9983131885528564, "step": 9038, "token_acc": 0.29719993535505196 }, { "epoch": 5.298446203459396, "grad_norm": 0.15446779685894196, "learning_rate": 0.00043947298488400193, "loss": 2.983732223510742, "step": 9039, "token_acc": 0.3007544608579208 }, { "epoch": 5.299032541776605, "grad_norm": 0.16341371397975465, "learning_rate": 0.0004394571766842923, "loss": 2.9753713607788086, "step": 9040, "token_acc": 0.3033618070771543 }, { "epoch": 5.299618880093814, "grad_norm": 0.1582565193448118, "learning_rate": 0.00043944136670488775, "loss": 2.9925074577331543, "step": 9041, "token_acc": 0.2988138734166553 }, { "epoch": 5.3002052184110235, "grad_norm": 0.2158839265510315, "learning_rate": 0.00043942555494593693, "loss": 3.013381004333496, "step": 9042, "token_acc": 0.29659904260264947 }, { "epoch": 5.300791556728232, "grad_norm": 0.2955990565102046, "learning_rate": 0.0004394097414075882, "loss": 2.9416894912719727, "step": 9043, "token_acc": 0.30741741596830147 }, { "epoch": 5.301377895045441, "grad_norm": 0.41102855052490916, "learning_rate": 0.0004393939260899902, "loss": 3.014218330383301, "step": 9044, "token_acc": 0.29890883937788265 }, { "epoch": 5.30196423336265, "grad_norm": 0.24742143524284269, "learning_rate": 0.0004393781089932915, "loss": 2.9374709129333496, "step": 9045, "token_acc": 0.306773796104543 }, { "epoch": 5.302550571679859, "grad_norm": 0.19923540453812247, "learning_rate": 0.00043936229011764063, "loss": 2.993546485900879, "step": 9046, "token_acc": 0.2991383160116921 }, { "epoch": 5.303136909997068, "grad_norm": 0.18734599103105803, "learning_rate": 0.0004393464694631862, "loss": 3.005157470703125, "step": 9047, "token_acc": 0.29651369997819066 }, { "epoch": 5.303723248314277, "grad_norm": 0.19786661598284475, "learning_rate": 0.00043933064703007685, "loss": 3.0181193351745605, "step": 9048, "token_acc": 0.29609156895589056 }, { "epoch": 5.304309586631486, "grad_norm": 0.1849158239155117, "learning_rate": 0.0004393148228184612, "loss": 2.9698729515075684, "step": 9049, "token_acc": 0.30432419903162666 }, { "epoch": 5.3048959249486956, "grad_norm": 0.22765277315197124, "learning_rate": 0.0004392989968284879, "loss": 2.9522218704223633, "step": 9050, "token_acc": 0.30494219586338744 }, { "epoch": 5.305482263265905, "grad_norm": 0.179471174950499, "learning_rate": 0.0004392831690603056, "loss": 3.0216903686523438, "step": 9051, "token_acc": 0.29581894408864573 }, { "epoch": 5.306068601583114, "grad_norm": 0.21619072079289595, "learning_rate": 0.00043926733951406294, "loss": 2.97031831741333, "step": 9052, "token_acc": 0.30289786121123663 }, { "epoch": 5.306654939900323, "grad_norm": 0.1862303326501486, "learning_rate": 0.00043925150818990876, "loss": 3.005505084991455, "step": 9053, "token_acc": 0.29801860394766694 }, { "epoch": 5.307241278217531, "grad_norm": 0.2451071186387691, "learning_rate": 0.00043923567508799164, "loss": 3.001654624938965, "step": 9054, "token_acc": 0.29795734210519415 }, { "epoch": 5.30782761653474, "grad_norm": 0.18561520212786708, "learning_rate": 0.00043921984020846034, "loss": 3.005300998687744, "step": 9055, "token_acc": 0.2967561583612746 }, { "epoch": 5.308413954851949, "grad_norm": 0.17125981233882595, "learning_rate": 0.0004392040035514636, "loss": 2.987459182739258, "step": 9056, "token_acc": 0.30125220509557826 }, { "epoch": 5.3090002931691584, "grad_norm": 0.16146577639858442, "learning_rate": 0.0004391881651171503, "loss": 2.992894172668457, "step": 9057, "token_acc": 0.2997772475365521 }, { "epoch": 5.309586631486368, "grad_norm": 0.17722148214195074, "learning_rate": 0.000439172324905669, "loss": 2.9894490242004395, "step": 9058, "token_acc": 0.30025777544721316 }, { "epoch": 5.310172969803577, "grad_norm": 0.16434965200368964, "learning_rate": 0.0004391564829171687, "loss": 3.050797462463379, "step": 9059, "token_acc": 0.2911740963791437 }, { "epoch": 5.310759308120786, "grad_norm": 0.1801426116645019, "learning_rate": 0.0004391406391517981, "loss": 3.003714084625244, "step": 9060, "token_acc": 0.29981185863860704 }, { "epoch": 5.311345646437995, "grad_norm": 0.17193464635070643, "learning_rate": 0.00043912479360970605, "loss": 2.9819345474243164, "step": 9061, "token_acc": 0.30195319040905727 }, { "epoch": 5.311931984755204, "grad_norm": 0.16890188659143965, "learning_rate": 0.0004391089462910414, "loss": 2.9901323318481445, "step": 9062, "token_acc": 0.29958821971247623 }, { "epoch": 5.312518323072413, "grad_norm": 0.1588347044790321, "learning_rate": 0.0004390930971959531, "loss": 3.0074849128723145, "step": 9063, "token_acc": 0.2977312790220008 }, { "epoch": 5.313104661389621, "grad_norm": 0.20184595665893776, "learning_rate": 0.0004390772463245899, "loss": 3.038729429244995, "step": 9064, "token_acc": 0.2940239404069497 }, { "epoch": 5.3136909997068305, "grad_norm": 0.1853595761019235, "learning_rate": 0.0004390613936771007, "loss": 3.005371570587158, "step": 9065, "token_acc": 0.2974896062283187 }, { "epoch": 5.31427733802404, "grad_norm": 0.17181148626626036, "learning_rate": 0.00043904553925363463, "loss": 2.994385004043579, "step": 9066, "token_acc": 0.29981219327796194 }, { "epoch": 5.314863676341249, "grad_norm": 0.16695814851275453, "learning_rate": 0.00043902968305434037, "loss": 2.994967460632324, "step": 9067, "token_acc": 0.29825487668290734 }, { "epoch": 5.315450014658458, "grad_norm": 0.17091380657950658, "learning_rate": 0.00043901382507936695, "loss": 3.0300703048706055, "step": 9068, "token_acc": 0.2957530805735992 }, { "epoch": 5.316036352975667, "grad_norm": 0.16914799738598335, "learning_rate": 0.0004389979653288634, "loss": 2.9900264739990234, "step": 9069, "token_acc": 0.29975252007001874 }, { "epoch": 5.316622691292876, "grad_norm": 0.17175649558983785, "learning_rate": 0.00043898210380297856, "loss": 2.979917526245117, "step": 9070, "token_acc": 0.302007556154995 }, { "epoch": 5.317209029610085, "grad_norm": 0.19096726336641856, "learning_rate": 0.00043896624050186153, "loss": 2.973921298980713, "step": 9071, "token_acc": 0.30105595583902744 }, { "epoch": 5.317795367927294, "grad_norm": 0.19104873045880574, "learning_rate": 0.00043895037542566133, "loss": 3.014247417449951, "step": 9072, "token_acc": 0.2959433058998717 }, { "epoch": 5.318381706244503, "grad_norm": 0.17981280137888261, "learning_rate": 0.000438934508574527, "loss": 2.9670023918151855, "step": 9073, "token_acc": 0.30469999606035536 }, { "epoch": 5.3189680445617125, "grad_norm": 0.2031681524840766, "learning_rate": 0.00043891863994860745, "loss": 3.0167717933654785, "step": 9074, "token_acc": 0.29542234418536056 }, { "epoch": 5.319554382878922, "grad_norm": 0.2029206731745043, "learning_rate": 0.000438902769548052, "loss": 2.9869422912597656, "step": 9075, "token_acc": 0.3006023337149339 }, { "epoch": 5.32014072119613, "grad_norm": 0.1665393521932243, "learning_rate": 0.0004388868973730095, "loss": 2.9725286960601807, "step": 9076, "token_acc": 0.30255266049769436 }, { "epoch": 5.320727059513339, "grad_norm": 0.2326136375892318, "learning_rate": 0.00043887102342362905, "loss": 2.991028308868408, "step": 9077, "token_acc": 0.2987586264608433 }, { "epoch": 5.321313397830548, "grad_norm": 0.35654014872848244, "learning_rate": 0.00043885514770005996, "loss": 2.9992032051086426, "step": 9078, "token_acc": 0.2984378666572466 }, { "epoch": 5.321899736147757, "grad_norm": 0.43352016405479826, "learning_rate": 0.0004388392702024512, "loss": 3.036896228790283, "step": 9079, "token_acc": 0.2930976126794435 }, { "epoch": 5.322486074464966, "grad_norm": 0.22709263576587976, "learning_rate": 0.0004388233909309519, "loss": 3.0080347061157227, "step": 9080, "token_acc": 0.29669356503021166 }, { "epoch": 5.323072412782175, "grad_norm": 0.3360876699048018, "learning_rate": 0.0004388075098857114, "loss": 3.030996084213257, "step": 9081, "token_acc": 0.29507307071053557 }, { "epoch": 5.3236587510993845, "grad_norm": 0.1713611819860275, "learning_rate": 0.0004387916270668787, "loss": 2.9721522331237793, "step": 9082, "token_acc": 0.30292086206851493 }, { "epoch": 5.324245089416594, "grad_norm": 0.27410192847292403, "learning_rate": 0.00043877574247460304, "loss": 3.0104458332061768, "step": 9083, "token_acc": 0.29868537886705276 }, { "epoch": 5.324831427733803, "grad_norm": 0.18501325999842583, "learning_rate": 0.0004387598561090337, "loss": 2.9424381256103516, "step": 9084, "token_acc": 0.30765649332926615 }, { "epoch": 5.325417766051012, "grad_norm": 0.2243954211447583, "learning_rate": 0.0004387439679703199, "loss": 3.0208492279052734, "step": 9085, "token_acc": 0.29631062516065876 }, { "epoch": 5.32600410436822, "grad_norm": 0.2142991068401102, "learning_rate": 0.00043872807805861084, "loss": 3.006753921508789, "step": 9086, "token_acc": 0.29839390276938466 }, { "epoch": 5.326590442685429, "grad_norm": 0.18678241223278796, "learning_rate": 0.00043871218637405574, "loss": 2.9700675010681152, "step": 9087, "token_acc": 0.304498306944872 }, { "epoch": 5.327176781002638, "grad_norm": 0.21918982552121088, "learning_rate": 0.00043869629291680396, "loss": 3.0078067779541016, "step": 9088, "token_acc": 0.29691341953773615 }, { "epoch": 5.327763119319847, "grad_norm": 0.1647629923234581, "learning_rate": 0.00043868039768700485, "loss": 2.9874019622802734, "step": 9089, "token_acc": 0.2998272637870504 }, { "epoch": 5.3283494576370565, "grad_norm": 0.23216085744773965, "learning_rate": 0.0004386645006848076, "loss": 2.9952993392944336, "step": 9090, "token_acc": 0.2996796118014837 }, { "epoch": 5.328935795954266, "grad_norm": 0.1845954321114459, "learning_rate": 0.0004386486019103616, "loss": 3.0147762298583984, "step": 9091, "token_acc": 0.29721920606584834 }, { "epoch": 5.329522134271475, "grad_norm": 0.18160347893725096, "learning_rate": 0.00043863270136381614, "loss": 2.9723920822143555, "step": 9092, "token_acc": 0.3020542979134778 }, { "epoch": 5.330108472588684, "grad_norm": 0.1921174722640265, "learning_rate": 0.0004386167990453207, "loss": 3.005222797393799, "step": 9093, "token_acc": 0.2964201099499321 }, { "epoch": 5.330694810905893, "grad_norm": 0.16919726479823421, "learning_rate": 0.00043860089495502457, "loss": 2.9306654930114746, "step": 9094, "token_acc": 0.30764397263842985 }, { "epoch": 5.331281149223102, "grad_norm": 0.22246804601587652, "learning_rate": 0.0004385849890930772, "loss": 3.024413585662842, "step": 9095, "token_acc": 0.29546120748317123 }, { "epoch": 5.331867487540311, "grad_norm": 0.16348867951802154, "learning_rate": 0.0004385690814596279, "loss": 3.0298447608947754, "step": 9096, "token_acc": 0.29299322607959355 }, { "epoch": 5.33245382585752, "grad_norm": 0.19417277952104597, "learning_rate": 0.00043855317205482633, "loss": 2.9752049446105957, "step": 9097, "token_acc": 0.3016191123767261 }, { "epoch": 5.3330401641747285, "grad_norm": 0.17432869043149182, "learning_rate": 0.00043853726087882166, "loss": 2.951829433441162, "step": 9098, "token_acc": 0.3064632413199307 }, { "epoch": 5.333626502491938, "grad_norm": 0.19529975466077387, "learning_rate": 0.00043852134793176357, "loss": 2.9743847846984863, "step": 9099, "token_acc": 0.30033352478260433 }, { "epoch": 5.334212840809147, "grad_norm": 0.15902717560952997, "learning_rate": 0.00043850543321380137, "loss": 2.9616200923919678, "step": 9100, "token_acc": 0.3040839599258503 }, { "epoch": 5.334799179126356, "grad_norm": 0.1711795980729838, "learning_rate": 0.00043848951672508467, "loss": 2.9648969173431396, "step": 9101, "token_acc": 0.3021213100127679 }, { "epoch": 5.335385517443565, "grad_norm": 0.17683121016681247, "learning_rate": 0.00043847359846576294, "loss": 2.9416234493255615, "step": 9102, "token_acc": 0.3071028697900199 }, { "epoch": 5.335971855760774, "grad_norm": 0.17082426975900597, "learning_rate": 0.00043845767843598573, "loss": 2.97518253326416, "step": 9103, "token_acc": 0.3020241852036798 }, { "epoch": 5.336558194077983, "grad_norm": 0.17732045007247624, "learning_rate": 0.0004384417566359026, "loss": 2.9920921325683594, "step": 9104, "token_acc": 0.29968072609712754 }, { "epoch": 5.337144532395192, "grad_norm": 0.18732295502292526, "learning_rate": 0.0004384258330656631, "loss": 3.0182032585144043, "step": 9105, "token_acc": 0.2962369624790306 }, { "epoch": 5.337730870712401, "grad_norm": 0.16235865257575285, "learning_rate": 0.0004384099077254168, "loss": 2.997974395751953, "step": 9106, "token_acc": 0.2998850106481171 }, { "epoch": 5.3383172090296105, "grad_norm": 0.17724697534035994, "learning_rate": 0.0004383939806153133, "loss": 2.9932146072387695, "step": 9107, "token_acc": 0.2999349755201958 }, { "epoch": 5.338903547346819, "grad_norm": 0.1505514019047103, "learning_rate": 0.0004383780517355023, "loss": 3.039780616760254, "step": 9108, "token_acc": 0.29069773432106905 }, { "epoch": 5.339489885664028, "grad_norm": 0.1787163038516192, "learning_rate": 0.0004383621210861332, "loss": 2.9905641078948975, "step": 9109, "token_acc": 0.30066747652764636 }, { "epoch": 5.340076223981237, "grad_norm": 0.16815565094377585, "learning_rate": 0.0004383461886673558, "loss": 2.951796531677246, "step": 9110, "token_acc": 0.30569369168016985 }, { "epoch": 5.340662562298446, "grad_norm": 0.16998321049608223, "learning_rate": 0.00043833025447931985, "loss": 3.0325381755828857, "step": 9111, "token_acc": 0.2933731175803017 }, { "epoch": 5.341248900615655, "grad_norm": 0.17016929803646744, "learning_rate": 0.00043831431852217487, "loss": 2.99227237701416, "step": 9112, "token_acc": 0.2998074009398834 }, { "epoch": 5.341835238932864, "grad_norm": 0.17890421546780835, "learning_rate": 0.00043829838079607067, "loss": 2.996424674987793, "step": 9113, "token_acc": 0.29883602976948315 }, { "epoch": 5.342421577250073, "grad_norm": 0.17656611181399984, "learning_rate": 0.00043828244130115693, "loss": 2.9825751781463623, "step": 9114, "token_acc": 0.29957749741094036 }, { "epoch": 5.3430079155672825, "grad_norm": 0.1835139930759743, "learning_rate": 0.00043826650003758326, "loss": 3.011472463607788, "step": 9115, "token_acc": 0.2957167065049242 }, { "epoch": 5.343594253884492, "grad_norm": 0.16232520937558356, "learning_rate": 0.00043825055700549967, "loss": 2.989093542098999, "step": 9116, "token_acc": 0.3002224371635333 }, { "epoch": 5.344180592201701, "grad_norm": 0.18884799020404452, "learning_rate": 0.00043823461220505566, "loss": 2.939085006713867, "step": 9117, "token_acc": 0.3080376668475884 }, { "epoch": 5.34476693051891, "grad_norm": 0.16289743138637722, "learning_rate": 0.0004382186656364011, "loss": 2.998645305633545, "step": 9118, "token_acc": 0.2987240533465257 }, { "epoch": 5.345353268836118, "grad_norm": 0.18348018468514293, "learning_rate": 0.0004382027172996859, "loss": 2.973112106323242, "step": 9119, "token_acc": 0.3037733776398539 }, { "epoch": 5.345939607153327, "grad_norm": 0.17111898173355553, "learning_rate": 0.0004381867671950597, "loss": 3.0142009258270264, "step": 9120, "token_acc": 0.295798492559428 }, { "epoch": 5.346525945470536, "grad_norm": 0.18493794820902176, "learning_rate": 0.00043817081532267243, "loss": 3.020679473876953, "step": 9121, "token_acc": 0.29420273558095317 }, { "epoch": 5.347112283787745, "grad_norm": 0.1900104764779965, "learning_rate": 0.00043815486168267395, "loss": 3.007984161376953, "step": 9122, "token_acc": 0.2980666413277588 }, { "epoch": 5.3476986221049545, "grad_norm": 0.20891142572759944, "learning_rate": 0.000438138906275214, "loss": 2.977004051208496, "step": 9123, "token_acc": 0.30262108736331444 }, { "epoch": 5.348284960422164, "grad_norm": 0.18171597979722, "learning_rate": 0.00043812294910044265, "loss": 2.9668478965759277, "step": 9124, "token_acc": 0.3027530770626728 }, { "epoch": 5.348871298739373, "grad_norm": 0.1819966298816744, "learning_rate": 0.0004381069901585096, "loss": 3.017306327819824, "step": 9125, "token_acc": 0.2964429197870244 }, { "epoch": 5.349457637056582, "grad_norm": 0.19670138542979698, "learning_rate": 0.0004380910294495649, "loss": 2.9684526920318604, "step": 9126, "token_acc": 0.3017075833273572 }, { "epoch": 5.350043975373791, "grad_norm": 0.24275647021977537, "learning_rate": 0.0004380750669737584, "loss": 3.0019752979278564, "step": 9127, "token_acc": 0.29845676635320423 }, { "epoch": 5.350630313691, "grad_norm": 0.34544412020102333, "learning_rate": 0.00043805910273124016, "loss": 3.029719829559326, "step": 9128, "token_acc": 0.2941714598740071 }, { "epoch": 5.351216652008208, "grad_norm": 0.39450054477024726, "learning_rate": 0.00043804313672216, "loss": 3.0041685104370117, "step": 9129, "token_acc": 0.2987318289719459 }, { "epoch": 5.351802990325417, "grad_norm": 0.21108597823311917, "learning_rate": 0.000438027168946668, "loss": 2.9998574256896973, "step": 9130, "token_acc": 0.2997330046117385 }, { "epoch": 5.3523893286426265, "grad_norm": 0.24200172827657124, "learning_rate": 0.0004380111994049141, "loss": 3.0410192012786865, "step": 9131, "token_acc": 0.29370106621552505 }, { "epoch": 5.352975666959836, "grad_norm": 0.17676015463625935, "learning_rate": 0.0004379952280970483, "loss": 2.993969440460205, "step": 9132, "token_acc": 0.29985223415532547 }, { "epoch": 5.353562005277045, "grad_norm": 0.2422420240392483, "learning_rate": 0.0004379792550232207, "loss": 2.9978816509246826, "step": 9133, "token_acc": 0.297968403325403 }, { "epoch": 5.354148343594254, "grad_norm": 0.1955019983002007, "learning_rate": 0.0004379632801835813, "loss": 2.9947316646575928, "step": 9134, "token_acc": 0.2980802568903255 }, { "epoch": 5.354734681911463, "grad_norm": 0.21153221776027153, "learning_rate": 0.0004379473035782802, "loss": 2.975295066833496, "step": 9135, "token_acc": 0.30332398493205254 }, { "epoch": 5.355321020228672, "grad_norm": 0.17094250122412796, "learning_rate": 0.00043793132520746737, "loss": 2.965813636779785, "step": 9136, "token_acc": 0.30223850309106665 }, { "epoch": 5.355907358545881, "grad_norm": 0.24498599400090912, "learning_rate": 0.000437915345071293, "loss": 2.9655113220214844, "step": 9137, "token_acc": 0.30281790319034746 }, { "epoch": 5.35649369686309, "grad_norm": 0.1742716541909434, "learning_rate": 0.0004378993631699072, "loss": 3.0123283863067627, "step": 9138, "token_acc": 0.2960446721836221 }, { "epoch": 5.357080035180299, "grad_norm": 0.202940611950018, "learning_rate": 0.00043788337950346004, "loss": 3.05010986328125, "step": 9139, "token_acc": 0.29201542621379095 }, { "epoch": 5.3576663734975085, "grad_norm": 0.18536594269826112, "learning_rate": 0.00043786739407210176, "loss": 3.0031495094299316, "step": 9140, "token_acc": 0.29814443910227567 }, { "epoch": 5.358252711814717, "grad_norm": 0.19895898318579572, "learning_rate": 0.00043785140687598246, "loss": 3.028254747390747, "step": 9141, "token_acc": 0.2948439971989522 }, { "epoch": 5.358839050131926, "grad_norm": 0.198364777569017, "learning_rate": 0.0004378354179152523, "loss": 2.9692914485931396, "step": 9142, "token_acc": 0.3033963210440252 }, { "epoch": 5.359425388449135, "grad_norm": 0.2046162409832557, "learning_rate": 0.0004378194271900615, "loss": 3.018787384033203, "step": 9143, "token_acc": 0.2963541029636208 }, { "epoch": 5.360011726766344, "grad_norm": 0.18350965376498618, "learning_rate": 0.0004378034347005603, "loss": 3.045400619506836, "step": 9144, "token_acc": 0.29270711800525673 }, { "epoch": 5.360598065083553, "grad_norm": 0.21239366422091696, "learning_rate": 0.00043778744044689887, "loss": 3.012734889984131, "step": 9145, "token_acc": 0.2966786874559734 }, { "epoch": 5.361184403400762, "grad_norm": 0.1785624495469117, "learning_rate": 0.00043777144442922746, "loss": 3.0375757217407227, "step": 9146, "token_acc": 0.2933921447434961 }, { "epoch": 5.361770741717971, "grad_norm": 0.1869701850839567, "learning_rate": 0.0004377554466476964, "loss": 2.9764602184295654, "step": 9147, "token_acc": 0.30308116970283044 }, { "epoch": 5.3623570800351805, "grad_norm": 0.20678921973343722, "learning_rate": 0.00043773944710245595, "loss": 3.0103230476379395, "step": 9148, "token_acc": 0.2969260637850056 }, { "epoch": 5.36294341835239, "grad_norm": 0.21617520499976442, "learning_rate": 0.0004377234457936563, "loss": 3.0311481952667236, "step": 9149, "token_acc": 0.29324328270640365 }, { "epoch": 5.363529756669599, "grad_norm": 0.1700281388531636, "learning_rate": 0.0004377074427214479, "loss": 3.042182207107544, "step": 9150, "token_acc": 0.2936014227689631 }, { "epoch": 5.364116094986807, "grad_norm": 0.24197773679398205, "learning_rate": 0.000437691437885981, "loss": 3.01670241355896, "step": 9151, "token_acc": 0.29751541832190337 }, { "epoch": 5.364702433304016, "grad_norm": 0.16192020141594574, "learning_rate": 0.00043767543128740596, "loss": 3.010223388671875, "step": 9152, "token_acc": 0.2980938225529976 }, { "epoch": 5.365288771621225, "grad_norm": 0.2247503009897838, "learning_rate": 0.0004376594229258731, "loss": 2.9473280906677246, "step": 9153, "token_acc": 0.3057339142986982 }, { "epoch": 5.365875109938434, "grad_norm": 0.15013193040836287, "learning_rate": 0.00043764341280153285, "loss": 2.9669604301452637, "step": 9154, "token_acc": 0.3040245085308132 }, { "epoch": 5.366461448255643, "grad_norm": 0.20336417158191517, "learning_rate": 0.00043762740091453557, "loss": 3.0124599933624268, "step": 9155, "token_acc": 0.2958797422604289 }, { "epoch": 5.3670477865728525, "grad_norm": 0.21391847122053625, "learning_rate": 0.00043761138726503175, "loss": 3.00254487991333, "step": 9156, "token_acc": 0.29970478623173236 }, { "epoch": 5.367634124890062, "grad_norm": 0.16388504604040585, "learning_rate": 0.0004375953718531717, "loss": 2.9890682697296143, "step": 9157, "token_acc": 0.2995357698004386 }, { "epoch": 5.368220463207271, "grad_norm": 0.19317133239185563, "learning_rate": 0.00043757935467910597, "loss": 2.9956259727478027, "step": 9158, "token_acc": 0.2983599585337976 }, { "epoch": 5.36880680152448, "grad_norm": 0.16795272970031677, "learning_rate": 0.0004375633357429849, "loss": 3.0120320320129395, "step": 9159, "token_acc": 0.2968905277472671 }, { "epoch": 5.369393139841689, "grad_norm": 0.18996447229060334, "learning_rate": 0.00043754731504495913, "loss": 2.9786884784698486, "step": 9160, "token_acc": 0.3021710997824595 }, { "epoch": 5.369979478158898, "grad_norm": 0.16044170495543397, "learning_rate": 0.00043753129258517897, "loss": 2.9795079231262207, "step": 9161, "token_acc": 0.30150728234532703 }, { "epoch": 5.370565816476106, "grad_norm": 0.17310262078471453, "learning_rate": 0.0004375152683637951, "loss": 2.946756362915039, "step": 9162, "token_acc": 0.30599011363727113 }, { "epoch": 5.371152154793315, "grad_norm": 0.18744979720332647, "learning_rate": 0.00043749924238095796, "loss": 3.028874397277832, "step": 9163, "token_acc": 0.29653462504681277 }, { "epoch": 5.3717384931105245, "grad_norm": 0.16666887544390915, "learning_rate": 0.000437483214636818, "loss": 2.978790521621704, "step": 9164, "token_acc": 0.30110063696872413 }, { "epoch": 5.372324831427734, "grad_norm": 0.16065214634098549, "learning_rate": 0.00043746718513152597, "loss": 2.9845144748687744, "step": 9165, "token_acc": 0.3002206268247571 }, { "epoch": 5.372911169744943, "grad_norm": 0.16518584922473464, "learning_rate": 0.0004374511538652323, "loss": 3.0337395668029785, "step": 9166, "token_acc": 0.2941255475659145 }, { "epoch": 5.373497508062152, "grad_norm": 0.15247452937110031, "learning_rate": 0.0004374351208380876, "loss": 2.99542236328125, "step": 9167, "token_acc": 0.29960084465063375 }, { "epoch": 5.374083846379361, "grad_norm": 0.18220169511160905, "learning_rate": 0.0004374190860502426, "loss": 2.979783058166504, "step": 9168, "token_acc": 0.3013378975149599 }, { "epoch": 5.37467018469657, "grad_norm": 0.17866960364153894, "learning_rate": 0.00043740304950184777, "loss": 3.079359531402588, "step": 9169, "token_acc": 0.2873769296219764 }, { "epoch": 5.375256523013779, "grad_norm": 0.17465333989636056, "learning_rate": 0.0004373870111930539, "loss": 2.9830048084259033, "step": 9170, "token_acc": 0.3008378983902508 }, { "epoch": 5.375842861330988, "grad_norm": 0.15919752440073343, "learning_rate": 0.00043737097112401147, "loss": 3.0141806602478027, "step": 9171, "token_acc": 0.29873897667532023 }, { "epoch": 5.3764291996481965, "grad_norm": 0.16926520412220503, "learning_rate": 0.0004373549292948713, "loss": 2.9947144985198975, "step": 9172, "token_acc": 0.2982845060774494 }, { "epoch": 5.377015537965406, "grad_norm": 0.175301236507858, "learning_rate": 0.000437338885705784, "loss": 3.015355110168457, "step": 9173, "token_acc": 0.2939951699110557 }, { "epoch": 5.377601876282615, "grad_norm": 0.22062452549267494, "learning_rate": 0.00043732284035690036, "loss": 3.0321760177612305, "step": 9174, "token_acc": 0.2948699058525854 }, { "epoch": 5.378188214599824, "grad_norm": 0.2028817041924162, "learning_rate": 0.00043730679324837106, "loss": 3.0003926753997803, "step": 9175, "token_acc": 0.29704682071361604 }, { "epoch": 5.378774552917033, "grad_norm": 0.178409406933029, "learning_rate": 0.00043729074438034676, "loss": 3.047703981399536, "step": 9176, "token_acc": 0.292749488798301 }, { "epoch": 5.379360891234242, "grad_norm": 0.27469746800952244, "learning_rate": 0.0004372746937529783, "loss": 3.012335777282715, "step": 9177, "token_acc": 0.29676354130824417 }, { "epoch": 5.379947229551451, "grad_norm": 0.340410924710611, "learning_rate": 0.00043725864136641657, "loss": 3.0225911140441895, "step": 9178, "token_acc": 0.29576936411100246 }, { "epoch": 5.38053356786866, "grad_norm": 0.16660309872293755, "learning_rate": 0.00043724258722081214, "loss": 3.0373167991638184, "step": 9179, "token_acc": 0.29349801653153795 }, { "epoch": 5.381119906185869, "grad_norm": 0.30377431415155953, "learning_rate": 0.00043722653131631595, "loss": 3.007723569869995, "step": 9180, "token_acc": 0.2962644766219692 }, { "epoch": 5.3817062445030786, "grad_norm": 0.18731345524247506, "learning_rate": 0.0004372104736530788, "loss": 3.002624750137329, "step": 9181, "token_acc": 0.29847645255169675 }, { "epoch": 5.382292582820288, "grad_norm": 0.22668001439788774, "learning_rate": 0.00043719441423125153, "loss": 3.0065250396728516, "step": 9182, "token_acc": 0.2991036932002966 }, { "epoch": 5.382878921137497, "grad_norm": 0.16166152827845828, "learning_rate": 0.00043717835305098486, "loss": 2.981966972351074, "step": 9183, "token_acc": 0.30158700808824696 }, { "epoch": 5.383465259454705, "grad_norm": 0.20781390419986828, "learning_rate": 0.0004371622901124299, "loss": 3.029080390930176, "step": 9184, "token_acc": 0.295977357581597 }, { "epoch": 5.384051597771914, "grad_norm": 0.15715379094598086, "learning_rate": 0.0004371462254157375, "loss": 2.988495349884033, "step": 9185, "token_acc": 0.29801257540319254 }, { "epoch": 5.384637936089123, "grad_norm": 0.17328543448150105, "learning_rate": 0.0004371301589610583, "loss": 3.0033321380615234, "step": 9186, "token_acc": 0.2986444277376902 }, { "epoch": 5.385224274406332, "grad_norm": 0.1910908168284505, "learning_rate": 0.00043711409074854356, "loss": 3.0306572914123535, "step": 9187, "token_acc": 0.2947030138463025 }, { "epoch": 5.3858106127235414, "grad_norm": 0.20530176067485745, "learning_rate": 0.00043709802077834397, "loss": 3.0185861587524414, "step": 9188, "token_acc": 0.29735392132131333 }, { "epoch": 5.386396951040751, "grad_norm": 0.19188222254927378, "learning_rate": 0.0004370819490506107, "loss": 3.0221943855285645, "step": 9189, "token_acc": 0.2960132166587996 }, { "epoch": 5.38698328935796, "grad_norm": 0.1918398241835136, "learning_rate": 0.0004370658755654946, "loss": 2.975102663040161, "step": 9190, "token_acc": 0.3022940654083272 }, { "epoch": 5.387569627675169, "grad_norm": 0.20112075610067767, "learning_rate": 0.00043704980032314663, "loss": 3.000765323638916, "step": 9191, "token_acc": 0.29835716546307217 }, { "epoch": 5.388155965992378, "grad_norm": 0.1801763373500616, "learning_rate": 0.00043703372332371784, "loss": 3.014225482940674, "step": 9192, "token_acc": 0.2973812137368669 }, { "epoch": 5.388742304309587, "grad_norm": 0.20236369849502156, "learning_rate": 0.0004370176445673593, "loss": 3.0275163650512695, "step": 9193, "token_acc": 0.29612129906852963 }, { "epoch": 5.389328642626795, "grad_norm": 0.16419626339382143, "learning_rate": 0.0004370015640542219, "loss": 3.0069777965545654, "step": 9194, "token_acc": 0.2981873425610928 }, { "epoch": 5.389914980944004, "grad_norm": 0.20727541648946754, "learning_rate": 0.00043698548178445687, "loss": 3.025744676589966, "step": 9195, "token_acc": 0.29461327099195256 }, { "epoch": 5.3905013192612135, "grad_norm": 0.19665410647308468, "learning_rate": 0.0004369693977582152, "loss": 3.0095314979553223, "step": 9196, "token_acc": 0.2959869262238341 }, { "epoch": 5.391087657578423, "grad_norm": 0.17692770078525114, "learning_rate": 0.000436953311975648, "loss": 3.0164284706115723, "step": 9197, "token_acc": 0.29699208802012245 }, { "epoch": 5.391673995895632, "grad_norm": 0.19368590935492458, "learning_rate": 0.0004369372244369063, "loss": 2.9869956970214844, "step": 9198, "token_acc": 0.3004770146481489 }, { "epoch": 5.392260334212841, "grad_norm": 0.17048860776292715, "learning_rate": 0.00043692113514214135, "loss": 2.9936423301696777, "step": 9199, "token_acc": 0.2990571415572285 }, { "epoch": 5.39284667253005, "grad_norm": 0.21351871790536314, "learning_rate": 0.0004369050440915042, "loss": 3.000220537185669, "step": 9200, "token_acc": 0.2978976453318129 }, { "epoch": 5.393433010847259, "grad_norm": 0.18031504774370866, "learning_rate": 0.00043688895128514595, "loss": 3.0250191688537598, "step": 9201, "token_acc": 0.296540668764597 }, { "epoch": 5.394019349164468, "grad_norm": 0.2072695184927117, "learning_rate": 0.00043687285672321785, "loss": 3.021432876586914, "step": 9202, "token_acc": 0.29552462494240594 }, { "epoch": 5.394605687481677, "grad_norm": 0.16515999631271344, "learning_rate": 0.00043685676040587114, "loss": 2.9758167266845703, "step": 9203, "token_acc": 0.3010076313828045 }, { "epoch": 5.395192025798886, "grad_norm": 0.2128930276757322, "learning_rate": 0.0004368406623332569, "loss": 2.9760899543762207, "step": 9204, "token_acc": 0.30306753460165803 }, { "epoch": 5.395778364116095, "grad_norm": 0.1716019996462372, "learning_rate": 0.00043682456250552647, "loss": 2.9838485717773438, "step": 9205, "token_acc": 0.29959297827710973 }, { "epoch": 5.396364702433304, "grad_norm": 0.21367083266072395, "learning_rate": 0.000436808460922831, "loss": 2.982295036315918, "step": 9206, "token_acc": 0.3008408499443801 }, { "epoch": 5.396951040750513, "grad_norm": 0.23771484930629602, "learning_rate": 0.0004367923575853218, "loss": 3.012080192565918, "step": 9207, "token_acc": 0.29722584798267776 }, { "epoch": 5.397537379067722, "grad_norm": 0.15524494049981094, "learning_rate": 0.00043677625249315, "loss": 2.9758782386779785, "step": 9208, "token_acc": 0.3019443158692274 }, { "epoch": 5.398123717384931, "grad_norm": 0.24224484263508103, "learning_rate": 0.00043676014564646707, "loss": 3.020840644836426, "step": 9209, "token_acc": 0.2950101693223363 }, { "epoch": 5.39871005570214, "grad_norm": 0.16731381304731427, "learning_rate": 0.0004367440370454242, "loss": 2.9955849647521973, "step": 9210, "token_acc": 0.2996279032548463 }, { "epoch": 5.399296394019349, "grad_norm": 0.18402064897074494, "learning_rate": 0.0004367279266901728, "loss": 2.963852882385254, "step": 9211, "token_acc": 0.3025639309316809 }, { "epoch": 5.399882732336558, "grad_norm": 0.1717535595453676, "learning_rate": 0.0004367118145808641, "loss": 3.0061962604522705, "step": 9212, "token_acc": 0.29711706331592863 }, { "epoch": 5.4004690706537675, "grad_norm": 0.16538817031606068, "learning_rate": 0.0004366957007176495, "loss": 3.008577823638916, "step": 9213, "token_acc": 0.2990590505753934 }, { "epoch": 5.401055408970977, "grad_norm": 0.17236010616899708, "learning_rate": 0.0004366795851006804, "loss": 2.997645854949951, "step": 9214, "token_acc": 0.29857436119621616 }, { "epoch": 5.401641747288186, "grad_norm": 0.16176818007024776, "learning_rate": 0.0004366634677301081, "loss": 2.9997642040252686, "step": 9215, "token_acc": 0.29937201117446943 }, { "epoch": 5.402228085605394, "grad_norm": 0.19191844251632073, "learning_rate": 0.00043664734860608407, "loss": 3.0136168003082275, "step": 9216, "token_acc": 0.2968503577640633 }, { "epoch": 5.402814423922603, "grad_norm": 0.17072337754377945, "learning_rate": 0.0004366312277287597, "loss": 3.0065536499023438, "step": 9217, "token_acc": 0.2967459919107903 }, { "epoch": 5.403400762239812, "grad_norm": 0.2413245608773218, "learning_rate": 0.0004366151050982865, "loss": 3.002647638320923, "step": 9218, "token_acc": 0.2985527169969832 }, { "epoch": 5.403987100557021, "grad_norm": 0.1689021934084639, "learning_rate": 0.0004365989807148158, "loss": 2.9973690509796143, "step": 9219, "token_acc": 0.2985785132733975 }, { "epoch": 5.40457343887423, "grad_norm": 0.19824066540803134, "learning_rate": 0.0004365828545784991, "loss": 2.9683022499084473, "step": 9220, "token_acc": 0.3031374600344811 }, { "epoch": 5.4051597771914395, "grad_norm": 0.17914715084163957, "learning_rate": 0.00043656672668948793, "loss": 2.9808239936828613, "step": 9221, "token_acc": 0.30219912444637875 }, { "epoch": 5.405746115508649, "grad_norm": 0.1944230219706418, "learning_rate": 0.0004365505970479338, "loss": 2.983018398284912, "step": 9222, "token_acc": 0.3016974100403996 }, { "epoch": 5.406332453825858, "grad_norm": 0.20422988992206242, "learning_rate": 0.0004365344656539881, "loss": 2.992086410522461, "step": 9223, "token_acc": 0.29913463095169707 }, { "epoch": 5.406918792143067, "grad_norm": 0.17372542947531716, "learning_rate": 0.0004365183325078026, "loss": 3.002697467803955, "step": 9224, "token_acc": 0.29948867786705624 }, { "epoch": 5.407505130460276, "grad_norm": 0.17849456607175218, "learning_rate": 0.0004365021976095286, "loss": 3.034900188446045, "step": 9225, "token_acc": 0.292770246871442 }, { "epoch": 5.408091468777485, "grad_norm": 0.1730381274158474, "learning_rate": 0.00043648606095931776, "loss": 2.9906253814697266, "step": 9226, "token_acc": 0.3000059074482649 }, { "epoch": 5.408677807094693, "grad_norm": 0.18590836432215493, "learning_rate": 0.0004364699225573217, "loss": 2.988412380218506, "step": 9227, "token_acc": 0.30035891363871825 }, { "epoch": 5.409264145411902, "grad_norm": 0.18214320240758064, "learning_rate": 0.00043645378240369197, "loss": 3.0267510414123535, "step": 9228, "token_acc": 0.2927593511056003 }, { "epoch": 5.4098504837291115, "grad_norm": 0.17971269440319435, "learning_rate": 0.00043643764049858025, "loss": 2.9992713928222656, "step": 9229, "token_acc": 0.3002239620732642 }, { "epoch": 5.410436822046321, "grad_norm": 0.17506003836245262, "learning_rate": 0.00043642149684213806, "loss": 3.0024595260620117, "step": 9230, "token_acc": 0.2996928178128943 }, { "epoch": 5.41102316036353, "grad_norm": 0.17909626918620603, "learning_rate": 0.0004364053514345172, "loss": 3.0282022953033447, "step": 9231, "token_acc": 0.29547305613597746 }, { "epoch": 5.411609498680739, "grad_norm": 0.18218685450453048, "learning_rate": 0.00043638920427586914, "loss": 2.9802122116088867, "step": 9232, "token_acc": 0.30058711672223426 }, { "epoch": 5.412195836997948, "grad_norm": 0.22286997600339165, "learning_rate": 0.0004363730553663458, "loss": 2.9856045246124268, "step": 9233, "token_acc": 0.30046031938876067 }, { "epoch": 5.412782175315157, "grad_norm": 0.1927053476867642, "learning_rate": 0.0004363569047060986, "loss": 3.005012035369873, "step": 9234, "token_acc": 0.2979531304278118 }, { "epoch": 5.413368513632366, "grad_norm": 0.19135134004819682, "learning_rate": 0.00043634075229527947, "loss": 3.0004217624664307, "step": 9235, "token_acc": 0.2980238014280857 }, { "epoch": 5.413954851949575, "grad_norm": 0.2383198081137073, "learning_rate": 0.0004363245981340401, "loss": 3.013132095336914, "step": 9236, "token_acc": 0.2977878646143379 }, { "epoch": 5.4145411902667835, "grad_norm": 0.18864435919583616, "learning_rate": 0.0004363084422225322, "loss": 3.0213661193847656, "step": 9237, "token_acc": 0.2967907264808319 }, { "epoch": 5.415127528583993, "grad_norm": 0.21119102406146584, "learning_rate": 0.00043629228456090746, "loss": 2.998109817504883, "step": 9238, "token_acc": 0.2995388483308461 }, { "epoch": 5.415713866901202, "grad_norm": 0.2019791506724574, "learning_rate": 0.0004362761251493178, "loss": 3.0338542461395264, "step": 9239, "token_acc": 0.2938528142930139 }, { "epoch": 5.416300205218411, "grad_norm": 0.22774311915643164, "learning_rate": 0.000436259963987915, "loss": 3.043783187866211, "step": 9240, "token_acc": 0.2920801796726226 }, { "epoch": 5.41688654353562, "grad_norm": 0.30062014050764185, "learning_rate": 0.00043624380107685075, "loss": 3.0157740116119385, "step": 9241, "token_acc": 0.2968377472360541 }, { "epoch": 5.417472881852829, "grad_norm": 0.171528258582784, "learning_rate": 0.00043622763641627696, "loss": 3.025561809539795, "step": 9242, "token_acc": 0.2952885331679027 }, { "epoch": 5.418059220170038, "grad_norm": 0.224950669205509, "learning_rate": 0.0004362114700063455, "loss": 3.0040669441223145, "step": 9243, "token_acc": 0.30032943484333186 }, { "epoch": 5.418645558487247, "grad_norm": 0.17686075053443148, "learning_rate": 0.0004361953018472082, "loss": 2.9854679107666016, "step": 9244, "token_acc": 0.2999281912107064 }, { "epoch": 5.419231896804456, "grad_norm": 0.20183812832250378, "learning_rate": 0.0004361791319390169, "loss": 3.0033135414123535, "step": 9245, "token_acc": 0.2978057568941098 }, { "epoch": 5.4198182351216655, "grad_norm": 0.19114749915503423, "learning_rate": 0.0004361629602819236, "loss": 2.997903823852539, "step": 9246, "token_acc": 0.29931048208641364 }, { "epoch": 5.420404573438875, "grad_norm": 0.1834011025760006, "learning_rate": 0.00043614678687608013, "loss": 2.952251672744751, "step": 9247, "token_acc": 0.3068261844437789 }, { "epoch": 5.420990911756084, "grad_norm": 0.1850155134363094, "learning_rate": 0.00043613061172163836, "loss": 3.013467311859131, "step": 9248, "token_acc": 0.2968805583209424 }, { "epoch": 5.421577250073292, "grad_norm": 0.20399408835518085, "learning_rate": 0.00043611443481875043, "loss": 3.0461878776550293, "step": 9249, "token_acc": 0.2913973021769462 }, { "epoch": 5.422163588390501, "grad_norm": 0.20812083883917137, "learning_rate": 0.00043609825616756806, "loss": 3.0102429389953613, "step": 9250, "token_acc": 0.2973120388142738 }, { "epoch": 5.42274992670771, "grad_norm": 0.17338536210648833, "learning_rate": 0.0004360820757682434, "loss": 2.9975225925445557, "step": 9251, "token_acc": 0.29768570261789107 }, { "epoch": 5.423336265024919, "grad_norm": 0.2206571098266944, "learning_rate": 0.0004360658936209284, "loss": 3.038896083831787, "step": 9252, "token_acc": 0.293813964007289 }, { "epoch": 5.423922603342128, "grad_norm": 0.19092589515622527, "learning_rate": 0.00043604970972577504, "loss": 2.994692325592041, "step": 9253, "token_acc": 0.2983439760205509 }, { "epoch": 5.4245089416593375, "grad_norm": 0.2121925931217304, "learning_rate": 0.00043603352408293537, "loss": 2.9860029220581055, "step": 9254, "token_acc": 0.2991710724185089 }, { "epoch": 5.425095279976547, "grad_norm": 0.18318351864060545, "learning_rate": 0.0004360173366925614, "loss": 3.019348621368408, "step": 9255, "token_acc": 0.29682057232220554 }, { "epoch": 5.425681618293756, "grad_norm": 0.23123159053509515, "learning_rate": 0.0004360011475548052, "loss": 3.001685619354248, "step": 9256, "token_acc": 0.2986617809084044 }, { "epoch": 5.426267956610965, "grad_norm": 0.16810121788417504, "learning_rate": 0.00043598495666981893, "loss": 3.029845714569092, "step": 9257, "token_acc": 0.29408669733687554 }, { "epoch": 5.426854294928174, "grad_norm": 0.24424652568678923, "learning_rate": 0.0004359687640377545, "loss": 2.9915499687194824, "step": 9258, "token_acc": 0.29974120752568517 }, { "epoch": 5.427440633245382, "grad_norm": 0.1731489771855594, "learning_rate": 0.00043595256965876425, "loss": 3.022756338119507, "step": 9259, "token_acc": 0.29660911424733677 }, { "epoch": 5.428026971562591, "grad_norm": 0.22888904374953642, "learning_rate": 0.0004359363735330001, "loss": 3.0366921424865723, "step": 9260, "token_acc": 0.2936887750226628 }, { "epoch": 5.4286133098798, "grad_norm": 0.1722030270620727, "learning_rate": 0.0004359201756606143, "loss": 2.9952399730682373, "step": 9261, "token_acc": 0.299448297134879 }, { "epoch": 5.4291996481970095, "grad_norm": 0.19550145150084677, "learning_rate": 0.00043590397604175904, "loss": 3.012085437774658, "step": 9262, "token_acc": 0.2988688559383098 }, { "epoch": 5.429785986514219, "grad_norm": 0.15268101380819202, "learning_rate": 0.00043588777467658637, "loss": 3.0059452056884766, "step": 9263, "token_acc": 0.29804369193696695 }, { "epoch": 5.430372324831428, "grad_norm": 0.21377781914596058, "learning_rate": 0.0004358715715652485, "loss": 2.9617834091186523, "step": 9264, "token_acc": 0.30397697784974365 }, { "epoch": 5.430958663148637, "grad_norm": 0.15815124395461022, "learning_rate": 0.00043585536670789774, "loss": 3.0124738216400146, "step": 9265, "token_acc": 0.29803631728485314 }, { "epoch": 5.431545001465846, "grad_norm": 0.28691000049356535, "learning_rate": 0.0004358391601046863, "loss": 3.0200634002685547, "step": 9266, "token_acc": 0.2946043860005691 }, { "epoch": 5.432131339783055, "grad_norm": 0.21717247195790618, "learning_rate": 0.00043582295175576626, "loss": 3.0279107093811035, "step": 9267, "token_acc": 0.2937962939555578 }, { "epoch": 5.432717678100264, "grad_norm": 0.18031065853372505, "learning_rate": 0.00043580674166129006, "loss": 2.976165771484375, "step": 9268, "token_acc": 0.3014500146659355 }, { "epoch": 5.433304016417473, "grad_norm": 0.18264851342714286, "learning_rate": 0.00043579052982140986, "loss": 3.009168863296509, "step": 9269, "token_acc": 0.2982893652102226 }, { "epoch": 5.4338903547346815, "grad_norm": 0.171081317370748, "learning_rate": 0.000435774316236278, "loss": 2.9766793251037598, "step": 9270, "token_acc": 0.3010177881207117 }, { "epoch": 5.434476693051891, "grad_norm": 0.17820552169061585, "learning_rate": 0.00043575810090604677, "loss": 2.9695844650268555, "step": 9271, "token_acc": 0.30336865909660626 }, { "epoch": 5.4350630313691, "grad_norm": 0.16766333512772605, "learning_rate": 0.00043574188383086853, "loss": 3.061079740524292, "step": 9272, "token_acc": 0.29130429143916897 }, { "epoch": 5.435649369686309, "grad_norm": 0.18019955424352724, "learning_rate": 0.00043572566501089556, "loss": 3.0243349075317383, "step": 9273, "token_acc": 0.29577893275590106 }, { "epoch": 5.436235708003518, "grad_norm": 0.1700494382420953, "learning_rate": 0.00043570944444628023, "loss": 3.0157384872436523, "step": 9274, "token_acc": 0.2970691676436108 }, { "epoch": 5.436822046320727, "grad_norm": 0.19469236522996472, "learning_rate": 0.0004356932221371749, "loss": 3.0311927795410156, "step": 9275, "token_acc": 0.29395106305827795 }, { "epoch": 5.437408384637936, "grad_norm": 0.1739583651296284, "learning_rate": 0.000435676998083732, "loss": 3.0042123794555664, "step": 9276, "token_acc": 0.2994384576441051 }, { "epoch": 5.437994722955145, "grad_norm": 0.16191266215786504, "learning_rate": 0.0004356607722861039, "loss": 3.0321288108825684, "step": 9277, "token_acc": 0.29488933810190227 }, { "epoch": 5.438581061272354, "grad_norm": 0.19423654784793112, "learning_rate": 0.000435644544744443, "loss": 3.0031025409698486, "step": 9278, "token_acc": 0.2970747732835048 }, { "epoch": 5.4391673995895635, "grad_norm": 0.22403544521636995, "learning_rate": 0.0004356283154589018, "loss": 3.0001492500305176, "step": 9279, "token_acc": 0.2995907630936788 }, { "epoch": 5.439753737906772, "grad_norm": 0.16828275422280975, "learning_rate": 0.00043561208442963276, "loss": 2.9616000652313232, "step": 9280, "token_acc": 0.30402783597179966 }, { "epoch": 5.440340076223981, "grad_norm": 0.1845331992738277, "learning_rate": 0.00043559585165678816, "loss": 3.0193986892700195, "step": 9281, "token_acc": 0.29629318273976124 }, { "epoch": 5.44092641454119, "grad_norm": 0.20789023389744837, "learning_rate": 0.0004355796171405207, "loss": 3.00126314163208, "step": 9282, "token_acc": 0.2981333807594632 }, { "epoch": 5.441512752858399, "grad_norm": 0.16954784224428396, "learning_rate": 0.00043556338088098287, "loss": 2.997490882873535, "step": 9283, "token_acc": 0.29713143051380375 }, { "epoch": 5.442099091175608, "grad_norm": 0.1997056145546008, "learning_rate": 0.00043554714287832706, "loss": 2.968294143676758, "step": 9284, "token_acc": 0.3028045737271407 }, { "epoch": 5.442685429492817, "grad_norm": 0.2205692575472107, "learning_rate": 0.0004355309031327059, "loss": 3.010143280029297, "step": 9285, "token_acc": 0.2968342817807521 }, { "epoch": 5.443271767810026, "grad_norm": 0.16775892558814662, "learning_rate": 0.0004355146616442719, "loss": 3.0132737159729004, "step": 9286, "token_acc": 0.29593930613486497 }, { "epoch": 5.4438581061272355, "grad_norm": 0.17157262877560361, "learning_rate": 0.0004354984184131776, "loss": 2.9661223888397217, "step": 9287, "token_acc": 0.303610413673653 }, { "epoch": 5.444444444444445, "grad_norm": 0.16533490426807776, "learning_rate": 0.00043548217343957564, "loss": 3.0108532905578613, "step": 9288, "token_acc": 0.297357799230755 }, { "epoch": 5.445030782761654, "grad_norm": 0.16525369286095035, "learning_rate": 0.00043546592672361864, "loss": 3.048128604888916, "step": 9289, "token_acc": 0.2936736290546731 }, { "epoch": 5.445617121078863, "grad_norm": 0.1671089805013018, "learning_rate": 0.0004354496782654591, "loss": 3.0219802856445312, "step": 9290, "token_acc": 0.29415309054544997 }, { "epoch": 5.446203459396072, "grad_norm": 0.18941501665907617, "learning_rate": 0.00043543342806524985, "loss": 3.030090093612671, "step": 9291, "token_acc": 0.29489052279625394 }, { "epoch": 5.44678979771328, "grad_norm": 0.17666970004787222, "learning_rate": 0.00043541717612314337, "loss": 3.032655715942383, "step": 9292, "token_acc": 0.29356235342628145 }, { "epoch": 5.447376136030489, "grad_norm": 0.15355495087167506, "learning_rate": 0.0004354009224392923, "loss": 2.9572701454162598, "step": 9293, "token_acc": 0.30464304107532225 }, { "epoch": 5.447962474347698, "grad_norm": 0.17084517834543655, "learning_rate": 0.0004353846670138495, "loss": 2.9812989234924316, "step": 9294, "token_acc": 0.3013412434318867 }, { "epoch": 5.4485488126649075, "grad_norm": 0.1673863729460517, "learning_rate": 0.00043536840984696745, "loss": 3.003182888031006, "step": 9295, "token_acc": 0.297100122323711 }, { "epoch": 5.449135150982117, "grad_norm": 0.1730762621250855, "learning_rate": 0.000435352150938799, "loss": 2.9701852798461914, "step": 9296, "token_acc": 0.30238794100257943 }, { "epoch": 5.449721489299326, "grad_norm": 0.16722979845107286, "learning_rate": 0.00043533589028949693, "loss": 2.96159029006958, "step": 9297, "token_acc": 0.30298906999442643 }, { "epoch": 5.450307827616535, "grad_norm": 0.20761695612468078, "learning_rate": 0.0004353196278992139, "loss": 2.9742608070373535, "step": 9298, "token_acc": 0.3024488976178739 }, { "epoch": 5.450894165933744, "grad_norm": 0.24730649567758445, "learning_rate": 0.00043530336376810267, "loss": 3.027625560760498, "step": 9299, "token_acc": 0.2932454912381992 }, { "epoch": 5.451480504250953, "grad_norm": 0.2086871442759235, "learning_rate": 0.000435287097896316, "loss": 2.963857889175415, "step": 9300, "token_acc": 0.303133837270567 }, { "epoch": 5.452066842568162, "grad_norm": 0.17552198299056931, "learning_rate": 0.0004352708302840068, "loss": 3.0172722339630127, "step": 9301, "token_acc": 0.2954539299429642 }, { "epoch": 5.45265318088537, "grad_norm": 0.20034092909944234, "learning_rate": 0.00043525456093132774, "loss": 3.028874158859253, "step": 9302, "token_acc": 0.2929356010958453 }, { "epoch": 5.4532395192025795, "grad_norm": 0.1679772025208035, "learning_rate": 0.00043523828983843175, "loss": 3.028562545776367, "step": 9303, "token_acc": 0.2949679623202518 }, { "epoch": 5.453825857519789, "grad_norm": 0.2056386046516587, "learning_rate": 0.0004352220170054716, "loss": 2.993312358856201, "step": 9304, "token_acc": 0.3008977513613443 }, { "epoch": 5.454412195836998, "grad_norm": 0.24164607982125028, "learning_rate": 0.0004352057424326002, "loss": 2.9532313346862793, "step": 9305, "token_acc": 0.303769057068378 }, { "epoch": 5.454998534154207, "grad_norm": 0.18961125269549683, "learning_rate": 0.00043518946611997047, "loss": 3.045319080352783, "step": 9306, "token_acc": 0.2936871415286535 }, { "epoch": 5.455584872471416, "grad_norm": 0.20391789495819038, "learning_rate": 0.00043517318806773523, "loss": 2.975623607635498, "step": 9307, "token_acc": 0.30348533440944714 }, { "epoch": 5.456171210788625, "grad_norm": 0.21554786432652587, "learning_rate": 0.0004351569082760474, "loss": 2.994164228439331, "step": 9308, "token_acc": 0.29931022985391875 }, { "epoch": 5.456757549105834, "grad_norm": 0.1608018041268314, "learning_rate": 0.0004351406267450598, "loss": 2.9838571548461914, "step": 9309, "token_acc": 0.3014281185485305 }, { "epoch": 5.457343887423043, "grad_norm": 0.17861351621622457, "learning_rate": 0.0004351243434749257, "loss": 3.0143494606018066, "step": 9310, "token_acc": 0.29763984332314986 }, { "epoch": 5.457930225740252, "grad_norm": 0.19405751891232503, "learning_rate": 0.0004351080584657977, "loss": 2.983546733856201, "step": 9311, "token_acc": 0.29993846798371365 }, { "epoch": 5.4585165640574616, "grad_norm": 0.16875607433881457, "learning_rate": 0.000435091771717829, "loss": 2.9874768257141113, "step": 9312, "token_acc": 0.3021902556180271 }, { "epoch": 5.45910290237467, "grad_norm": 0.18848562525728227, "learning_rate": 0.0004350754832311725, "loss": 3.001331329345703, "step": 9313, "token_acc": 0.2978758174173115 }, { "epoch": 5.459689240691879, "grad_norm": 0.1743419543391133, "learning_rate": 0.00043505919300598116, "loss": 3.0114850997924805, "step": 9314, "token_acc": 0.29753927525905527 }, { "epoch": 5.460275579009088, "grad_norm": 0.16160176014485184, "learning_rate": 0.0004350429010424082, "loss": 2.9708027839660645, "step": 9315, "token_acc": 0.303546845474579 }, { "epoch": 5.460861917326297, "grad_norm": 0.1679364862816142, "learning_rate": 0.0004350266073406065, "loss": 2.9907479286193848, "step": 9316, "token_acc": 0.3013923298783718 }, { "epoch": 5.461448255643506, "grad_norm": 0.16828796381774416, "learning_rate": 0.0004350103119007291, "loss": 2.9992048740386963, "step": 9317, "token_acc": 0.2986363540301788 }, { "epoch": 5.462034593960715, "grad_norm": 0.17432363369494414, "learning_rate": 0.0004349940147229291, "loss": 2.998342275619507, "step": 9318, "token_acc": 0.29761861223295594 }, { "epoch": 5.4626209322779244, "grad_norm": 0.15796614884459884, "learning_rate": 0.0004349777158073597, "loss": 3.0219717025756836, "step": 9319, "token_acc": 0.2963373852956559 }, { "epoch": 5.463207270595134, "grad_norm": 0.17108010254385855, "learning_rate": 0.0004349614151541739, "loss": 2.9519553184509277, "step": 9320, "token_acc": 0.3052930157880653 }, { "epoch": 5.463793608912343, "grad_norm": 0.16508200271117104, "learning_rate": 0.00043494511276352476, "loss": 2.9958677291870117, "step": 9321, "token_acc": 0.29816006516098753 }, { "epoch": 5.464379947229552, "grad_norm": 0.16089407466310768, "learning_rate": 0.00043492880863556563, "loss": 2.9480462074279785, "step": 9322, "token_acc": 0.3062120695181084 }, { "epoch": 5.464966285546761, "grad_norm": 0.17575109116089047, "learning_rate": 0.0004349125027704495, "loss": 3.0240964889526367, "step": 9323, "token_acc": 0.2949767441860465 }, { "epoch": 5.465552623863969, "grad_norm": 0.16368225472213027, "learning_rate": 0.0004348961951683295, "loss": 2.9775891304016113, "step": 9324, "token_acc": 0.302826786614448 }, { "epoch": 5.466138962181178, "grad_norm": 0.16068471993661282, "learning_rate": 0.000434879885829359, "loss": 2.9678049087524414, "step": 9325, "token_acc": 0.3033337182257559 }, { "epoch": 5.466725300498387, "grad_norm": 0.18143339984879545, "learning_rate": 0.000434863574753691, "loss": 2.999800682067871, "step": 9326, "token_acc": 0.29858578763054866 }, { "epoch": 5.4673116388155965, "grad_norm": 0.1714597427861293, "learning_rate": 0.0004348472619414789, "loss": 2.9835762977600098, "step": 9327, "token_acc": 0.3013311567624895 }, { "epoch": 5.467897977132806, "grad_norm": 0.1606308949179028, "learning_rate": 0.00043483094739287583, "loss": 3.0146384239196777, "step": 9328, "token_acc": 0.29494197503831837 }, { "epoch": 5.468484315450015, "grad_norm": 0.22327434064351578, "learning_rate": 0.0004348146311080351, "loss": 3.0092413425445557, "step": 9329, "token_acc": 0.29714063266568547 }, { "epoch": 5.469070653767224, "grad_norm": 0.20174208910688704, "learning_rate": 0.0004347983130871099, "loss": 3.0328457355499268, "step": 9330, "token_acc": 0.29422749303363666 }, { "epoch": 5.469656992084433, "grad_norm": 0.20139370258095388, "learning_rate": 0.0004347819933302536, "loss": 3.0082056522369385, "step": 9331, "token_acc": 0.2987750160054218 }, { "epoch": 5.470243330401642, "grad_norm": 0.21304896528816133, "learning_rate": 0.0004347656718376195, "loss": 3.0142111778259277, "step": 9332, "token_acc": 0.29565170440228933 }, { "epoch": 5.470829668718851, "grad_norm": 0.1671499857318402, "learning_rate": 0.00043474934860936084, "loss": 3.001624822616577, "step": 9333, "token_acc": 0.29842798187004055 }, { "epoch": 5.47141600703606, "grad_norm": 0.1771438022559278, "learning_rate": 0.000434733023645631, "loss": 3.044080972671509, "step": 9334, "token_acc": 0.29239327557689954 }, { "epoch": 5.4720023453532685, "grad_norm": 0.17616371080157178, "learning_rate": 0.0004347166969465833, "loss": 3.0142054557800293, "step": 9335, "token_acc": 0.297204885947399 }, { "epoch": 5.472588683670478, "grad_norm": 0.23854516696567812, "learning_rate": 0.0004347003685123713, "loss": 2.941039562225342, "step": 9336, "token_acc": 0.30678391695821644 }, { "epoch": 5.473175021987687, "grad_norm": 0.27139848290724333, "learning_rate": 0.00043468403834314806, "loss": 3.019939422607422, "step": 9337, "token_acc": 0.2958992674332099 }, { "epoch": 5.473761360304896, "grad_norm": 0.23103085215293517, "learning_rate": 0.00043466770643906715, "loss": 3.024759292602539, "step": 9338, "token_acc": 0.2953997514188592 }, { "epoch": 5.474347698622105, "grad_norm": 0.199254949621376, "learning_rate": 0.0004346513728002821, "loss": 2.948340892791748, "step": 9339, "token_acc": 0.3054001886337216 }, { "epoch": 5.474934036939314, "grad_norm": 0.24108422225289827, "learning_rate": 0.0004346350374269461, "loss": 2.9728355407714844, "step": 9340, "token_acc": 0.30249291726650035 }, { "epoch": 5.475520375256523, "grad_norm": 0.18333449599754734, "learning_rate": 0.0004346187003192128, "loss": 3.007983684539795, "step": 9341, "token_acc": 0.2965609322945698 }, { "epoch": 5.476106713573732, "grad_norm": 0.21960325550894513, "learning_rate": 0.00043460236147723554, "loss": 2.9771604537963867, "step": 9342, "token_acc": 0.3029555330514968 }, { "epoch": 5.476693051890941, "grad_norm": 0.21545323323924467, "learning_rate": 0.0004345860209011679, "loss": 3.042112112045288, "step": 9343, "token_acc": 0.2935750961258718 }, { "epoch": 5.4772793902081505, "grad_norm": 0.22157656146051166, "learning_rate": 0.00043456967859116323, "loss": 3.02256178855896, "step": 9344, "token_acc": 0.29564296915838995 }, { "epoch": 5.477865728525359, "grad_norm": 0.22812204607766765, "learning_rate": 0.0004345533345473753, "loss": 2.99564528465271, "step": 9345, "token_acc": 0.300691012603308 }, { "epoch": 5.478452066842568, "grad_norm": 0.16539667899202423, "learning_rate": 0.0004345369887699574, "loss": 2.991602897644043, "step": 9346, "token_acc": 0.30129104943838436 }, { "epoch": 5.479038405159777, "grad_norm": 0.19475536965149032, "learning_rate": 0.0004345206412590631, "loss": 3.02744722366333, "step": 9347, "token_acc": 0.2950667609911991 }, { "epoch": 5.479624743476986, "grad_norm": 0.1600815320060909, "learning_rate": 0.0004345042920148461, "loss": 2.9872257709503174, "step": 9348, "token_acc": 0.30130164704858464 }, { "epoch": 5.480211081794195, "grad_norm": 0.1933174638543292, "learning_rate": 0.0004344879410374598, "loss": 3.0092458724975586, "step": 9349, "token_acc": 0.29812729587577225 }, { "epoch": 5.480797420111404, "grad_norm": 0.1727257584071273, "learning_rate": 0.00043447158832705805, "loss": 3.0022435188293457, "step": 9350, "token_acc": 0.300934805340844 }, { "epoch": 5.481383758428613, "grad_norm": 0.18644676950434388, "learning_rate": 0.00043445523388379415, "loss": 3.0150766372680664, "step": 9351, "token_acc": 0.2966749374456903 }, { "epoch": 5.4819700967458225, "grad_norm": 0.15508637663972538, "learning_rate": 0.000434438877707822, "loss": 3.0035812854766846, "step": 9352, "token_acc": 0.2975214307378772 }, { "epoch": 5.482556435063032, "grad_norm": 0.169499747956723, "learning_rate": 0.00043442251979929503, "loss": 3.0251708030700684, "step": 9353, "token_acc": 0.29517203262421055 }, { "epoch": 5.483142773380241, "grad_norm": 0.15878939296820843, "learning_rate": 0.00043440616015836707, "loss": 2.9710445404052734, "step": 9354, "token_acc": 0.30166225816799275 }, { "epoch": 5.48372911169745, "grad_norm": 0.18162086682813014, "learning_rate": 0.00043438979878519167, "loss": 3.005713701248169, "step": 9355, "token_acc": 0.2981250256284086 }, { "epoch": 5.484315450014659, "grad_norm": 0.20055223048789003, "learning_rate": 0.0004343734356799226, "loss": 3.0156188011169434, "step": 9356, "token_acc": 0.2956498232795972 }, { "epoch": 5.484901788331867, "grad_norm": 0.17553405711785136, "learning_rate": 0.0004343570708427136, "loss": 2.993959426879883, "step": 9357, "token_acc": 0.29916330909708394 }, { "epoch": 5.485488126649076, "grad_norm": 0.24411861725253006, "learning_rate": 0.0004343407042737183, "loss": 3.007037878036499, "step": 9358, "token_acc": 0.29775937639362776 }, { "epoch": 5.486074464966285, "grad_norm": 0.23974016629393172, "learning_rate": 0.00043432433597309053, "loss": 2.9751949310302734, "step": 9359, "token_acc": 0.30380451212301995 }, { "epoch": 5.4866608032834945, "grad_norm": 0.18379043830839406, "learning_rate": 0.0004343079659409839, "loss": 2.967970371246338, "step": 9360, "token_acc": 0.3029283690067507 }, { "epoch": 5.487247141600704, "grad_norm": 0.18169451798087394, "learning_rate": 0.0004342915941775524, "loss": 2.987395763397217, "step": 9361, "token_acc": 0.30139185469139934 }, { "epoch": 5.487833479917913, "grad_norm": 0.16386457270493635, "learning_rate": 0.00043427522068294964, "loss": 3.0131359100341797, "step": 9362, "token_acc": 0.29625857197843486 }, { "epoch": 5.488419818235122, "grad_norm": 0.18017356074422491, "learning_rate": 0.0004342588454573295, "loss": 2.94559645652771, "step": 9363, "token_acc": 0.3063685557235271 }, { "epoch": 5.489006156552331, "grad_norm": 0.1915794965924, "learning_rate": 0.0004342424685008458, "loss": 3.0099494457244873, "step": 9364, "token_acc": 0.29770148969688 }, { "epoch": 5.48959249486954, "grad_norm": 0.16935458706535836, "learning_rate": 0.0004342260898136524, "loss": 2.9863579273223877, "step": 9365, "token_acc": 0.29992056489485847 }, { "epoch": 5.490178833186749, "grad_norm": 0.1775506097557626, "learning_rate": 0.00043420970939590315, "loss": 2.9944534301757812, "step": 9366, "token_acc": 0.300386918987335 }, { "epoch": 5.490765171503957, "grad_norm": 0.1974878024972098, "learning_rate": 0.00043419332724775183, "loss": 3.0044822692871094, "step": 9367, "token_acc": 0.29898780564857524 }, { "epoch": 5.4913515098211665, "grad_norm": 0.16881773587847546, "learning_rate": 0.00043417694336935256, "loss": 3.0309898853302, "step": 9368, "token_acc": 0.2934425913853382 }, { "epoch": 5.491937848138376, "grad_norm": 0.17724400557659958, "learning_rate": 0.00043416055776085893, "loss": 3.0080833435058594, "step": 9369, "token_acc": 0.2971438302582633 }, { "epoch": 5.492524186455585, "grad_norm": 0.15701262671466007, "learning_rate": 0.00043414417042242506, "loss": 3.0453224182128906, "step": 9370, "token_acc": 0.29231416469161803 }, { "epoch": 5.493110524772794, "grad_norm": 0.19354910230130587, "learning_rate": 0.0004341277813542049, "loss": 2.989180564880371, "step": 9371, "token_acc": 0.30065705469248777 }, { "epoch": 5.493696863090003, "grad_norm": 0.23206530054141628, "learning_rate": 0.0004341113905563523, "loss": 3.0411672592163086, "step": 9372, "token_acc": 0.29412633467530197 }, { "epoch": 5.494283201407212, "grad_norm": 0.19446118650442637, "learning_rate": 0.0004340949980290213, "loss": 3.0418009757995605, "step": 9373, "token_acc": 0.2931182966065528 }, { "epoch": 5.494869539724421, "grad_norm": 0.18023346085427694, "learning_rate": 0.00043407860377236583, "loss": 2.970451593399048, "step": 9374, "token_acc": 0.3025208310246384 }, { "epoch": 5.49545587804163, "grad_norm": 0.18846912317183515, "learning_rate": 0.0004340622077865399, "loss": 3.0217268466949463, "step": 9375, "token_acc": 0.29559524304977525 }, { "epoch": 5.496042216358839, "grad_norm": 0.18030663470292602, "learning_rate": 0.00043404581007169764, "loss": 3.004028081893921, "step": 9376, "token_acc": 0.2991030748691432 }, { "epoch": 5.4966285546760485, "grad_norm": 0.1826128650138986, "learning_rate": 0.000434029410627993, "loss": 3.0328335762023926, "step": 9377, "token_acc": 0.29389144419967406 }, { "epoch": 5.497214892993257, "grad_norm": 0.16835067970420253, "learning_rate": 0.00043401300945558, "loss": 3.0004849433898926, "step": 9378, "token_acc": 0.29846186712240974 }, { "epoch": 5.497801231310466, "grad_norm": 0.18967900230634488, "learning_rate": 0.0004339966065546127, "loss": 3.0322606563568115, "step": 9379, "token_acc": 0.2957057353105662 }, { "epoch": 5.498387569627675, "grad_norm": 0.21495167929653738, "learning_rate": 0.00043398020192524523, "loss": 3.0453951358795166, "step": 9380, "token_acc": 0.29316311185136074 }, { "epoch": 5.498973907944884, "grad_norm": 0.1712357227152772, "learning_rate": 0.0004339637955676318, "loss": 2.9697022438049316, "step": 9381, "token_acc": 0.3007912726279548 }, { "epoch": 5.499560246262093, "grad_norm": 0.23446145021797088, "learning_rate": 0.0004339473874819262, "loss": 2.98172664642334, "step": 9382, "token_acc": 0.30057683582863437 }, { "epoch": 5.500146584579302, "grad_norm": 0.2907211520884201, "learning_rate": 0.00043393097766828293, "loss": 2.9900078773498535, "step": 9383, "token_acc": 0.29807310104548207 }, { "epoch": 5.500732922896511, "grad_norm": 0.16959876754183015, "learning_rate": 0.0004339145661268559, "loss": 3.022305488586426, "step": 9384, "token_acc": 0.29637482795922826 }, { "epoch": 5.5013192612137205, "grad_norm": 0.21187735366206484, "learning_rate": 0.0004338981528577994, "loss": 3.000006675720215, "step": 9385, "token_acc": 0.2995254164369933 }, { "epoch": 5.50190559953093, "grad_norm": 0.2443164023925679, "learning_rate": 0.0004338817378612675, "loss": 3.0584471225738525, "step": 9386, "token_acc": 0.28929861903019655 }, { "epoch": 5.502491937848139, "grad_norm": 0.1671842632629723, "learning_rate": 0.0004338653211374145, "loss": 3.0012640953063965, "step": 9387, "token_acc": 0.2984004728538658 }, { "epoch": 5.503078276165347, "grad_norm": 0.19813626163432357, "learning_rate": 0.0004338489026863945, "loss": 2.9920334815979004, "step": 9388, "token_acc": 0.29932451823428746 }, { "epoch": 5.503664614482556, "grad_norm": 0.17687717320635774, "learning_rate": 0.00043383248250836187, "loss": 3.0035901069641113, "step": 9389, "token_acc": 0.2986588751510074 }, { "epoch": 5.504250952799765, "grad_norm": 0.19068261016671093, "learning_rate": 0.00043381606060347076, "loss": 2.984023094177246, "step": 9390, "token_acc": 0.30226055135020585 }, { "epoch": 5.504837291116974, "grad_norm": 0.15992504897434573, "learning_rate": 0.00043379963697187554, "loss": 3.0117697715759277, "step": 9391, "token_acc": 0.2966624106230848 }, { "epoch": 5.505423629434183, "grad_norm": 0.1896594067260591, "learning_rate": 0.00043378321161373026, "loss": 3.0310535430908203, "step": 9392, "token_acc": 0.2949860127035132 }, { "epoch": 5.5060099677513925, "grad_norm": 0.16920094324649537, "learning_rate": 0.0004337667845291894, "loss": 2.99334716796875, "step": 9393, "token_acc": 0.29846604959701756 }, { "epoch": 5.506596306068602, "grad_norm": 0.176388198803083, "learning_rate": 0.00043375035571840726, "loss": 3.0528552532196045, "step": 9394, "token_acc": 0.2911713331620858 }, { "epoch": 5.507182644385811, "grad_norm": 0.17092608085548297, "learning_rate": 0.00043373392518153815, "loss": 2.986557960510254, "step": 9395, "token_acc": 0.30249499227305804 }, { "epoch": 5.50776898270302, "grad_norm": 0.15328530036068685, "learning_rate": 0.0004337174929187364, "loss": 3.0155205726623535, "step": 9396, "token_acc": 0.2970230022916062 }, { "epoch": 5.508355321020229, "grad_norm": 0.18237846686239156, "learning_rate": 0.0004337010589301563, "loss": 2.9875736236572266, "step": 9397, "token_acc": 0.3006323535047714 }, { "epoch": 5.508941659337438, "grad_norm": 0.15841846383228525, "learning_rate": 0.00043368462321595236, "loss": 3.0443122386932373, "step": 9398, "token_acc": 0.2914185151423073 }, { "epoch": 5.509527997654647, "grad_norm": 0.16263065524028364, "learning_rate": 0.00043366818577627886, "loss": 2.9979233741760254, "step": 9399, "token_acc": 0.29885475272351814 }, { "epoch": 5.510114335971855, "grad_norm": 0.1723298723366606, "learning_rate": 0.0004336517466112903, "loss": 2.995983600616455, "step": 9400, "token_acc": 0.2978448076656448 }, { "epoch": 5.5107006742890645, "grad_norm": 0.20187269414754777, "learning_rate": 0.000433635305721141, "loss": 3.0249767303466797, "step": 9401, "token_acc": 0.29541429173865136 }, { "epoch": 5.511287012606274, "grad_norm": 0.18664800798980863, "learning_rate": 0.0004336188631059855, "loss": 3.0449013710021973, "step": 9402, "token_acc": 0.292352200527398 }, { "epoch": 5.511873350923483, "grad_norm": 0.15324617081762024, "learning_rate": 0.00043360241876597817, "loss": 2.972271203994751, "step": 9403, "token_acc": 0.3038447509737675 }, { "epoch": 5.512459689240692, "grad_norm": 0.17140731816783925, "learning_rate": 0.00043358597270127353, "loss": 2.977762222290039, "step": 9404, "token_acc": 0.3025508731418675 }, { "epoch": 5.513046027557901, "grad_norm": 0.18327996559283097, "learning_rate": 0.0004335695249120261, "loss": 2.9809370040893555, "step": 9405, "token_acc": 0.2999408274968501 }, { "epoch": 5.51363236587511, "grad_norm": 0.18974578362194042, "learning_rate": 0.00043355307539839026, "loss": 3.010877847671509, "step": 9406, "token_acc": 0.29699738731304526 }, { "epoch": 5.514218704192319, "grad_norm": 0.1834888309245768, "learning_rate": 0.0004335366241605207, "loss": 2.9924750328063965, "step": 9407, "token_acc": 0.29959800924011043 }, { "epoch": 5.514805042509528, "grad_norm": 0.16288238828971746, "learning_rate": 0.0004335201711985718, "loss": 3.0277392864227295, "step": 9408, "token_acc": 0.2942145762624038 }, { "epoch": 5.515391380826737, "grad_norm": 0.16089403547306336, "learning_rate": 0.00043350371651269825, "loss": 2.9950387477874756, "step": 9409, "token_acc": 0.2991269017109097 }, { "epoch": 5.515977719143946, "grad_norm": 0.15991453230368985, "learning_rate": 0.0004334872601030546, "loss": 2.969026565551758, "step": 9410, "token_acc": 0.3028002726214051 }, { "epoch": 5.516564057461155, "grad_norm": 0.1967749455189207, "learning_rate": 0.0004334708019697953, "loss": 3.0168423652648926, "step": 9411, "token_acc": 0.2964568616264732 }, { "epoch": 5.517150395778364, "grad_norm": 0.2366919285871696, "learning_rate": 0.0004334543421130751, "loss": 3.01121187210083, "step": 9412, "token_acc": 0.2964904170023908 }, { "epoch": 5.517736734095573, "grad_norm": 0.23840548553057841, "learning_rate": 0.00043343788053304855, "loss": 3.015990972518921, "step": 9413, "token_acc": 0.29593804861535716 }, { "epoch": 5.518323072412782, "grad_norm": 0.19128260085810947, "learning_rate": 0.00043342141722987025, "loss": 2.993687152862549, "step": 9414, "token_acc": 0.2999158970085569 }, { "epoch": 5.518909410729991, "grad_norm": 0.1638974745229396, "learning_rate": 0.000433404952203695, "loss": 2.9645180702209473, "step": 9415, "token_acc": 0.30518516606213936 }, { "epoch": 5.5194957490472, "grad_norm": 0.20631894037930643, "learning_rate": 0.0004333884854546773, "loss": 2.9568023681640625, "step": 9416, "token_acc": 0.3049634063737341 }, { "epoch": 5.520082087364409, "grad_norm": 0.19887940177501076, "learning_rate": 0.0004333720169829719, "loss": 3.007230281829834, "step": 9417, "token_acc": 0.29784604566798684 }, { "epoch": 5.5206684256816185, "grad_norm": 0.1830773133928135, "learning_rate": 0.00043335554678873345, "loss": 3.0155134201049805, "step": 9418, "token_acc": 0.2965248363999102 }, { "epoch": 5.521254763998828, "grad_norm": 0.23703950501463483, "learning_rate": 0.00043333907487211684, "loss": 3.0468897819519043, "step": 9419, "token_acc": 0.2931239480551567 }, { "epoch": 5.521841102316037, "grad_norm": 0.20751805438002874, "learning_rate": 0.0004333226012332766, "loss": 3.0036652088165283, "step": 9420, "token_acc": 0.30087034577609334 }, { "epoch": 5.522427440633246, "grad_norm": 0.16104949912946592, "learning_rate": 0.0004333061258723675, "loss": 2.9873478412628174, "step": 9421, "token_acc": 0.29975913415065736 }, { "epoch": 5.523013778950454, "grad_norm": 0.2109539337701507, "learning_rate": 0.00043328964878954445, "loss": 3.0426526069641113, "step": 9422, "token_acc": 0.29446116574185255 }, { "epoch": 5.523600117267663, "grad_norm": 0.16941032704916537, "learning_rate": 0.00043327316998496206, "loss": 3.024261474609375, "step": 9423, "token_acc": 0.2959658970611777 }, { "epoch": 5.524186455584872, "grad_norm": 0.20434604066975953, "learning_rate": 0.00043325668945877526, "loss": 3.0104238986968994, "step": 9424, "token_acc": 0.2969926793337366 }, { "epoch": 5.524772793902081, "grad_norm": 0.1749752595667825, "learning_rate": 0.00043324020721113876, "loss": 2.9500277042388916, "step": 9425, "token_acc": 0.3064660501523786 }, { "epoch": 5.5253591322192905, "grad_norm": 0.16697771530572791, "learning_rate": 0.0004332237232422075, "loss": 3.023108959197998, "step": 9426, "token_acc": 0.29665352218817626 }, { "epoch": 5.5259454705365, "grad_norm": 0.2752882037177016, "learning_rate": 0.0004332072375521362, "loss": 3.0061073303222656, "step": 9427, "token_acc": 0.29803831152612065 }, { "epoch": 5.526531808853709, "grad_norm": 0.25590083139718867, "learning_rate": 0.0004331907501410798, "loss": 3.0086348056793213, "step": 9428, "token_acc": 0.29783920980374856 }, { "epoch": 5.527118147170918, "grad_norm": 0.17319592805934894, "learning_rate": 0.0004331742610091931, "loss": 3.012814521789551, "step": 9429, "token_acc": 0.2979724496921221 }, { "epoch": 5.527704485488127, "grad_norm": 0.21454232799656464, "learning_rate": 0.0004331577701566311, "loss": 3.007486581802368, "step": 9430, "token_acc": 0.29804485831947125 }, { "epoch": 5.528290823805335, "grad_norm": 0.22447794462132634, "learning_rate": 0.0004331412775835486, "loss": 2.9998936653137207, "step": 9431, "token_acc": 0.29816750464800895 }, { "epoch": 5.528877162122544, "grad_norm": 0.2875163596874977, "learning_rate": 0.00043312478329010065, "loss": 3.067638635635376, "step": 9432, "token_acc": 0.2889450543664271 }, { "epoch": 5.529463500439753, "grad_norm": 0.17526870198281402, "learning_rate": 0.00043310828727644214, "loss": 2.993114709854126, "step": 9433, "token_acc": 0.29982795710361665 }, { "epoch": 5.5300498387569625, "grad_norm": 0.1935005606618356, "learning_rate": 0.000433091789542728, "loss": 2.9839954376220703, "step": 9434, "token_acc": 0.3010632694176998 }, { "epoch": 5.530636177074172, "grad_norm": 0.17675171428271097, "learning_rate": 0.00043307529008911315, "loss": 3.0044941902160645, "step": 9435, "token_acc": 0.29715196856185183 }, { "epoch": 5.531222515391381, "grad_norm": 0.17227898136409595, "learning_rate": 0.00043305878891575266, "loss": 3.031464099884033, "step": 9436, "token_acc": 0.29422518696661126 }, { "epoch": 5.53180885370859, "grad_norm": 0.19061003497288673, "learning_rate": 0.0004330422860228016, "loss": 2.971642255783081, "step": 9437, "token_acc": 0.30416964608370045 }, { "epoch": 5.532395192025799, "grad_norm": 0.19336355354909046, "learning_rate": 0.00043302578141041486, "loss": 3.00479793548584, "step": 9438, "token_acc": 0.29789834512331337 }, { "epoch": 5.532981530343008, "grad_norm": 0.19053764720440614, "learning_rate": 0.00043300927507874753, "loss": 3.0219674110412598, "step": 9439, "token_acc": 0.29553795989519616 }, { "epoch": 5.533567868660217, "grad_norm": 0.1795587059848596, "learning_rate": 0.0004329927670279547, "loss": 2.9725608825683594, "step": 9440, "token_acc": 0.30276111887691454 }, { "epoch": 5.534154206977426, "grad_norm": 0.16730239467832667, "learning_rate": 0.0004329762572581914, "loss": 3.0156161785125732, "step": 9441, "token_acc": 0.29544475151665284 }, { "epoch": 5.534740545294635, "grad_norm": 0.2471057373874181, "learning_rate": 0.00043295974576961274, "loss": 3.0116987228393555, "step": 9442, "token_acc": 0.2962087084176546 }, { "epoch": 5.535326883611844, "grad_norm": 0.15518135666905755, "learning_rate": 0.0004329432325623738, "loss": 2.9920949935913086, "step": 9443, "token_acc": 0.2996758339433232 }, { "epoch": 5.535913221929053, "grad_norm": 0.15241851584977692, "learning_rate": 0.0004329267176366297, "loss": 2.9840667247772217, "step": 9444, "token_acc": 0.300709035563145 }, { "epoch": 5.536499560246262, "grad_norm": 0.14709230456075592, "learning_rate": 0.00043291020099253555, "loss": 3.0079855918884277, "step": 9445, "token_acc": 0.29767388925819493 }, { "epoch": 5.537085898563471, "grad_norm": 0.1491350928490702, "learning_rate": 0.00043289368263024655, "loss": 2.9527130126953125, "step": 9446, "token_acc": 0.3067292604368832 }, { "epoch": 5.53767223688068, "grad_norm": 0.15493413931037414, "learning_rate": 0.0004328771625499179, "loss": 3.022693634033203, "step": 9447, "token_acc": 0.2946309304321768 }, { "epoch": 5.538258575197889, "grad_norm": 0.1496983759595075, "learning_rate": 0.0004328606407517047, "loss": 3.0094494819641113, "step": 9448, "token_acc": 0.2970012593845771 }, { "epoch": 5.538844913515098, "grad_norm": 0.16415841876125883, "learning_rate": 0.0004328441172357622, "loss": 2.9764533042907715, "step": 9449, "token_acc": 0.3007958346546098 }, { "epoch": 5.5394312518323074, "grad_norm": 0.15710987994295583, "learning_rate": 0.00043282759200224556, "loss": 2.9747698307037354, "step": 9450, "token_acc": 0.3032096196329081 }, { "epoch": 5.540017590149517, "grad_norm": 0.1577542351239319, "learning_rate": 0.0004328110650513101, "loss": 3.002152681350708, "step": 9451, "token_acc": 0.29886545466241043 }, { "epoch": 5.540603928466726, "grad_norm": 0.15485076271625242, "learning_rate": 0.00043279453638311107, "loss": 3.0010223388671875, "step": 9452, "token_acc": 0.2992914516814983 }, { "epoch": 5.541190266783934, "grad_norm": 0.16379006894422668, "learning_rate": 0.00043277800599780364, "loss": 3.0720930099487305, "step": 9453, "token_acc": 0.2888835302628406 }, { "epoch": 5.541776605101143, "grad_norm": 0.15439909680552336, "learning_rate": 0.0004327614738955431, "loss": 2.9842398166656494, "step": 9454, "token_acc": 0.3001962416905164 }, { "epoch": 5.542362943418352, "grad_norm": 0.15270878349037145, "learning_rate": 0.0004327449400764849, "loss": 3.042994976043701, "step": 9455, "token_acc": 0.2923127819233011 }, { "epoch": 5.542949281735561, "grad_norm": 0.16045093121590084, "learning_rate": 0.0004327284045407841, "loss": 3.035616636276245, "step": 9456, "token_acc": 0.2930281401939159 }, { "epoch": 5.54353562005277, "grad_norm": 0.15264748646531065, "learning_rate": 0.00043271186728859626, "loss": 3.0118813514709473, "step": 9457, "token_acc": 0.29589908205811954 }, { "epoch": 5.5441219583699795, "grad_norm": 0.17808934519793188, "learning_rate": 0.0004326953283200766, "loss": 3.0128226280212402, "step": 9458, "token_acc": 0.2940254399926992 }, { "epoch": 5.544708296687189, "grad_norm": 0.17305300894296483, "learning_rate": 0.00043267878763538056, "loss": 2.9983043670654297, "step": 9459, "token_acc": 0.299401134884744 }, { "epoch": 5.545294635004398, "grad_norm": 0.20042670898163922, "learning_rate": 0.00043266224523466347, "loss": 2.977257251739502, "step": 9460, "token_acc": 0.3009390616705413 }, { "epoch": 5.545880973321607, "grad_norm": 0.18409919883494427, "learning_rate": 0.00043264570111808064, "loss": 3.039703369140625, "step": 9461, "token_acc": 0.2931015674264778 }, { "epoch": 5.546467311638816, "grad_norm": 0.16439806752008956, "learning_rate": 0.00043262915528578767, "loss": 3.0225369930267334, "step": 9462, "token_acc": 0.29539860576643145 }, { "epoch": 5.547053649956025, "grad_norm": 0.17286503420805704, "learning_rate": 0.0004326126077379398, "loss": 2.992945909500122, "step": 9463, "token_acc": 0.300638338290669 }, { "epoch": 5.547639988273234, "grad_norm": 0.173935008201628, "learning_rate": 0.00043259605847469263, "loss": 3.0009875297546387, "step": 9464, "token_acc": 0.29795214786851 }, { "epoch": 5.548226326590442, "grad_norm": 0.16709085431629267, "learning_rate": 0.00043257950749620147, "loss": 2.9774985313415527, "step": 9465, "token_acc": 0.3012531074019583 }, { "epoch": 5.5488126649076515, "grad_norm": 0.1934510195318812, "learning_rate": 0.00043256295480262195, "loss": 3.0060086250305176, "step": 9466, "token_acc": 0.3000656243968346 }, { "epoch": 5.549399003224861, "grad_norm": 0.2728313701104438, "learning_rate": 0.00043254640039410943, "loss": 2.9284114837646484, "step": 9467, "token_acc": 0.30796650593664127 }, { "epoch": 5.54998534154207, "grad_norm": 0.3647029398591561, "learning_rate": 0.00043252984427081945, "loss": 3.061497688293457, "step": 9468, "token_acc": 0.2908314452312613 }, { "epoch": 5.550571679859279, "grad_norm": 0.2398856113506125, "learning_rate": 0.0004325132864329075, "loss": 3.0241472721099854, "step": 9469, "token_acc": 0.2960483742084566 }, { "epoch": 5.551158018176488, "grad_norm": 0.2214949390067725, "learning_rate": 0.0004324967268805292, "loss": 2.998025417327881, "step": 9470, "token_acc": 0.2997026147754056 }, { "epoch": 5.551744356493697, "grad_norm": 0.22839354474485768, "learning_rate": 0.00043248016561384014, "loss": 3.0145392417907715, "step": 9471, "token_acc": 0.29481959292381493 }, { "epoch": 5.552330694810906, "grad_norm": 0.20551582814020553, "learning_rate": 0.00043246360263299576, "loss": 3.010359287261963, "step": 9472, "token_acc": 0.2984882462006645 }, { "epoch": 5.552917033128115, "grad_norm": 0.18292084583042478, "learning_rate": 0.00043244703793815167, "loss": 2.997507095336914, "step": 9473, "token_acc": 0.29935713344750364 }, { "epoch": 5.5535033714453235, "grad_norm": 0.19812503568566023, "learning_rate": 0.00043243047152946356, "loss": 2.9943928718566895, "step": 9474, "token_acc": 0.30122276785483765 }, { "epoch": 5.554089709762533, "grad_norm": 0.15795928045482333, "learning_rate": 0.00043241390340708697, "loss": 3.0328638553619385, "step": 9475, "token_acc": 0.29366938488782995 }, { "epoch": 5.554676048079742, "grad_norm": 0.213239481451516, "learning_rate": 0.0004323973335711776, "loss": 3.0118188858032227, "step": 9476, "token_acc": 0.29764269176030755 }, { "epoch": 5.555262386396951, "grad_norm": 0.15263667282382087, "learning_rate": 0.000432380762021891, "loss": 3.031569719314575, "step": 9477, "token_acc": 0.2929050498869861 }, { "epoch": 5.55584872471416, "grad_norm": 0.20638453816711766, "learning_rate": 0.000432364188759383, "loss": 3.0327658653259277, "step": 9478, "token_acc": 0.2926560625491251 }, { "epoch": 5.556435063031369, "grad_norm": 0.16573847023168606, "learning_rate": 0.0004323476137838092, "loss": 2.987891912460327, "step": 9479, "token_acc": 0.30018902504385747 }, { "epoch": 5.557021401348578, "grad_norm": 0.22255895005456156, "learning_rate": 0.0004323310370953252, "loss": 2.985628843307495, "step": 9480, "token_acc": 0.3011409692219205 }, { "epoch": 5.557607739665787, "grad_norm": 0.17078694536071845, "learning_rate": 0.0004323144586940868, "loss": 3.0173001289367676, "step": 9481, "token_acc": 0.2958549032758368 }, { "epoch": 5.558194077982996, "grad_norm": 0.22715437180514036, "learning_rate": 0.00043229787858024973, "loss": 3.02471661567688, "step": 9482, "token_acc": 0.2932128228384664 }, { "epoch": 5.5587804163002055, "grad_norm": 0.2020690966866365, "learning_rate": 0.0004322812967539698, "loss": 3.0398335456848145, "step": 9483, "token_acc": 0.29275114619076703 }, { "epoch": 5.559366754617415, "grad_norm": 0.3352401803029688, "learning_rate": 0.0004322647132154026, "loss": 3.02543306350708, "step": 9484, "token_acc": 0.2953252669850706 }, { "epoch": 5.559953092934624, "grad_norm": 0.19594454581560863, "learning_rate": 0.00043224812796470414, "loss": 3.0144078731536865, "step": 9485, "token_acc": 0.29517604160439953 }, { "epoch": 5.560539431251832, "grad_norm": 0.23418751638448665, "learning_rate": 0.00043223154100203, "loss": 2.95459246635437, "step": 9486, "token_acc": 0.3076315339358776 }, { "epoch": 5.561125769569041, "grad_norm": 0.20373948204869285, "learning_rate": 0.00043221495232753616, "loss": 2.9931588172912598, "step": 9487, "token_acc": 0.29906892208599717 }, { "epoch": 5.56171210788625, "grad_norm": 0.24754027517796798, "learning_rate": 0.0004321983619413784, "loss": 3.020735263824463, "step": 9488, "token_acc": 0.295491859077549 }, { "epoch": 5.562298446203459, "grad_norm": 0.20078553734281449, "learning_rate": 0.0004321817698437125, "loss": 3.0241341590881348, "step": 9489, "token_acc": 0.2963542859418717 }, { "epoch": 5.562884784520668, "grad_norm": 0.20940765083540744, "learning_rate": 0.0004321651760346944, "loss": 2.9976284503936768, "step": 9490, "token_acc": 0.2978556836902801 }, { "epoch": 5.5634711228378775, "grad_norm": 0.17133403241427358, "learning_rate": 0.0004321485805144799, "loss": 2.993675708770752, "step": 9491, "token_acc": 0.2983808830625918 }, { "epoch": 5.564057461155087, "grad_norm": 0.20110559372543427, "learning_rate": 0.000432131983283225, "loss": 2.9991750717163086, "step": 9492, "token_acc": 0.29702934750274396 }, { "epoch": 5.564643799472296, "grad_norm": 0.18754257217321788, "learning_rate": 0.0004321153843410855, "loss": 3.0179224014282227, "step": 9493, "token_acc": 0.2964063873541159 }, { "epoch": 5.565230137789505, "grad_norm": 0.203897837846292, "learning_rate": 0.0004320987836882173, "loss": 2.994328022003174, "step": 9494, "token_acc": 0.2983112743381653 }, { "epoch": 5.565816476106714, "grad_norm": 0.16036465056483465, "learning_rate": 0.0004320821813247765, "loss": 3.0221657752990723, "step": 9495, "token_acc": 0.29479546516159166 }, { "epoch": 5.566402814423922, "grad_norm": 0.2012359393730973, "learning_rate": 0.0004320655772509189, "loss": 2.99320125579834, "step": 9496, "token_acc": 0.30027345757413576 }, { "epoch": 5.566989152741131, "grad_norm": 0.183975998874183, "learning_rate": 0.00043204897146680065, "loss": 3.008193016052246, "step": 9497, "token_acc": 0.2992542140207764 }, { "epoch": 5.56757549105834, "grad_norm": 0.1972059160150747, "learning_rate": 0.0004320323639725775, "loss": 3.022758722305298, "step": 9498, "token_acc": 0.2956468972565552 }, { "epoch": 5.5681618293755495, "grad_norm": 0.17278827872451347, "learning_rate": 0.0004320157547684057, "loss": 2.9845824241638184, "step": 9499, "token_acc": 0.3009983062156752 }, { "epoch": 5.568748167692759, "grad_norm": 0.21005859958466666, "learning_rate": 0.0004319991438544411, "loss": 3.0067405700683594, "step": 9500, "token_acc": 0.2977675209897229 }, { "epoch": 5.569334506009968, "grad_norm": 0.1743347513588534, "learning_rate": 0.0004319825312308397, "loss": 2.9743270874023438, "step": 9501, "token_acc": 0.3026966224810234 }, { "epoch": 5.569920844327177, "grad_norm": 0.16531455874855314, "learning_rate": 0.0004319659168977578, "loss": 2.9722275733947754, "step": 9502, "token_acc": 0.30333792652820335 }, { "epoch": 5.570507182644386, "grad_norm": 0.16879334573894708, "learning_rate": 0.0004319493008553512, "loss": 3.0240373611450195, "step": 9503, "token_acc": 0.29491075686700285 }, { "epoch": 5.571093520961595, "grad_norm": 0.1635008896473854, "learning_rate": 0.0004319326831037762, "loss": 3.0178942680358887, "step": 9504, "token_acc": 0.2967016507241287 }, { "epoch": 5.571679859278804, "grad_norm": 0.18688984143424864, "learning_rate": 0.0004319160636431887, "loss": 3.0153112411499023, "step": 9505, "token_acc": 0.2972402466417318 }, { "epoch": 5.572266197596013, "grad_norm": 0.182453126088087, "learning_rate": 0.00043189944247374495, "loss": 3.02286434173584, "step": 9506, "token_acc": 0.29398997723043013 }, { "epoch": 5.572852535913222, "grad_norm": 0.19833881459217165, "learning_rate": 0.00043188281959560105, "loss": 2.9843196868896484, "step": 9507, "token_acc": 0.29977302452454746 }, { "epoch": 5.573438874230431, "grad_norm": 0.17581081450183797, "learning_rate": 0.00043186619500891314, "loss": 2.997870683670044, "step": 9508, "token_acc": 0.29883769449394176 }, { "epoch": 5.57402521254764, "grad_norm": 0.2039416403457468, "learning_rate": 0.00043184956871383746, "loss": 3.0138344764709473, "step": 9509, "token_acc": 0.2966442987061073 }, { "epoch": 5.574611550864849, "grad_norm": 0.2072866115869426, "learning_rate": 0.0004318329407105301, "loss": 2.9914472103118896, "step": 9510, "token_acc": 0.3007728229921276 }, { "epoch": 5.575197889182058, "grad_norm": 0.15104159368363976, "learning_rate": 0.0004318163109991472, "loss": 3.0027198791503906, "step": 9511, "token_acc": 0.29881884795386016 }, { "epoch": 5.575784227499267, "grad_norm": 0.1806939022655288, "learning_rate": 0.00043179967957984516, "loss": 2.9996519088745117, "step": 9512, "token_acc": 0.2988821421674856 }, { "epoch": 5.576370565816476, "grad_norm": 0.16043262903584404, "learning_rate": 0.00043178304645278003, "loss": 2.9799346923828125, "step": 9513, "token_acc": 0.3019449116904962 }, { "epoch": 5.576956904133685, "grad_norm": 0.1737721818800752, "learning_rate": 0.00043176641161810824, "loss": 3.014482259750366, "step": 9514, "token_acc": 0.2968615113340873 }, { "epoch": 5.577543242450894, "grad_norm": 0.18926054639918807, "learning_rate": 0.0004317497750759859, "loss": 2.9841599464416504, "step": 9515, "token_acc": 0.30318275400748224 }, { "epoch": 5.5781295807681035, "grad_norm": 0.16322166011277983, "learning_rate": 0.0004317331368265693, "loss": 3.021703004837036, "step": 9516, "token_acc": 0.29446560026867596 }, { "epoch": 5.578715919085313, "grad_norm": 0.17428426544483375, "learning_rate": 0.0004317164968700148, "loss": 3.018117904663086, "step": 9517, "token_acc": 0.2965354941366815 }, { "epoch": 5.579302257402521, "grad_norm": 0.16268074246702305, "learning_rate": 0.00043169985520647857, "loss": 3.01544189453125, "step": 9518, "token_acc": 0.294755603733564 }, { "epoch": 5.57988859571973, "grad_norm": 0.17943860147494728, "learning_rate": 0.0004316832118361172, "loss": 3.0336971282958984, "step": 9519, "token_acc": 0.2941748835898242 }, { "epoch": 5.580474934036939, "grad_norm": 0.18477428241520144, "learning_rate": 0.0004316665667590868, "loss": 3.0227746963500977, "step": 9520, "token_acc": 0.29475153869704296 }, { "epoch": 5.581061272354148, "grad_norm": 0.2088075111321705, "learning_rate": 0.0004316499199755438, "loss": 2.984206199645996, "step": 9521, "token_acc": 0.3017601455791507 }, { "epoch": 5.581647610671357, "grad_norm": 0.21254027026259575, "learning_rate": 0.00043163327148564457, "loss": 3.0296449661254883, "step": 9522, "token_acc": 0.29570377416251203 }, { "epoch": 5.582233948988566, "grad_norm": 0.19722027035943757, "learning_rate": 0.00043161662128954554, "loss": 3.0179333686828613, "step": 9523, "token_acc": 0.2953351197804762 }, { "epoch": 5.5828202873057755, "grad_norm": 0.22325342548140517, "learning_rate": 0.0004315999693874031, "loss": 2.9934096336364746, "step": 9524, "token_acc": 0.2991760144536382 }, { "epoch": 5.583406625622985, "grad_norm": 0.19130693250430392, "learning_rate": 0.0004315833157793736, "loss": 3.0450313091278076, "step": 9525, "token_acc": 0.292958554193621 }, { "epoch": 5.583992963940194, "grad_norm": 0.23290702776374, "learning_rate": 0.0004315666604656136, "loss": 2.958040475845337, "step": 9526, "token_acc": 0.3034703592860798 }, { "epoch": 5.584579302257403, "grad_norm": 0.23654086113241085, "learning_rate": 0.0004315500034462795, "loss": 2.962702751159668, "step": 9527, "token_acc": 0.3022096528417662 }, { "epoch": 5.585165640574612, "grad_norm": 0.18405221620687423, "learning_rate": 0.00043153334472152764, "loss": 2.993170976638794, "step": 9528, "token_acc": 0.2983177491520436 }, { "epoch": 5.585751978891821, "grad_norm": 0.2212657501567586, "learning_rate": 0.00043151668429151473, "loss": 3.004142999649048, "step": 9529, "token_acc": 0.2971003345266435 }, { "epoch": 5.586338317209029, "grad_norm": 0.20256211746017752, "learning_rate": 0.0004315000221563972, "loss": 3.0117411613464355, "step": 9530, "token_acc": 0.2984054610557037 }, { "epoch": 5.586924655526238, "grad_norm": 0.18909968891366608, "learning_rate": 0.00043148335831633144, "loss": 2.9673969745635986, "step": 9531, "token_acc": 0.30217277433137507 }, { "epoch": 5.5875109938434475, "grad_norm": 0.19822619812522504, "learning_rate": 0.0004314666927714741, "loss": 2.9493350982666016, "step": 9532, "token_acc": 0.306335849681107 }, { "epoch": 5.588097332160657, "grad_norm": 0.1653030354919907, "learning_rate": 0.0004314500255219817, "loss": 3.0565433502197266, "step": 9533, "token_acc": 0.2892925306085353 }, { "epoch": 5.588683670477866, "grad_norm": 0.169206849004507, "learning_rate": 0.00043143335656801085, "loss": 2.97257661819458, "step": 9534, "token_acc": 0.3024045618031257 }, { "epoch": 5.589270008795075, "grad_norm": 0.18463700508008768, "learning_rate": 0.0004314166859097181, "loss": 3.0033555030822754, "step": 9535, "token_acc": 0.2975013070756361 }, { "epoch": 5.589856347112284, "grad_norm": 0.19389542722141503, "learning_rate": 0.00043140001354726007, "loss": 3.017164468765259, "step": 9536, "token_acc": 0.29687775073784495 }, { "epoch": 5.590442685429493, "grad_norm": 0.18970568826400527, "learning_rate": 0.00043138333948079333, "loss": 2.9916439056396484, "step": 9537, "token_acc": 0.2996392563130145 }, { "epoch": 5.591029023746702, "grad_norm": 0.20000789500609364, "learning_rate": 0.0004313666637104745, "loss": 3.020634889602661, "step": 9538, "token_acc": 0.2954950942647031 }, { "epoch": 5.59161536206391, "grad_norm": 0.1613265693315138, "learning_rate": 0.00043134998623646026, "loss": 3.0108795166015625, "step": 9539, "token_acc": 0.2994221399311984 }, { "epoch": 5.5922017003811195, "grad_norm": 0.18126871128087596, "learning_rate": 0.00043133330705890727, "loss": 2.9905154705047607, "step": 9540, "token_acc": 0.30001103851390065 }, { "epoch": 5.592788038698329, "grad_norm": 0.20689412250807387, "learning_rate": 0.00043131662617797227, "loss": 3.0328121185302734, "step": 9541, "token_acc": 0.2934050294977063 }, { "epoch": 5.593374377015538, "grad_norm": 0.16438513533665722, "learning_rate": 0.00043129994359381187, "loss": 3.0185208320617676, "step": 9542, "token_acc": 0.2951699735382681 }, { "epoch": 5.593960715332747, "grad_norm": 0.18867560935661773, "learning_rate": 0.0004312832593065828, "loss": 2.99249267578125, "step": 9543, "token_acc": 0.3017088950043104 }, { "epoch": 5.594547053649956, "grad_norm": 0.1863607718558092, "learning_rate": 0.0004312665733164418, "loss": 3.016849994659424, "step": 9544, "token_acc": 0.2948993393396419 }, { "epoch": 5.595133391967165, "grad_norm": 0.16932417741866526, "learning_rate": 0.00043124988562354556, "loss": 2.951427936553955, "step": 9545, "token_acc": 0.3044230687561048 }, { "epoch": 5.595719730284374, "grad_norm": 0.19064838047441154, "learning_rate": 0.0004312331962280509, "loss": 2.9883534908294678, "step": 9546, "token_acc": 0.29957602518282583 }, { "epoch": 5.596306068601583, "grad_norm": 0.194961639619161, "learning_rate": 0.0004312165051301146, "loss": 2.997401714324951, "step": 9547, "token_acc": 0.29868872890232423 }, { "epoch": 5.596892406918792, "grad_norm": 0.17465762650883984, "learning_rate": 0.00043119981232989346, "loss": 2.982698678970337, "step": 9548, "token_acc": 0.3013779460084126 }, { "epoch": 5.5974787452360015, "grad_norm": 0.1835960069757496, "learning_rate": 0.0004311831178275442, "loss": 3.0160086154937744, "step": 9549, "token_acc": 0.29681308854804495 }, { "epoch": 5.598065083553211, "grad_norm": 0.18046356377062456, "learning_rate": 0.00043116642162322366, "loss": 3.0057034492492676, "step": 9550, "token_acc": 0.2979149837679657 }, { "epoch": 5.598651421870419, "grad_norm": 0.18743938937309376, "learning_rate": 0.00043114972371708884, "loss": 2.997492790222168, "step": 9551, "token_acc": 0.3004006487717376 }, { "epoch": 5.599237760187628, "grad_norm": 0.18578818663756563, "learning_rate": 0.0004311330241092963, "loss": 3.0247702598571777, "step": 9552, "token_acc": 0.2943992919680048 }, { "epoch": 5.599824098504837, "grad_norm": 0.2146490150787229, "learning_rate": 0.00043111632280000325, "loss": 3.0167722702026367, "step": 9553, "token_acc": 0.2948207914433943 }, { "epoch": 5.600410436822046, "grad_norm": 0.20455195149780603, "learning_rate": 0.0004310996197893663, "loss": 2.997222661972046, "step": 9554, "token_acc": 0.30008608046929797 }, { "epoch": 5.600996775139255, "grad_norm": 0.17392518613616814, "learning_rate": 0.00043108291507754244, "loss": 3.0305111408233643, "step": 9555, "token_acc": 0.29682078090082736 }, { "epoch": 5.601583113456464, "grad_norm": 0.18573324796172533, "learning_rate": 0.00043106620866468863, "loss": 3.0287609100341797, "step": 9556, "token_acc": 0.29382080915880404 }, { "epoch": 5.6021694517736735, "grad_norm": 0.14933753101867098, "learning_rate": 0.0004310495005509618, "loss": 3.010798454284668, "step": 9557, "token_acc": 0.2962245961235133 }, { "epoch": 5.602755790090883, "grad_norm": 0.1774086041438887, "learning_rate": 0.00043103279073651894, "loss": 2.989170789718628, "step": 9558, "token_acc": 0.30033399291682944 }, { "epoch": 5.603342128408092, "grad_norm": 0.19124858814237647, "learning_rate": 0.0004310160792215168, "loss": 3.0522878170013428, "step": 9559, "token_acc": 0.2932727879799666 }, { "epoch": 5.603928466725301, "grad_norm": 0.17971728779723478, "learning_rate": 0.0004309993660061126, "loss": 2.9710168838500977, "step": 9560, "token_acc": 0.30304193919173217 }, { "epoch": 5.604514805042509, "grad_norm": 0.22346838349322226, "learning_rate": 0.0004309826510904633, "loss": 2.9577934741973877, "step": 9561, "token_acc": 0.3044612445827372 }, { "epoch": 5.605101143359718, "grad_norm": 0.30844898390450953, "learning_rate": 0.00043096593447472574, "loss": 3.0060253143310547, "step": 9562, "token_acc": 0.2979023328704518 }, { "epoch": 5.605687481676927, "grad_norm": 0.30398529653323336, "learning_rate": 0.00043094921615905726, "loss": 3.0262985229492188, "step": 9563, "token_acc": 0.2950060010650897 }, { "epoch": 5.606273819994136, "grad_norm": 0.18426877913750847, "learning_rate": 0.0004309324961436146, "loss": 3.0328433513641357, "step": 9564, "token_acc": 0.293977275948532 }, { "epoch": 5.6068601583113455, "grad_norm": 0.1906612566560988, "learning_rate": 0.000430915774428555, "loss": 3.029179573059082, "step": 9565, "token_acc": 0.29620073820801573 }, { "epoch": 5.607446496628555, "grad_norm": 0.1656020304966405, "learning_rate": 0.0004308990510140355, "loss": 3.026366949081421, "step": 9566, "token_acc": 0.2949843415756632 }, { "epoch": 5.608032834945764, "grad_norm": 0.2119433880332245, "learning_rate": 0.00043088232590021307, "loss": 3.0275793075561523, "step": 9567, "token_acc": 0.29360575993646515 }, { "epoch": 5.608619173262973, "grad_norm": 0.1981877850757136, "learning_rate": 0.0004308655990872451, "loss": 2.9862585067749023, "step": 9568, "token_acc": 0.3010810853393064 }, { "epoch": 5.609205511580182, "grad_norm": 0.16796205263474484, "learning_rate": 0.0004308488705752884, "loss": 3.0349173545837402, "step": 9569, "token_acc": 0.29396607257891344 }, { "epoch": 5.609791849897391, "grad_norm": 0.19379537811112035, "learning_rate": 0.0004308321403645004, "loss": 2.985604763031006, "step": 9570, "token_acc": 0.30119517341487384 }, { "epoch": 5.6103781882146, "grad_norm": 0.16759587131431963, "learning_rate": 0.000430815408455038, "loss": 2.9972641468048096, "step": 9571, "token_acc": 0.29898307583363826 }, { "epoch": 5.610964526531809, "grad_norm": 0.20723113338273044, "learning_rate": 0.0004307986748470585, "loss": 3.0022172927856445, "step": 9572, "token_acc": 0.29989593122965746 }, { "epoch": 5.6115508648490176, "grad_norm": 0.15891757816208982, "learning_rate": 0.0004307819395407191, "loss": 2.9610557556152344, "step": 9573, "token_acc": 0.3049252963749291 }, { "epoch": 5.612137203166227, "grad_norm": 0.20498814126873524, "learning_rate": 0.0004307652025361769, "loss": 3.014742374420166, "step": 9574, "token_acc": 0.29650920581353607 }, { "epoch": 5.612723541483436, "grad_norm": 0.16549235440141769, "learning_rate": 0.0004307484638335893, "loss": 3.035586357116699, "step": 9575, "token_acc": 0.2927976931646897 }, { "epoch": 5.613309879800645, "grad_norm": 0.2063805413379132, "learning_rate": 0.0004307317234331135, "loss": 2.9846510887145996, "step": 9576, "token_acc": 0.3018079505978217 }, { "epoch": 5.613896218117854, "grad_norm": 0.18337881699184097, "learning_rate": 0.00043071498133490663, "loss": 3.0001320838928223, "step": 9577, "token_acc": 0.29861320510528383 }, { "epoch": 5.614482556435063, "grad_norm": 0.179091746629479, "learning_rate": 0.00043069823753912604, "loss": 3.0667128562927246, "step": 9578, "token_acc": 0.2906930547187236 }, { "epoch": 5.615068894752272, "grad_norm": 0.17443620105808696, "learning_rate": 0.00043068149204592894, "loss": 2.9557714462280273, "step": 9579, "token_acc": 0.30458732948231976 }, { "epoch": 5.615655233069481, "grad_norm": 0.18183156962717031, "learning_rate": 0.0004306647448554728, "loss": 2.9554553031921387, "step": 9580, "token_acc": 0.30323421592702593 }, { "epoch": 5.6162415713866904, "grad_norm": 0.17971486281632484, "learning_rate": 0.00043064799596791473, "loss": 3.0134754180908203, "step": 9581, "token_acc": 0.2953587800398216 }, { "epoch": 5.616827909703899, "grad_norm": 0.1680766872328503, "learning_rate": 0.0004306312453834122, "loss": 3.0314207077026367, "step": 9582, "token_acc": 0.2931591171844893 }, { "epoch": 5.617414248021108, "grad_norm": 0.1791789724021555, "learning_rate": 0.0004306144931021225, "loss": 3.0352745056152344, "step": 9583, "token_acc": 0.29327257801377904 }, { "epoch": 5.618000586338317, "grad_norm": 0.16565237767824229, "learning_rate": 0.0004305977391242031, "loss": 2.9725794792175293, "step": 9584, "token_acc": 0.30460619709891235 }, { "epoch": 5.618586924655526, "grad_norm": 0.20220192773069864, "learning_rate": 0.00043058098344981124, "loss": 3.034191608428955, "step": 9585, "token_acc": 0.29381273439584066 }, { "epoch": 5.619173262972735, "grad_norm": 0.3133331869981549, "learning_rate": 0.0004305642260791044, "loss": 3.021151542663574, "step": 9586, "token_acc": 0.2951352677749831 }, { "epoch": 5.619759601289944, "grad_norm": 0.29715495114804735, "learning_rate": 0.0004305474670122399, "loss": 2.986931324005127, "step": 9587, "token_acc": 0.30382221083642885 }, { "epoch": 5.620345939607153, "grad_norm": 0.17416711064100368, "learning_rate": 0.0004305307062493754, "loss": 2.989133596420288, "step": 9588, "token_acc": 0.3011156075977321 }, { "epoch": 5.6209322779243625, "grad_norm": 0.2666930558579081, "learning_rate": 0.000430513943790668, "loss": 2.996760845184326, "step": 9589, "token_acc": 0.298364182550406 }, { "epoch": 5.621518616241572, "grad_norm": 0.17759517420041773, "learning_rate": 0.0004304971796362754, "loss": 2.977205514907837, "step": 9590, "token_acc": 0.2996640492694385 }, { "epoch": 5.622104954558781, "grad_norm": 0.2029505751181237, "learning_rate": 0.00043048041378635507, "loss": 3.0145092010498047, "step": 9591, "token_acc": 0.296279004496068 }, { "epoch": 5.62269129287599, "grad_norm": 0.2071340865748021, "learning_rate": 0.00043046364624106437, "loss": 3.01771879196167, "step": 9592, "token_acc": 0.2958692264263358 }, { "epoch": 5.623277631193199, "grad_norm": 0.18015656623955906, "learning_rate": 0.000430446877000561, "loss": 3.024195432662964, "step": 9593, "token_acc": 0.29632518480438214 }, { "epoch": 5.623863969510407, "grad_norm": 0.20289019911959003, "learning_rate": 0.0004304301060650023, "loss": 2.9754247665405273, "step": 9594, "token_acc": 0.30220750493256393 }, { "epoch": 5.624450307827616, "grad_norm": 0.1774965744770958, "learning_rate": 0.0004304133334345459, "loss": 2.9952664375305176, "step": 9595, "token_acc": 0.2995590168748132 }, { "epoch": 5.625036646144825, "grad_norm": 0.22040994260841465, "learning_rate": 0.00043039655910934936, "loss": 3.048928737640381, "step": 9596, "token_acc": 0.2933570798269591 }, { "epoch": 5.6256229844620345, "grad_norm": 0.1619246695524503, "learning_rate": 0.0004303797830895703, "loss": 3.059825897216797, "step": 9597, "token_acc": 0.2900392779910852 }, { "epoch": 5.626209322779244, "grad_norm": 0.18173730652951914, "learning_rate": 0.0004303630053753661, "loss": 2.9628822803497314, "step": 9598, "token_acc": 0.30443590526874287 }, { "epoch": 5.626795661096453, "grad_norm": 0.17676495272694956, "learning_rate": 0.0004303462259668945, "loss": 3.0075762271881104, "step": 9599, "token_acc": 0.29794336611578104 }, { "epoch": 5.627381999413662, "grad_norm": 0.21949285664824922, "learning_rate": 0.0004303294448643133, "loss": 3.0106120109558105, "step": 9600, "token_acc": 0.29631546615430865 }, { "epoch": 5.627968337730871, "grad_norm": 0.14889499934497757, "learning_rate": 0.00043031266206777985, "loss": 2.9857020378112793, "step": 9601, "token_acc": 0.3005728918015711 }, { "epoch": 5.62855467604808, "grad_norm": 0.1914237252271556, "learning_rate": 0.0004302958775774519, "loss": 3.0286338329315186, "step": 9602, "token_acc": 0.29449090091068464 }, { "epoch": 5.629141014365289, "grad_norm": 0.1514533817159106, "learning_rate": 0.0004302790913934872, "loss": 2.979556083679199, "step": 9603, "token_acc": 0.3026030864745488 }, { "epoch": 5.629727352682497, "grad_norm": 0.23294354768241712, "learning_rate": 0.0004302623035160433, "loss": 3.0235707759857178, "step": 9604, "token_acc": 0.29476490272814926 }, { "epoch": 5.6303136909997065, "grad_norm": 0.1660070302903849, "learning_rate": 0.000430245513945278, "loss": 3.0098559856414795, "step": 9605, "token_acc": 0.2977769563839899 }, { "epoch": 5.630900029316916, "grad_norm": 0.1904031362317103, "learning_rate": 0.00043022872268134896, "loss": 2.9822745323181152, "step": 9606, "token_acc": 0.2994259573449356 }, { "epoch": 5.631486367634125, "grad_norm": 0.18577592173023713, "learning_rate": 0.00043021192972441394, "loss": 3.000027656555176, "step": 9607, "token_acc": 0.2992741347725885 }, { "epoch": 5.632072705951334, "grad_norm": 0.16806913403330592, "learning_rate": 0.0004301951350746307, "loss": 2.967625379562378, "step": 9608, "token_acc": 0.30256362757933575 }, { "epoch": 5.632659044268543, "grad_norm": 0.20525597502535287, "learning_rate": 0.00043017833873215693, "loss": 3.0037293434143066, "step": 9609, "token_acc": 0.29628084426192425 }, { "epoch": 5.633245382585752, "grad_norm": 0.17102531424477008, "learning_rate": 0.0004301615406971505, "loss": 3.0159807205200195, "step": 9610, "token_acc": 0.29815839598465593 }, { "epoch": 5.633831720902961, "grad_norm": 0.1632568933900903, "learning_rate": 0.0004301447409697692, "loss": 2.9904332160949707, "step": 9611, "token_acc": 0.298852141454758 }, { "epoch": 5.63441805922017, "grad_norm": 0.16258987689427026, "learning_rate": 0.0004301279395501707, "loss": 3.0202741622924805, "step": 9612, "token_acc": 0.2958002543746282 }, { "epoch": 5.635004397537379, "grad_norm": 0.16705275273409906, "learning_rate": 0.000430111136438513, "loss": 3.025203227996826, "step": 9613, "token_acc": 0.2960612980830362 }, { "epoch": 5.6355907358545885, "grad_norm": 0.17253200213145323, "learning_rate": 0.00043009433163495393, "loss": 3.0537548065185547, "step": 9614, "token_acc": 0.29039515048614517 }, { "epoch": 5.636177074171798, "grad_norm": 0.15450757489085828, "learning_rate": 0.0004300775251396513, "loss": 3.0372791290283203, "step": 9615, "token_acc": 0.2938734758374541 }, { "epoch": 5.636763412489006, "grad_norm": 0.1564287493835027, "learning_rate": 0.00043006071695276285, "loss": 3.049679756164551, "step": 9616, "token_acc": 0.291018561028399 }, { "epoch": 5.637349750806215, "grad_norm": 0.164010178808354, "learning_rate": 0.0004300439070744466, "loss": 3.0052952766418457, "step": 9617, "token_acc": 0.2977842811597144 }, { "epoch": 5.637936089123424, "grad_norm": 0.18052484177361303, "learning_rate": 0.0004300270955048605, "loss": 3.0111279487609863, "step": 9618, "token_acc": 0.2971457198982215 }, { "epoch": 5.638522427440633, "grad_norm": 0.19196036412766956, "learning_rate": 0.0004300102822441625, "loss": 3.0343875885009766, "step": 9619, "token_acc": 0.29424578446723976 }, { "epoch": 5.639108765757842, "grad_norm": 0.17243112602710092, "learning_rate": 0.00042999346729251045, "loss": 2.997490882873535, "step": 9620, "token_acc": 0.2989858074102722 }, { "epoch": 5.639695104075051, "grad_norm": 0.20113933334743675, "learning_rate": 0.00042997665065006225, "loss": 3.047173261642456, "step": 9621, "token_acc": 0.29113161188632886 }, { "epoch": 5.6402814423922605, "grad_norm": 0.2585591125496731, "learning_rate": 0.000429959832316976, "loss": 2.9997150897979736, "step": 9622, "token_acc": 0.29988833379333396 }, { "epoch": 5.64086778070947, "grad_norm": 0.20017018742608264, "learning_rate": 0.0004299430122934096, "loss": 2.998478889465332, "step": 9623, "token_acc": 0.29868727653884447 }, { "epoch": 5.641454119026679, "grad_norm": 0.24058965700287915, "learning_rate": 0.00042992619057952104, "loss": 2.973613739013672, "step": 9624, "token_acc": 0.30261002600295167 }, { "epoch": 5.642040457343887, "grad_norm": 0.26664231546732725, "learning_rate": 0.00042990936717546844, "loss": 2.9879512786865234, "step": 9625, "token_acc": 0.298649619381812 }, { "epoch": 5.642626795661096, "grad_norm": 0.17993722326741365, "learning_rate": 0.00042989254208140973, "loss": 2.987138032913208, "step": 9626, "token_acc": 0.30090425984513874 }, { "epoch": 5.643213133978305, "grad_norm": 0.28639773475492575, "learning_rate": 0.000429875715297503, "loss": 3.0216989517211914, "step": 9627, "token_acc": 0.2943093002397818 }, { "epoch": 5.643799472295514, "grad_norm": 0.19931065816872046, "learning_rate": 0.0004298588868239063, "loss": 2.9978439807891846, "step": 9628, "token_acc": 0.30077226921214784 }, { "epoch": 5.644385810612723, "grad_norm": 0.21915779202010707, "learning_rate": 0.0004298420566607777, "loss": 3.0021843910217285, "step": 9629, "token_acc": 0.2999309392265193 }, { "epoch": 5.6449721489299325, "grad_norm": 0.20022634930008637, "learning_rate": 0.00042982522480827535, "loss": 2.9696850776672363, "step": 9630, "token_acc": 0.30302787435261086 }, { "epoch": 5.645558487247142, "grad_norm": 0.1861986787477895, "learning_rate": 0.00042980839126655735, "loss": 3.033020257949829, "step": 9631, "token_acc": 0.29264681515214686 }, { "epoch": 5.646144825564351, "grad_norm": 0.19885598517749872, "learning_rate": 0.00042979155603578177, "loss": 3.023491144180298, "step": 9632, "token_acc": 0.29548935074963 }, { "epoch": 5.64673116388156, "grad_norm": 0.2351570344041383, "learning_rate": 0.0004297747191161068, "loss": 2.9687154293060303, "step": 9633, "token_acc": 0.30320883004121807 }, { "epoch": 5.647317502198769, "grad_norm": 0.18211110565034438, "learning_rate": 0.0004297578805076906, "loss": 3.027240514755249, "step": 9634, "token_acc": 0.2960423766523404 }, { "epoch": 5.647903840515978, "grad_norm": 0.2573021120589888, "learning_rate": 0.00042974104021069136, "loss": 2.9699649810791016, "step": 9635, "token_acc": 0.3045506690138984 }, { "epoch": 5.648490178833187, "grad_norm": 0.18323474374008664, "learning_rate": 0.0004297241982252672, "loss": 3.007740020751953, "step": 9636, "token_acc": 0.2973945260227566 }, { "epoch": 5.649076517150396, "grad_norm": 0.2126417772933353, "learning_rate": 0.00042970735455157645, "loss": 2.9918341636657715, "step": 9637, "token_acc": 0.30088923887535746 }, { "epoch": 5.6496628554676045, "grad_norm": 0.1812814982429344, "learning_rate": 0.0004296905091897772, "loss": 3.040578842163086, "step": 9638, "token_acc": 0.29455503018533374 }, { "epoch": 5.650249193784814, "grad_norm": 0.18309232079384316, "learning_rate": 0.0004296736621400278, "loss": 2.981400966644287, "step": 9639, "token_acc": 0.3022540009608986 }, { "epoch": 5.650835532102023, "grad_norm": 0.16718615678249557, "learning_rate": 0.0004296568134024864, "loss": 2.9951562881469727, "step": 9640, "token_acc": 0.3002097317613834 }, { "epoch": 5.651421870419232, "grad_norm": 0.1853777646607536, "learning_rate": 0.0004296399629773114, "loss": 3.020698308944702, "step": 9641, "token_acc": 0.2968197221416231 }, { "epoch": 5.652008208736441, "grad_norm": 0.17698208098032897, "learning_rate": 0.000429623110864661, "loss": 2.990372657775879, "step": 9642, "token_acc": 0.3002808261264784 }, { "epoch": 5.65259454705365, "grad_norm": 0.231477921332244, "learning_rate": 0.0004296062570646935, "loss": 3.031933307647705, "step": 9643, "token_acc": 0.2922380698273227 }, { "epoch": 5.653180885370859, "grad_norm": 0.16872941623725754, "learning_rate": 0.00042958940157756723, "loss": 3.007328748703003, "step": 9644, "token_acc": 0.29785110457363956 }, { "epoch": 5.653767223688068, "grad_norm": 0.19171234055105638, "learning_rate": 0.0004295725444034405, "loss": 3.008355140686035, "step": 9645, "token_acc": 0.2990646385247595 }, { "epoch": 5.654353562005277, "grad_norm": 0.18484908502502898, "learning_rate": 0.0004295556855424717, "loss": 2.999434232711792, "step": 9646, "token_acc": 0.2990242580295433 }, { "epoch": 5.654939900322486, "grad_norm": 0.2385347917215755, "learning_rate": 0.00042953882499481924, "loss": 3.022439479827881, "step": 9647, "token_acc": 0.2952998456491868 }, { "epoch": 5.655526238639695, "grad_norm": 0.17282707392578864, "learning_rate": 0.00042952196276064143, "loss": 2.9714865684509277, "step": 9648, "token_acc": 0.3018283147818686 }, { "epoch": 5.656112576956904, "grad_norm": 0.21653907044315882, "learning_rate": 0.0004295050988400967, "loss": 2.9956345558166504, "step": 9649, "token_acc": 0.3011193373237407 }, { "epoch": 5.656698915274113, "grad_norm": 0.19938927816284252, "learning_rate": 0.0004294882332333434, "loss": 2.9746618270874023, "step": 9650, "token_acc": 0.30127286154915506 }, { "epoch": 5.657285253591322, "grad_norm": 0.1760122764114551, "learning_rate": 0.0004294713659405401, "loss": 2.992704391479492, "step": 9651, "token_acc": 0.2991779325413082 }, { "epoch": 5.657871591908531, "grad_norm": 0.20716780027055753, "learning_rate": 0.0004294544969618451, "loss": 3.0144577026367188, "step": 9652, "token_acc": 0.29784884081009105 }, { "epoch": 5.65845793022574, "grad_norm": 0.1606883926285121, "learning_rate": 0.0004294376262974169, "loss": 2.9628968238830566, "step": 9653, "token_acc": 0.30408607420943695 }, { "epoch": 5.659044268542949, "grad_norm": 0.20776777506456132, "learning_rate": 0.00042942075394741407, "loss": 3.0030856132507324, "step": 9654, "token_acc": 0.2964939436630601 }, { "epoch": 5.6596306068601585, "grad_norm": 0.1833194884507952, "learning_rate": 0.0004294038799119949, "loss": 2.9778831005096436, "step": 9655, "token_acc": 0.30300539827584916 }, { "epoch": 5.660216945177368, "grad_norm": 0.2307086055516595, "learning_rate": 0.0004293870041913182, "loss": 3.0047645568847656, "step": 9656, "token_acc": 0.2978107081688882 }, { "epoch": 5.660803283494577, "grad_norm": 0.1629645673453858, "learning_rate": 0.00042937012678554223, "loss": 3.021730899810791, "step": 9657, "token_acc": 0.29500829936546624 }, { "epoch": 5.661389621811786, "grad_norm": 0.19321640486443206, "learning_rate": 0.00042935324769482564, "loss": 3.0084288120269775, "step": 9658, "token_acc": 0.29875639613964633 }, { "epoch": 5.661975960128994, "grad_norm": 0.16792054232838532, "learning_rate": 0.000429336366919327, "loss": 3.0241122245788574, "step": 9659, "token_acc": 0.29619280195364106 }, { "epoch": 5.662562298446203, "grad_norm": 0.15951579831358248, "learning_rate": 0.00042931948445920474, "loss": 2.98006010055542, "step": 9660, "token_acc": 0.30314749366245397 }, { "epoch": 5.663148636763412, "grad_norm": 0.1820638270397434, "learning_rate": 0.0004293026003146176, "loss": 3.0115294456481934, "step": 9661, "token_acc": 0.29739966140358953 }, { "epoch": 5.663734975080621, "grad_norm": 0.15257340074744735, "learning_rate": 0.00042928571448572417, "loss": 2.9487955570220947, "step": 9662, "token_acc": 0.30703578762485967 }, { "epoch": 5.6643213133978305, "grad_norm": 0.1792191473418344, "learning_rate": 0.0004292688269726831, "loss": 3.0508499145507812, "step": 9663, "token_acc": 0.29224626207453536 }, { "epoch": 5.66490765171504, "grad_norm": 0.20980940221123134, "learning_rate": 0.00042925193777565294, "loss": 3.053311824798584, "step": 9664, "token_acc": 0.2919631929649608 }, { "epoch": 5.665493990032249, "grad_norm": 0.17087286114275047, "learning_rate": 0.00042923504689479234, "loss": 3.009725570678711, "step": 9665, "token_acc": 0.2960150387541081 }, { "epoch": 5.666080328349458, "grad_norm": 0.1698506675528494, "learning_rate": 0.00042921815433026, "loss": 3.012098789215088, "step": 9666, "token_acc": 0.2967871060095963 }, { "epoch": 5.666666666666667, "grad_norm": 0.1903519607869863, "learning_rate": 0.00042920126008221454, "loss": 2.9759397506713867, "step": 9667, "token_acc": 0.29977653718687175 }, { "epoch": 5.667253004983876, "grad_norm": 0.16613858970975162, "learning_rate": 0.00042918436415081474, "loss": 2.9691386222839355, "step": 9668, "token_acc": 0.3035372541140895 }, { "epoch": 5.667839343301084, "grad_norm": 0.16956420996080152, "learning_rate": 0.0004291674665362194, "loss": 3.00679087638855, "step": 9669, "token_acc": 0.2971437048225267 }, { "epoch": 5.668425681618293, "grad_norm": 0.16343544576976046, "learning_rate": 0.00042915056723858704, "loss": 2.9896368980407715, "step": 9670, "token_acc": 0.30152768853512374 }, { "epoch": 5.6690120199355025, "grad_norm": 0.1620120027899225, "learning_rate": 0.0004291336662580766, "loss": 3.0608201026916504, "step": 9671, "token_acc": 0.29073280012975294 }, { "epoch": 5.669598358252712, "grad_norm": 0.15773349503321887, "learning_rate": 0.00042911676359484664, "loss": 3.003131151199341, "step": 9672, "token_acc": 0.2984501072080827 }, { "epoch": 5.670184696569921, "grad_norm": 0.1681830930163674, "learning_rate": 0.0004290998592490561, "loss": 3.0266337394714355, "step": 9673, "token_acc": 0.2956958028391378 }, { "epoch": 5.67077103488713, "grad_norm": 0.17026784322134483, "learning_rate": 0.0004290829532208637, "loss": 2.9676289558410645, "step": 9674, "token_acc": 0.30237150512612176 }, { "epoch": 5.671357373204339, "grad_norm": 0.14881734179531328, "learning_rate": 0.00042906604551042835, "loss": 3.0124878883361816, "step": 9675, "token_acc": 0.29810283511329955 }, { "epoch": 5.671943711521548, "grad_norm": 0.15746213050040386, "learning_rate": 0.00042904913611790875, "loss": 3.021151542663574, "step": 9676, "token_acc": 0.2953353756937941 }, { "epoch": 5.672530049838757, "grad_norm": 0.1508119232093372, "learning_rate": 0.0004290322250434637, "loss": 2.995553970336914, "step": 9677, "token_acc": 0.29879655822092444 }, { "epoch": 5.673116388155966, "grad_norm": 0.17271149428032068, "learning_rate": 0.00042901531228725225, "loss": 2.9819164276123047, "step": 9678, "token_acc": 0.30188738260876363 }, { "epoch": 5.673702726473175, "grad_norm": 0.22317560950998053, "learning_rate": 0.0004289983978494332, "loss": 2.9949841499328613, "step": 9679, "token_acc": 0.2983412883874897 }, { "epoch": 5.6742890647903845, "grad_norm": 0.37987441045838943, "learning_rate": 0.0004289814817301653, "loss": 3.043926954269409, "step": 9680, "token_acc": 0.2929892484369686 }, { "epoch": 5.674875403107593, "grad_norm": 0.46172890222334556, "learning_rate": 0.00042896456392960765, "loss": 3.0338521003723145, "step": 9681, "token_acc": 0.2925842754968263 }, { "epoch": 5.675461741424802, "grad_norm": 0.1653286598332045, "learning_rate": 0.000428947644447919, "loss": 2.9987215995788574, "step": 9682, "token_acc": 0.2984008878351452 }, { "epoch": 5.676048079742011, "grad_norm": 0.29159400659256407, "learning_rate": 0.0004289307232852584, "loss": 3.0331783294677734, "step": 9683, "token_acc": 0.29358516339182517 }, { "epoch": 5.67663441805922, "grad_norm": 0.18898488147756087, "learning_rate": 0.00042891380044178474, "loss": 3.05173397064209, "step": 9684, "token_acc": 0.29150446530799173 }, { "epoch": 5.677220756376429, "grad_norm": 0.22029151321848311, "learning_rate": 0.0004288968759176571, "loss": 3.046402931213379, "step": 9685, "token_acc": 0.2926949104465516 }, { "epoch": 5.677807094693638, "grad_norm": 0.16576300669822813, "learning_rate": 0.00042887994971303435, "loss": 3.056812047958374, "step": 9686, "token_acc": 0.2901332729541733 }, { "epoch": 5.678393433010847, "grad_norm": 0.22457991502511784, "learning_rate": 0.00042886302182807546, "loss": 3.003857374191284, "step": 9687, "token_acc": 0.2986999657885734 }, { "epoch": 5.6789797713280565, "grad_norm": 0.15561880908922285, "learning_rate": 0.0004288460922629395, "loss": 3.0109992027282715, "step": 9688, "token_acc": 0.29794032628657546 }, { "epoch": 5.679566109645266, "grad_norm": 0.18152768908813405, "learning_rate": 0.00042882916101778557, "loss": 2.9868319034576416, "step": 9689, "token_acc": 0.301632402480874 }, { "epoch": 5.680152447962474, "grad_norm": 0.17653165035954657, "learning_rate": 0.0004288122280927726, "loss": 2.988758087158203, "step": 9690, "token_acc": 0.3012203805633085 }, { "epoch": 5.680738786279683, "grad_norm": 0.15868739937927923, "learning_rate": 0.0004287952934880597, "loss": 2.97629451751709, "step": 9691, "token_acc": 0.3014322694691345 }, { "epoch": 5.681325124596892, "grad_norm": 0.17053649195480833, "learning_rate": 0.00042877835720380597, "loss": 3.0265848636627197, "step": 9692, "token_acc": 0.2963419236575372 }, { "epoch": 5.681911462914101, "grad_norm": 0.155066110951518, "learning_rate": 0.0004287614192401704, "loss": 3.077479600906372, "step": 9693, "token_acc": 0.2875623630031064 }, { "epoch": 5.68249780123131, "grad_norm": 0.16396740526187453, "learning_rate": 0.0004287444795973123, "loss": 3.0188803672790527, "step": 9694, "token_acc": 0.295553803307501 }, { "epoch": 5.683084139548519, "grad_norm": 0.1483217950896347, "learning_rate": 0.00042872753827539057, "loss": 3.0223069190979004, "step": 9695, "token_acc": 0.29533774772211796 }, { "epoch": 5.6836704778657285, "grad_norm": 0.19569883138269079, "learning_rate": 0.00042871059527456457, "loss": 3.030677318572998, "step": 9696, "token_acc": 0.29351466558408607 }, { "epoch": 5.684256816182938, "grad_norm": 0.16568520147825302, "learning_rate": 0.00042869365059499325, "loss": 2.9931516647338867, "step": 9697, "token_acc": 0.30169962125807015 }, { "epoch": 5.684843154500147, "grad_norm": 0.18911416548021692, "learning_rate": 0.0004286767042368359, "loss": 3.007972478866577, "step": 9698, "token_acc": 0.2972762811704428 }, { "epoch": 5.685429492817356, "grad_norm": 0.18121591454233538, "learning_rate": 0.0004286597562002517, "loss": 3.0184693336486816, "step": 9699, "token_acc": 0.29508342055039094 }, { "epoch": 5.686015831134565, "grad_norm": 0.1841168351292108, "learning_rate": 0.0004286428064853998, "loss": 3.033012866973877, "step": 9700, "token_acc": 0.2944825546399884 }, { "epoch": 5.686602169451774, "grad_norm": 0.17421679504180962, "learning_rate": 0.00042862585509243953, "loss": 2.986377239227295, "step": 9701, "token_acc": 0.30175944733376486 }, { "epoch": 5.687188507768982, "grad_norm": 0.23461696561100684, "learning_rate": 0.00042860890202153003, "loss": 2.979397773742676, "step": 9702, "token_acc": 0.3020695397455961 }, { "epoch": 5.687774846086191, "grad_norm": 0.23151070460380196, "learning_rate": 0.0004285919472728305, "loss": 3.036905527114868, "step": 9703, "token_acc": 0.2922642212134788 }, { "epoch": 5.6883611844034006, "grad_norm": 0.20276624522389372, "learning_rate": 0.0004285749908465003, "loss": 3.0349578857421875, "step": 9704, "token_acc": 0.2943399281327038 }, { "epoch": 5.68894752272061, "grad_norm": 0.2052356051437249, "learning_rate": 0.0004285580327426988, "loss": 3.0245180130004883, "step": 9705, "token_acc": 0.2948254090625125 }, { "epoch": 5.689533861037819, "grad_norm": 0.21843578573941294, "learning_rate": 0.00042854107296158507, "loss": 3.016998767852783, "step": 9706, "token_acc": 0.29631629833162537 }, { "epoch": 5.690120199355028, "grad_norm": 0.17463113248808024, "learning_rate": 0.00042852411150331863, "loss": 3.000985622406006, "step": 9707, "token_acc": 0.2986096678443723 }, { "epoch": 5.690706537672237, "grad_norm": 0.18187663569681634, "learning_rate": 0.00042850714836805873, "loss": 3.0590057373046875, "step": 9708, "token_acc": 0.2917847534553221 }, { "epoch": 5.691292875989446, "grad_norm": 0.20459450455746692, "learning_rate": 0.0004284901835559646, "loss": 2.9940104484558105, "step": 9709, "token_acc": 0.29937229161356105 }, { "epoch": 5.691879214306655, "grad_norm": 0.19848694857738772, "learning_rate": 0.0004284732170671958, "loss": 3.0109429359436035, "step": 9710, "token_acc": 0.2955458356988835 }, { "epoch": 5.692465552623864, "grad_norm": 0.1875707159858398, "learning_rate": 0.0004284562489019116, "loss": 3.0396504402160645, "step": 9711, "token_acc": 0.29188674818155746 }, { "epoch": 5.693051890941073, "grad_norm": 0.18397003362578113, "learning_rate": 0.0004284392790602715, "loss": 3.006220817565918, "step": 9712, "token_acc": 0.29829215320131114 }, { "epoch": 5.693638229258282, "grad_norm": 0.1960355377965541, "learning_rate": 0.00042842230754243474, "loss": 3.0132362842559814, "step": 9713, "token_acc": 0.29544653066458637 }, { "epoch": 5.694224567575491, "grad_norm": 0.21111888271164667, "learning_rate": 0.00042840533434856075, "loss": 3.0100584030151367, "step": 9714, "token_acc": 0.298651075700256 }, { "epoch": 5.6948109058927, "grad_norm": 0.24987361450120996, "learning_rate": 0.00042838835947880917, "loss": 2.991548538208008, "step": 9715, "token_acc": 0.30014962885653734 }, { "epoch": 5.695397244209909, "grad_norm": 0.17244989323190352, "learning_rate": 0.0004283713829333393, "loss": 3.0689945220947266, "step": 9716, "token_acc": 0.28985194559849287 }, { "epoch": 5.695983582527118, "grad_norm": 0.1796449712262519, "learning_rate": 0.00042835440471231063, "loss": 2.986182451248169, "step": 9717, "token_acc": 0.30105001274849913 }, { "epoch": 5.696569920844327, "grad_norm": 0.17306794291337274, "learning_rate": 0.00042833742481588266, "loss": 3.0349810123443604, "step": 9718, "token_acc": 0.2946296373532417 }, { "epoch": 5.697156259161536, "grad_norm": 0.16789664725774142, "learning_rate": 0.0004283204432442149, "loss": 3.0230164527893066, "step": 9719, "token_acc": 0.2941915291233863 }, { "epoch": 5.6977425974787455, "grad_norm": 0.1990958888628293, "learning_rate": 0.00042830345999746687, "loss": 2.998344898223877, "step": 9720, "token_acc": 0.2996156311790466 }, { "epoch": 5.698328935795955, "grad_norm": 0.19853982693005615, "learning_rate": 0.00042828647507579814, "loss": 2.9882311820983887, "step": 9721, "token_acc": 0.30040991089929 }, { "epoch": 5.698915274113164, "grad_norm": 0.18270764614196836, "learning_rate": 0.0004282694884793682, "loss": 3.003427028656006, "step": 9722, "token_acc": 0.2993785247551643 }, { "epoch": 5.699501612430373, "grad_norm": 0.22010723512213365, "learning_rate": 0.0004282525002083366, "loss": 2.9778850078582764, "step": 9723, "token_acc": 0.30137416312160037 }, { "epoch": 5.700087950747581, "grad_norm": 0.2723642953420089, "learning_rate": 0.000428235510262863, "loss": 2.9844465255737305, "step": 9724, "token_acc": 0.3017111962487655 }, { "epoch": 5.70067428906479, "grad_norm": 0.18108927568815072, "learning_rate": 0.00042821851864310694, "loss": 3.0325722694396973, "step": 9725, "token_acc": 0.29424398400961516 }, { "epoch": 5.701260627381999, "grad_norm": 0.22271844642587205, "learning_rate": 0.00042820152534922806, "loss": 3.0136730670928955, "step": 9726, "token_acc": 0.2982234127797068 }, { "epoch": 5.701846965699208, "grad_norm": 0.24488658869004318, "learning_rate": 0.00042818453038138596, "loss": 3.031216621398926, "step": 9727, "token_acc": 0.29369836920538234 }, { "epoch": 5.7024333040164175, "grad_norm": 0.1731579051312719, "learning_rate": 0.0004281675337397403, "loss": 2.9910900592803955, "step": 9728, "token_acc": 0.2986923084960764 }, { "epoch": 5.703019642333627, "grad_norm": 0.21383594010860807, "learning_rate": 0.00042815053542445084, "loss": 2.9857466220855713, "step": 9729, "token_acc": 0.30061557473894307 }, { "epoch": 5.703605980650836, "grad_norm": 0.18534351668934268, "learning_rate": 0.000428133535435677, "loss": 3.0403828620910645, "step": 9730, "token_acc": 0.29395040112891 }, { "epoch": 5.704192318968045, "grad_norm": 0.194058046589898, "learning_rate": 0.00042811653377357875, "loss": 3.026549816131592, "step": 9731, "token_acc": 0.29632029260009146 }, { "epoch": 5.704778657285254, "grad_norm": 0.19533187733946833, "learning_rate": 0.0004280995304383156, "loss": 3.010878086090088, "step": 9732, "token_acc": 0.29721484230999023 }, { "epoch": 5.705364995602462, "grad_norm": 0.15889017477815057, "learning_rate": 0.00042808252543004744, "loss": 2.9961342811584473, "step": 9733, "token_acc": 0.29910503748434764 }, { "epoch": 5.705951333919671, "grad_norm": 0.18710498167806622, "learning_rate": 0.0004280655187489338, "loss": 2.9783310890197754, "step": 9734, "token_acc": 0.3014467209725438 }, { "epoch": 5.70653767223688, "grad_norm": 0.1570021766033346, "learning_rate": 0.00042804851039513473, "loss": 2.96163010597229, "step": 9735, "token_acc": 0.30590998232573907 }, { "epoch": 5.7071240105540895, "grad_norm": 0.20027907481769028, "learning_rate": 0.00042803150036880964, "loss": 2.9812936782836914, "step": 9736, "token_acc": 0.3018255844496979 }, { "epoch": 5.707710348871299, "grad_norm": 0.16080911012704258, "learning_rate": 0.0004280144886701186, "loss": 3.0218868255615234, "step": 9737, "token_acc": 0.2958968552191588 }, { "epoch": 5.708296687188508, "grad_norm": 0.1975564427783344, "learning_rate": 0.00042799747529922137, "loss": 2.997437000274658, "step": 9738, "token_acc": 0.2993607349684892 }, { "epoch": 5.708883025505717, "grad_norm": 0.1704753932499822, "learning_rate": 0.0004279804602562777, "loss": 2.983586072921753, "step": 9739, "token_acc": 0.300933417914892 }, { "epoch": 5.709469363822926, "grad_norm": 0.16751104048917995, "learning_rate": 0.00042796344354144734, "loss": 3.0001845359802246, "step": 9740, "token_acc": 0.2971088991076718 }, { "epoch": 5.710055702140135, "grad_norm": 0.238968363089349, "learning_rate": 0.0004279464251548903, "loss": 3.0158004760742188, "step": 9741, "token_acc": 0.295391095782308 }, { "epoch": 5.710642040457344, "grad_norm": 0.17578908454802472, "learning_rate": 0.00042792940509676637, "loss": 3.036902904510498, "step": 9742, "token_acc": 0.29293363578814097 }, { "epoch": 5.711228378774553, "grad_norm": 0.17691966355134336, "learning_rate": 0.0004279123833672355, "loss": 3.0241262912750244, "step": 9743, "token_acc": 0.2938525764118015 }, { "epoch": 5.711814717091762, "grad_norm": 0.19849294832039296, "learning_rate": 0.00042789535996645743, "loss": 2.9944612979888916, "step": 9744, "token_acc": 0.30042127084704484 }, { "epoch": 5.7124010554089715, "grad_norm": 0.1810045138690793, "learning_rate": 0.0004278783348945922, "loss": 3.0278000831604004, "step": 9745, "token_acc": 0.29502459833438727 }, { "epoch": 5.71298739372618, "grad_norm": 0.18246860490966896, "learning_rate": 0.0004278613081517997, "loss": 3.004056215286255, "step": 9746, "token_acc": 0.2987751416226092 }, { "epoch": 5.713573732043389, "grad_norm": 0.1790329437786488, "learning_rate": 0.00042784427973823993, "loss": 3.0240578651428223, "step": 9747, "token_acc": 0.29692370239869903 }, { "epoch": 5.714160070360598, "grad_norm": 0.20027441640000265, "learning_rate": 0.0004278272496540728, "loss": 2.9672203063964844, "step": 9748, "token_acc": 0.30474545269789655 }, { "epoch": 5.714746408677807, "grad_norm": 0.22349385972403324, "learning_rate": 0.00042781021789945823, "loss": 2.992806911468506, "step": 9749, "token_acc": 0.2995803835757498 }, { "epoch": 5.715332746995016, "grad_norm": 0.17219798287010957, "learning_rate": 0.0004277931844745563, "loss": 3.0111806392669678, "step": 9750, "token_acc": 0.2964327686457248 }, { "epoch": 5.715919085312225, "grad_norm": 0.21507034256874447, "learning_rate": 0.00042777614937952697, "loss": 3.0356366634368896, "step": 9751, "token_acc": 0.29383720444481576 }, { "epoch": 5.716505423629434, "grad_norm": 0.16750405663613704, "learning_rate": 0.00042775911261453027, "loss": 2.9886884689331055, "step": 9752, "token_acc": 0.298380589983707 }, { "epoch": 5.7170917619466435, "grad_norm": 0.21003041646855272, "learning_rate": 0.0004277420741797262, "loss": 3.0114574432373047, "step": 9753, "token_acc": 0.2975876465602114 }, { "epoch": 5.717678100263853, "grad_norm": 0.20326612970657906, "learning_rate": 0.000427725034075275, "loss": 2.983753204345703, "step": 9754, "token_acc": 0.30083016302571547 }, { "epoch": 5.718264438581061, "grad_norm": 0.16083676880496495, "learning_rate": 0.00042770799230133647, "loss": 2.9960367679595947, "step": 9755, "token_acc": 0.2997880384658947 }, { "epoch": 5.71885077689827, "grad_norm": 0.2311905509721948, "learning_rate": 0.00042769094885807083, "loss": 3.0426080226898193, "step": 9756, "token_acc": 0.2931654092203111 }, { "epoch": 5.719437115215479, "grad_norm": 0.18502198678827375, "learning_rate": 0.0004276739037456382, "loss": 3.0202221870422363, "step": 9757, "token_acc": 0.29512087069387627 }, { "epoch": 5.720023453532688, "grad_norm": 0.17968974538929308, "learning_rate": 0.0004276568569641986, "loss": 2.999652862548828, "step": 9758, "token_acc": 0.2976344528627108 }, { "epoch": 5.720609791849897, "grad_norm": 0.15756421521310826, "learning_rate": 0.0004276398085139123, "loss": 3.016390800476074, "step": 9759, "token_acc": 0.29705845061493596 }, { "epoch": 5.721196130167106, "grad_norm": 0.1857479456575506, "learning_rate": 0.0004276227583949393, "loss": 2.991766929626465, "step": 9760, "token_acc": 0.2992602430440268 }, { "epoch": 5.7217824684843155, "grad_norm": 0.18131721528561273, "learning_rate": 0.00042760570660743994, "loss": 3.006643295288086, "step": 9761, "token_acc": 0.29766950598111913 }, { "epoch": 5.722368806801525, "grad_norm": 0.15176663597237508, "learning_rate": 0.0004275886531515741, "loss": 3.047158718109131, "step": 9762, "token_acc": 0.2920587246091153 }, { "epoch": 5.722955145118734, "grad_norm": 0.16835670084605237, "learning_rate": 0.0004275715980275023, "loss": 3.009458541870117, "step": 9763, "token_acc": 0.29540245483880695 }, { "epoch": 5.723541483435943, "grad_norm": 0.2033296025349335, "learning_rate": 0.0004275545412353846, "loss": 3.0113673210144043, "step": 9764, "token_acc": 0.2974983417037809 }, { "epoch": 5.724127821753152, "grad_norm": 0.16164466559274118, "learning_rate": 0.0004275374827753813, "loss": 3.0099995136260986, "step": 9765, "token_acc": 0.29614441520953005 }, { "epoch": 5.724714160070361, "grad_norm": 0.17865797682836773, "learning_rate": 0.0004275204226476526, "loss": 3.024019241333008, "step": 9766, "token_acc": 0.29525821645165995 }, { "epoch": 5.725300498387569, "grad_norm": 0.2593570335125841, "learning_rate": 0.00042750336085235866, "loss": 3.0303597450256348, "step": 9767, "token_acc": 0.2949683037580621 }, { "epoch": 5.725886836704778, "grad_norm": 0.2111550684505764, "learning_rate": 0.0004274862973896598, "loss": 2.978832721710205, "step": 9768, "token_acc": 0.3028105702991447 }, { "epoch": 5.7264731750219875, "grad_norm": 0.16434690790023865, "learning_rate": 0.0004274692322597164, "loss": 3.0385990142822266, "step": 9769, "token_acc": 0.2936921899993045 }, { "epoch": 5.727059513339197, "grad_norm": 0.21630121804126842, "learning_rate": 0.00042745216546268873, "loss": 2.9944584369659424, "step": 9770, "token_acc": 0.3004844660219101 }, { "epoch": 5.727645851656406, "grad_norm": 0.1771829562045815, "learning_rate": 0.00042743509699873705, "loss": 2.9621706008911133, "step": 9771, "token_acc": 0.3042818741943178 }, { "epoch": 5.728232189973615, "grad_norm": 0.16403831400169688, "learning_rate": 0.0004274180268680217, "loss": 3.0051217079162598, "step": 9772, "token_acc": 0.2986670889751102 }, { "epoch": 5.728818528290824, "grad_norm": 0.18986715685086744, "learning_rate": 0.0004274009550707031, "loss": 3.0222558975219727, "step": 9773, "token_acc": 0.2961210328147539 }, { "epoch": 5.729404866608033, "grad_norm": 0.1484794289155972, "learning_rate": 0.00042738388160694157, "loss": 2.989415407180786, "step": 9774, "token_acc": 0.29924619578432754 }, { "epoch": 5.729991204925242, "grad_norm": 0.23548169241021238, "learning_rate": 0.0004273668064768975, "loss": 3.024458885192871, "step": 9775, "token_acc": 0.29376841891128674 }, { "epoch": 5.730577543242451, "grad_norm": 0.16487381177316116, "learning_rate": 0.00042734972968073125, "loss": 3.020822525024414, "step": 9776, "token_acc": 0.29728829561249376 }, { "epoch": 5.7311638815596595, "grad_norm": 0.16802247038031837, "learning_rate": 0.00042733265121860334, "loss": 3.0019431114196777, "step": 9777, "token_acc": 0.2985817297913022 }, { "epoch": 5.731750219876869, "grad_norm": 0.16497398452427606, "learning_rate": 0.0004273155710906741, "loss": 3.002401113510132, "step": 9778, "token_acc": 0.2979241874989009 }, { "epoch": 5.732336558194078, "grad_norm": 0.18918092124654226, "learning_rate": 0.00042729848929710403, "loss": 3.0445446968078613, "step": 9779, "token_acc": 0.29243978619689226 }, { "epoch": 5.732922896511287, "grad_norm": 0.21983977584860623, "learning_rate": 0.00042728140583805354, "loss": 2.978278875350952, "step": 9780, "token_acc": 0.3018021678014853 }, { "epoch": 5.733509234828496, "grad_norm": 0.17150808864042918, "learning_rate": 0.00042726432071368316, "loss": 3.007329225540161, "step": 9781, "token_acc": 0.2970757521671952 }, { "epoch": 5.734095573145705, "grad_norm": 0.3468445096942142, "learning_rate": 0.00042724723392415333, "loss": 2.9984984397888184, "step": 9782, "token_acc": 0.29936849467231663 }, { "epoch": 5.734681911462914, "grad_norm": 0.31661307645515097, "learning_rate": 0.0004272301454696246, "loss": 3.0195417404174805, "step": 9783, "token_acc": 0.29593348466071145 }, { "epoch": 5.735268249780123, "grad_norm": 0.1893982672655234, "learning_rate": 0.00042721305535025744, "loss": 3.026937246322632, "step": 9784, "token_acc": 0.29479486044389064 }, { "epoch": 5.735854588097332, "grad_norm": 0.20813351102625063, "learning_rate": 0.0004271959635662124, "loss": 3.011892318725586, "step": 9785, "token_acc": 0.2964921403143874 }, { "epoch": 5.7364409264145415, "grad_norm": 0.19420941936563837, "learning_rate": 0.0004271788701176501, "loss": 3.0171279907226562, "step": 9786, "token_acc": 0.29744646772308947 }, { "epoch": 5.737027264731751, "grad_norm": 0.19765457516585758, "learning_rate": 0.0004271617750047311, "loss": 3.0471858978271484, "step": 9787, "token_acc": 0.2927863593984458 }, { "epoch": 5.73761360304896, "grad_norm": 0.21551670737881892, "learning_rate": 0.0004271446782276159, "loss": 3.04540753364563, "step": 9788, "token_acc": 0.29376635298327686 }, { "epoch": 5.738199941366168, "grad_norm": 0.17335568131810578, "learning_rate": 0.0004271275797864651, "loss": 3.0074214935302734, "step": 9789, "token_acc": 0.29775162955637113 }, { "epoch": 5.738786279683377, "grad_norm": 0.2262175397264559, "learning_rate": 0.00042711047968143945, "loss": 3.044785976409912, "step": 9790, "token_acc": 0.2926703803750759 }, { "epoch": 5.739372618000586, "grad_norm": 0.16936770866551007, "learning_rate": 0.00042709337791269953, "loss": 2.9827089309692383, "step": 9791, "token_acc": 0.3009463556082773 }, { "epoch": 5.739958956317795, "grad_norm": 0.2001771713243243, "learning_rate": 0.0004270762744804059, "loss": 2.970305919647217, "step": 9792, "token_acc": 0.3033125524117751 }, { "epoch": 5.740545294635004, "grad_norm": 0.14901210741679927, "learning_rate": 0.00042705916938471934, "loss": 2.980947494506836, "step": 9793, "token_acc": 0.30191288005906436 }, { "epoch": 5.7411316329522135, "grad_norm": 0.1860267515424033, "learning_rate": 0.00042704206262580036, "loss": 3.050400495529175, "step": 9794, "token_acc": 0.29157887185101145 }, { "epoch": 5.741717971269423, "grad_norm": 0.16210204620671523, "learning_rate": 0.0004270249542038099, "loss": 2.991826057434082, "step": 9795, "token_acc": 0.29750529672127707 }, { "epoch": 5.742304309586632, "grad_norm": 0.1666851259163461, "learning_rate": 0.0004270078441189085, "loss": 2.9970202445983887, "step": 9796, "token_acc": 0.29860864363843653 }, { "epoch": 5.742890647903841, "grad_norm": 0.1578545090205116, "learning_rate": 0.0004269907323712569, "loss": 3.029672384262085, "step": 9797, "token_acc": 0.29600779999788046 }, { "epoch": 5.743476986221049, "grad_norm": 0.18944046093002984, "learning_rate": 0.00042697361896101583, "loss": 2.9879541397094727, "step": 9798, "token_acc": 0.3003644112611457 }, { "epoch": 5.744063324538258, "grad_norm": 0.213559691757153, "learning_rate": 0.0004269565038883462, "loss": 2.989189386367798, "step": 9799, "token_acc": 0.29865113743317395 }, { "epoch": 5.744649662855467, "grad_norm": 0.1781893738300793, "learning_rate": 0.0004269393871534086, "loss": 2.99185848236084, "step": 9800, "token_acc": 0.300760268071098 }, { "epoch": 5.745236001172676, "grad_norm": 0.2662629491516313, "learning_rate": 0.0004269222687563639, "loss": 3.0291504859924316, "step": 9801, "token_acc": 0.2964964631313853 }, { "epoch": 5.7458223394898855, "grad_norm": 0.1818353558872522, "learning_rate": 0.0004269051486973728, "loss": 3.0381107330322266, "step": 9802, "token_acc": 0.29219570885345847 }, { "epoch": 5.746408677807095, "grad_norm": 0.2576406147144324, "learning_rate": 0.00042688802697659635, "loss": 2.9792261123657227, "step": 9803, "token_acc": 0.30034735350907704 }, { "epoch": 5.746995016124304, "grad_norm": 0.1613269765864322, "learning_rate": 0.00042687090359419517, "loss": 2.9641284942626953, "step": 9804, "token_acc": 0.30337067475661766 }, { "epoch": 5.747581354441513, "grad_norm": 0.24483848251258744, "learning_rate": 0.0004268537785503302, "loss": 2.9684858322143555, "step": 9805, "token_acc": 0.3031364927797926 }, { "epoch": 5.748167692758722, "grad_norm": 0.16307764805023037, "learning_rate": 0.0004268366518451623, "loss": 3.0233664512634277, "step": 9806, "token_acc": 0.29607322762318194 }, { "epoch": 5.748754031075931, "grad_norm": 0.19165530216029253, "learning_rate": 0.00042681952347885233, "loss": 3.0188350677490234, "step": 9807, "token_acc": 0.29867636847199247 }, { "epoch": 5.74934036939314, "grad_norm": 0.1818282656414819, "learning_rate": 0.00042680239345156126, "loss": 2.979160785675049, "step": 9808, "token_acc": 0.30208137234928684 }, { "epoch": 5.749926707710349, "grad_norm": 0.20846138689722382, "learning_rate": 0.0004267852617634499, "loss": 2.9984664916992188, "step": 9809, "token_acc": 0.2988695581618738 }, { "epoch": 5.7505130460275575, "grad_norm": 0.16870047331698834, "learning_rate": 0.00042676812841467923, "loss": 3.022526264190674, "step": 9810, "token_acc": 0.29490765164509175 }, { "epoch": 5.751099384344767, "grad_norm": 0.1744523519090359, "learning_rate": 0.00042675099340541017, "loss": 3.014735698699951, "step": 9811, "token_acc": 0.29642171536622225 }, { "epoch": 5.751685722661976, "grad_norm": 0.15760754157824028, "learning_rate": 0.00042673385673580366, "loss": 2.994710922241211, "step": 9812, "token_acc": 0.2981859174831423 }, { "epoch": 5.752272060979185, "grad_norm": 0.17359981542197367, "learning_rate": 0.00042671671840602087, "loss": 2.9937655925750732, "step": 9813, "token_acc": 0.2978452713076337 }, { "epoch": 5.752858399296394, "grad_norm": 0.16510595094494643, "learning_rate": 0.0004266995784162225, "loss": 3.0381357669830322, "step": 9814, "token_acc": 0.29298097890623315 }, { "epoch": 5.753444737613603, "grad_norm": 0.19059267994434784, "learning_rate": 0.0004266824367665698, "loss": 3.007462739944458, "step": 9815, "token_acc": 0.29770543698780927 }, { "epoch": 5.754031075930812, "grad_norm": 0.1589780408348912, "learning_rate": 0.0004266652934572236, "loss": 3.002833843231201, "step": 9816, "token_acc": 0.299048553833489 }, { "epoch": 5.754617414248021, "grad_norm": 0.18096755872241815, "learning_rate": 0.0004266481484883451, "loss": 2.9881269931793213, "step": 9817, "token_acc": 0.30013900346373423 }, { "epoch": 5.75520375256523, "grad_norm": 0.1621406845412763, "learning_rate": 0.0004266310018600952, "loss": 2.9817309379577637, "step": 9818, "token_acc": 0.3025529385302529 }, { "epoch": 5.7557900908824395, "grad_norm": 0.16863611510753743, "learning_rate": 0.00042661385357263516, "loss": 3.0115761756896973, "step": 9819, "token_acc": 0.2979749429144072 }, { "epoch": 5.756376429199648, "grad_norm": 0.1791824773520426, "learning_rate": 0.0004265967036261259, "loss": 3.0059618949890137, "step": 9820, "token_acc": 0.2991214496081466 }, { "epoch": 5.756962767516857, "grad_norm": 0.15483704425513048, "learning_rate": 0.00042657955202072856, "loss": 2.984379291534424, "step": 9821, "token_acc": 0.29944282441158093 }, { "epoch": 5.757549105834066, "grad_norm": 0.17150397239285364, "learning_rate": 0.00042656239875660425, "loss": 2.983255624771118, "step": 9822, "token_acc": 0.3016053351812012 }, { "epoch": 5.758135444151275, "grad_norm": 0.15887307691157063, "learning_rate": 0.0004265452438339141, "loss": 2.976199150085449, "step": 9823, "token_acc": 0.30170426498801567 }, { "epoch": 5.758721782468484, "grad_norm": 0.16143268127256333, "learning_rate": 0.00042652808725281946, "loss": 3.008729934692383, "step": 9824, "token_acc": 0.2984476467992138 }, { "epoch": 5.759308120785693, "grad_norm": 0.17055784408218197, "learning_rate": 0.00042651092901348117, "loss": 3.0651113986968994, "step": 9825, "token_acc": 0.29029814813812954 }, { "epoch": 5.759894459102902, "grad_norm": 0.1481536693558484, "learning_rate": 0.0004264937691160605, "loss": 3.024775743484497, "step": 9826, "token_acc": 0.2945779264413816 }, { "epoch": 5.7604807974201115, "grad_norm": 0.1601951083400055, "learning_rate": 0.0004264766075607188, "loss": 3.0059478282928467, "step": 9827, "token_acc": 0.2974255187730803 }, { "epoch": 5.761067135737321, "grad_norm": 0.17061466215621363, "learning_rate": 0.0004264594443476172, "loss": 2.9989688396453857, "step": 9828, "token_acc": 0.29873509349423816 }, { "epoch": 5.76165347405453, "grad_norm": 0.16158903886122816, "learning_rate": 0.00042644227947691684, "loss": 3.012486696243286, "step": 9829, "token_acc": 0.29633690285214276 }, { "epoch": 5.762239812371739, "grad_norm": 0.1726561587352895, "learning_rate": 0.00042642511294877904, "loss": 2.963540554046631, "step": 9830, "token_acc": 0.3035180926016506 }, { "epoch": 5.762826150688948, "grad_norm": 0.18255360400419068, "learning_rate": 0.0004264079447633651, "loss": 3.016244411468506, "step": 9831, "token_acc": 0.2952851555550835 }, { "epoch": 5.763412489006156, "grad_norm": 0.1952540596241095, "learning_rate": 0.0004263907749208361, "loss": 2.9847264289855957, "step": 9832, "token_acc": 0.2999788117304538 }, { "epoch": 5.763998827323365, "grad_norm": 0.21577931220425517, "learning_rate": 0.00042637360342135354, "loss": 3.010488271713257, "step": 9833, "token_acc": 0.2982501565426919 }, { "epoch": 5.764585165640574, "grad_norm": 0.1728646644286135, "learning_rate": 0.00042635643026507863, "loss": 2.9888224601745605, "step": 9834, "token_acc": 0.3006070243188329 }, { "epoch": 5.7651715039577835, "grad_norm": 0.1843046223580123, "learning_rate": 0.0004263392554521727, "loss": 2.995633840560913, "step": 9835, "token_acc": 0.2980454086519792 }, { "epoch": 5.765757842274993, "grad_norm": 0.22080286055160886, "learning_rate": 0.00042632207898279706, "loss": 3.0202553272247314, "step": 9836, "token_acc": 0.29518842317889155 }, { "epoch": 5.766344180592202, "grad_norm": 0.1942678526814422, "learning_rate": 0.0004263049008571131, "loss": 3.0228071212768555, "step": 9837, "token_acc": 0.2958257861327328 }, { "epoch": 5.766930518909411, "grad_norm": 0.19632194336078884, "learning_rate": 0.00042628772107528224, "loss": 3.0176773071289062, "step": 9838, "token_acc": 0.29681067753001716 }, { "epoch": 5.76751685722662, "grad_norm": 0.18122861862610815, "learning_rate": 0.0004262705396374657, "loss": 2.9950449466705322, "step": 9839, "token_acc": 0.30017655521164827 }, { "epoch": 5.768103195543829, "grad_norm": 0.18450854606986924, "learning_rate": 0.00042625335654382504, "loss": 2.9820404052734375, "step": 9840, "token_acc": 0.3026783015049113 }, { "epoch": 5.768689533861037, "grad_norm": 0.1855358493245563, "learning_rate": 0.00042623617179452156, "loss": 2.979336738586426, "step": 9841, "token_acc": 0.3014573657956873 }, { "epoch": 5.7692758721782464, "grad_norm": 0.171195440933651, "learning_rate": 0.0004262189853897167, "loss": 3.0250351428985596, "step": 9842, "token_acc": 0.29585189490932595 }, { "epoch": 5.769862210495456, "grad_norm": 0.22939461109295775, "learning_rate": 0.00042620179732957196, "loss": 2.9973955154418945, "step": 9843, "token_acc": 0.3001539627709102 }, { "epoch": 5.770448548812665, "grad_norm": 0.22404468869983718, "learning_rate": 0.00042618460761424876, "loss": 2.97257661819458, "step": 9844, "token_acc": 0.3020805431291509 }, { "epoch": 5.771034887129874, "grad_norm": 0.17821824589940463, "learning_rate": 0.00042616741624390856, "loss": 3.0196237564086914, "step": 9845, "token_acc": 0.2962530581583035 }, { "epoch": 5.771621225447083, "grad_norm": 0.2391526302564196, "learning_rate": 0.0004261502232187129, "loss": 2.9857356548309326, "step": 9846, "token_acc": 0.29999017705200054 }, { "epoch": 5.772207563764292, "grad_norm": 0.2694231248265357, "learning_rate": 0.0004261330285388233, "loss": 3.013343334197998, "step": 9847, "token_acc": 0.2982479009928384 }, { "epoch": 5.772793902081501, "grad_norm": 0.1791459476106535, "learning_rate": 0.0004261158322044012, "loss": 2.982802152633667, "step": 9848, "token_acc": 0.30107480837579753 }, { "epoch": 5.77338024039871, "grad_norm": 0.2022295237788424, "learning_rate": 0.0004260986342156081, "loss": 3.024275779724121, "step": 9849, "token_acc": 0.2961895522311039 }, { "epoch": 5.773966578715919, "grad_norm": 0.18355964882066209, "learning_rate": 0.00042608143457260574, "loss": 3.0137851238250732, "step": 9850, "token_acc": 0.29786150319887017 }, { "epoch": 5.7745529170331285, "grad_norm": 0.17175963604717426, "learning_rate": 0.00042606423327555553, "loss": 3.011504650115967, "step": 9851, "token_acc": 0.2948839936429236 }, { "epoch": 5.775139255350338, "grad_norm": 0.25252950666588686, "learning_rate": 0.0004260470303246191, "loss": 3.01700758934021, "step": 9852, "token_acc": 0.29523926086280267 }, { "epoch": 5.775725593667546, "grad_norm": 0.2361043160689279, "learning_rate": 0.0004260298257199581, "loss": 2.9895331859588623, "step": 9853, "token_acc": 0.30056262268729617 }, { "epoch": 5.776311931984755, "grad_norm": 0.20070251007576217, "learning_rate": 0.00042601261946173405, "loss": 3.05723237991333, "step": 9854, "token_acc": 0.2910841195351531 }, { "epoch": 5.776898270301964, "grad_norm": 0.18049050140552117, "learning_rate": 0.0004259954115501086, "loss": 3.012880325317383, "step": 9855, "token_acc": 0.29730930998049415 }, { "epoch": 5.777484608619173, "grad_norm": 0.20172210634782356, "learning_rate": 0.0004259782019852434, "loss": 3.0195188522338867, "step": 9856, "token_acc": 0.295771397038731 }, { "epoch": 5.778070946936382, "grad_norm": 0.17809974345665736, "learning_rate": 0.00042596099076730015, "loss": 2.981475830078125, "step": 9857, "token_acc": 0.3019364411755287 }, { "epoch": 5.778657285253591, "grad_norm": 0.18613424724006009, "learning_rate": 0.00042594377789644054, "loss": 3.0010063648223877, "step": 9858, "token_acc": 0.2984356229265175 }, { "epoch": 5.7792436235708005, "grad_norm": 0.18165367219150833, "learning_rate": 0.00042592656337282623, "loss": 3.0383360385894775, "step": 9859, "token_acc": 0.2929335071707953 }, { "epoch": 5.77982996188801, "grad_norm": 0.1746716071503972, "learning_rate": 0.0004259093471966188, "loss": 2.9904322624206543, "step": 9860, "token_acc": 0.30081722155089796 }, { "epoch": 5.780416300205219, "grad_norm": 0.18130247616422657, "learning_rate": 0.0004258921293679802, "loss": 3.0089967250823975, "step": 9861, "token_acc": 0.2983422173276554 }, { "epoch": 5.781002638522428, "grad_norm": 0.1869687863912142, "learning_rate": 0.000425874909887072, "loss": 3.0213675498962402, "step": 9862, "token_acc": 0.296800129070424 }, { "epoch": 5.781588976839636, "grad_norm": 0.21147826203105444, "learning_rate": 0.0004258576887540561, "loss": 3.0386123657226562, "step": 9863, "token_acc": 0.2940331677885734 }, { "epoch": 5.782175315156845, "grad_norm": 0.156330527476051, "learning_rate": 0.0004258404659690941, "loss": 3.0121331214904785, "step": 9864, "token_acc": 0.29718483860792766 }, { "epoch": 5.782761653474054, "grad_norm": 0.20342599706877254, "learning_rate": 0.000425823241532348, "loss": 3.004225730895996, "step": 9865, "token_acc": 0.2984643504501129 }, { "epoch": 5.783347991791263, "grad_norm": 0.21684627553868843, "learning_rate": 0.00042580601544397936, "loss": 3.011324405670166, "step": 9866, "token_acc": 0.2966895934104269 }, { "epoch": 5.7839343301084725, "grad_norm": 0.16770357802135302, "learning_rate": 0.0004257887877041501, "loss": 2.9937868118286133, "step": 9867, "token_acc": 0.2988383701110826 }, { "epoch": 5.784520668425682, "grad_norm": 0.20331196359580697, "learning_rate": 0.00042577155831302203, "loss": 3.02339506149292, "step": 9868, "token_acc": 0.2957570388387685 }, { "epoch": 5.785107006742891, "grad_norm": 0.23845863094953398, "learning_rate": 0.00042575432727075713, "loss": 2.983982563018799, "step": 9869, "token_acc": 0.30085166863760776 }, { "epoch": 5.7856933450601, "grad_norm": 0.17406971275578026, "learning_rate": 0.0004257370945775171, "loss": 2.949066162109375, "step": 9870, "token_acc": 0.30657564929870407 }, { "epoch": 5.786279683377309, "grad_norm": 0.16104200731844928, "learning_rate": 0.00042571986023346386, "loss": 3.007042169570923, "step": 9871, "token_acc": 0.29838674720606645 }, { "epoch": 5.786866021694518, "grad_norm": 0.15193710981436453, "learning_rate": 0.0004257026242387594, "loss": 2.980504035949707, "step": 9872, "token_acc": 0.30315834466279407 }, { "epoch": 5.787452360011727, "grad_norm": 0.1704912964935057, "learning_rate": 0.00042568538659356546, "loss": 3.005622386932373, "step": 9873, "token_acc": 0.2975203313673693 }, { "epoch": 5.788038698328936, "grad_norm": 0.18606616275557095, "learning_rate": 0.0004256681472980441, "loss": 3.0079498291015625, "step": 9874, "token_acc": 0.29871990903592094 }, { "epoch": 5.7886250366461445, "grad_norm": 0.17978125562898373, "learning_rate": 0.00042565090635235717, "loss": 3.004889726638794, "step": 9875, "token_acc": 0.29727987291632285 }, { "epoch": 5.789211374963354, "grad_norm": 0.18121020103396734, "learning_rate": 0.0004256336637566667, "loss": 3.0541625022888184, "step": 9876, "token_acc": 0.29197883959751714 }, { "epoch": 5.789797713280563, "grad_norm": 0.17341035990252748, "learning_rate": 0.0004256164195111346, "loss": 2.9958696365356445, "step": 9877, "token_acc": 0.29954415390092376 }, { "epoch": 5.790384051597772, "grad_norm": 0.17478342312582604, "learning_rate": 0.00042559917361592294, "loss": 2.9630792140960693, "step": 9878, "token_acc": 0.3039548740091649 }, { "epoch": 5.790970389914981, "grad_norm": 0.17855634615418253, "learning_rate": 0.0004255819260711936, "loss": 3.0145084857940674, "step": 9879, "token_acc": 0.29548514990263464 }, { "epoch": 5.79155672823219, "grad_norm": 0.18151171473310634, "learning_rate": 0.0004255646768771087, "loss": 3.022407293319702, "step": 9880, "token_acc": 0.29410911238985515 }, { "epoch": 5.792143066549399, "grad_norm": 0.15461045173057483, "learning_rate": 0.00042554742603383025, "loss": 3.0339534282684326, "step": 9881, "token_acc": 0.2931750695987865 }, { "epoch": 5.792729404866608, "grad_norm": 0.16895235214048931, "learning_rate": 0.0004255301735415202, "loss": 3.0054728984832764, "step": 9882, "token_acc": 0.2979912946818558 }, { "epoch": 5.793315743183817, "grad_norm": 0.16020655949294255, "learning_rate": 0.0004255129194003408, "loss": 2.9826042652130127, "step": 9883, "token_acc": 0.3008270047625177 }, { "epoch": 5.7939020815010265, "grad_norm": 0.1572664882809678, "learning_rate": 0.000425495663610454, "loss": 3.0225157737731934, "step": 9884, "token_acc": 0.2957892947000855 }, { "epoch": 5.794488419818235, "grad_norm": 0.16709593831118263, "learning_rate": 0.00042547840617202193, "loss": 3.0286941528320312, "step": 9885, "token_acc": 0.2948386281747078 }, { "epoch": 5.795074758135444, "grad_norm": 0.21423684240921123, "learning_rate": 0.00042546114708520667, "loss": 3.038414716720581, "step": 9886, "token_acc": 0.29237983464217543 }, { "epoch": 5.795661096452653, "grad_norm": 0.2594387257095789, "learning_rate": 0.0004254438863501704, "loss": 2.9988722801208496, "step": 9887, "token_acc": 0.2974965015689933 }, { "epoch": 5.796247434769862, "grad_norm": 0.27718640358647034, "learning_rate": 0.00042542662396707524, "loss": 3.008835792541504, "step": 9888, "token_acc": 0.29856044020911987 }, { "epoch": 5.796833773087071, "grad_norm": 0.1706571923942546, "learning_rate": 0.00042540935993608324, "loss": 3.0124828815460205, "step": 9889, "token_acc": 0.2974697486654576 }, { "epoch": 5.79742011140428, "grad_norm": 0.18361752711194348, "learning_rate": 0.0004253920942573568, "loss": 3.0069122314453125, "step": 9890, "token_acc": 0.29760818107276016 }, { "epoch": 5.798006449721489, "grad_norm": 0.19120750205207077, "learning_rate": 0.0004253748269310579, "loss": 2.9694414138793945, "step": 9891, "token_acc": 0.3039701041197858 }, { "epoch": 5.7985927880386985, "grad_norm": 0.17759839898621618, "learning_rate": 0.0004253575579573488, "loss": 3.014781951904297, "step": 9892, "token_acc": 0.29786470575999 }, { "epoch": 5.799179126355908, "grad_norm": 0.17854784113921143, "learning_rate": 0.0004253402873363918, "loss": 3.0146846771240234, "step": 9893, "token_acc": 0.29626095054855084 }, { "epoch": 5.799765464673117, "grad_norm": 0.16822589885417866, "learning_rate": 0.00042532301506834904, "loss": 3.0046703815460205, "step": 9894, "token_acc": 0.2981885355209971 }, { "epoch": 5.800351802990326, "grad_norm": 0.17326018957401854, "learning_rate": 0.0004253057411533828, "loss": 3.0333333015441895, "step": 9895, "token_acc": 0.29528862895434327 }, { "epoch": 5.800938141307535, "grad_norm": 0.17263074484874288, "learning_rate": 0.0004252884655916554, "loss": 3.0302329063415527, "step": 9896, "token_acc": 0.29509516206165737 }, { "epoch": 5.801524479624743, "grad_norm": 0.1776082236260518, "learning_rate": 0.0004252711883833291, "loss": 3.0505847930908203, "step": 9897, "token_acc": 0.2930789133247089 }, { "epoch": 5.802110817941952, "grad_norm": 0.1811347585820215, "learning_rate": 0.000425253909528566, "loss": 3.0150270462036133, "step": 9898, "token_acc": 0.29435353961323174 }, { "epoch": 5.802697156259161, "grad_norm": 0.16261723627282812, "learning_rate": 0.00042523662902752875, "loss": 2.995666980743408, "step": 9899, "token_acc": 0.2994882559208546 }, { "epoch": 5.8032834945763705, "grad_norm": 0.18289604008314098, "learning_rate": 0.00042521934688037946, "loss": 2.9709839820861816, "step": 9900, "token_acc": 0.30213232906273896 }, { "epoch": 5.80386983289358, "grad_norm": 0.1956725728162049, "learning_rate": 0.0004252020630872804, "loss": 3.048267364501953, "step": 9901, "token_acc": 0.2928976764605454 }, { "epoch": 5.804456171210789, "grad_norm": 0.17546838421050795, "learning_rate": 0.0004251847776483942, "loss": 2.974883794784546, "step": 9902, "token_acc": 0.30272165273701934 }, { "epoch": 5.805042509527998, "grad_norm": 0.1666667587613834, "learning_rate": 0.00042516749056388303, "loss": 3.0285680294036865, "step": 9903, "token_acc": 0.2936522053015124 }, { "epoch": 5.805628847845207, "grad_norm": 0.21010598647358766, "learning_rate": 0.0004251502018339093, "loss": 2.959278106689453, "step": 9904, "token_acc": 0.3052134344304492 }, { "epoch": 5.806215186162416, "grad_norm": 0.1918767663017535, "learning_rate": 0.0004251329114586354, "loss": 2.9866018295288086, "step": 9905, "token_acc": 0.3002852077391505 }, { "epoch": 5.806801524479624, "grad_norm": 0.19178689390597403, "learning_rate": 0.00042511561943822386, "loss": 3.036616325378418, "step": 9906, "token_acc": 0.2932062976874452 }, { "epoch": 5.807387862796833, "grad_norm": 0.20518040770597976, "learning_rate": 0.0004250983257728371, "loss": 3.040219306945801, "step": 9907, "token_acc": 0.2967445942711242 }, { "epoch": 5.8079742011140425, "grad_norm": 0.19064656090011026, "learning_rate": 0.00042508103046263746, "loss": 3.007669448852539, "step": 9908, "token_acc": 0.2970445976816316 }, { "epoch": 5.808560539431252, "grad_norm": 0.18329839468225553, "learning_rate": 0.00042506373350778743, "loss": 3.007746934890747, "step": 9909, "token_acc": 0.2966362329460028 }, { "epoch": 5.809146877748461, "grad_norm": 0.1816163410490305, "learning_rate": 0.00042504643490844953, "loss": 2.9976625442504883, "step": 9910, "token_acc": 0.2976666109651055 }, { "epoch": 5.80973321606567, "grad_norm": 0.2097161392658969, "learning_rate": 0.0004250291346647863, "loss": 3.026937246322632, "step": 9911, "token_acc": 0.29505573035689736 }, { "epoch": 5.810319554382879, "grad_norm": 0.19706998113774662, "learning_rate": 0.0004250118327769602, "loss": 2.9947919845581055, "step": 9912, "token_acc": 0.29995824164000195 }, { "epoch": 5.810905892700088, "grad_norm": 0.15714298494345658, "learning_rate": 0.0004249945292451337, "loss": 3.0226359367370605, "step": 9913, "token_acc": 0.29586274729516054 }, { "epoch": 5.811492231017297, "grad_norm": 0.1687407601029449, "learning_rate": 0.00042497722406946946, "loss": 3.010319232940674, "step": 9914, "token_acc": 0.29809959808732395 }, { "epoch": 5.812078569334506, "grad_norm": 0.17948614148038625, "learning_rate": 0.00042495991725012994, "loss": 3.0053184032440186, "step": 9915, "token_acc": 0.2994510581148214 }, { "epoch": 5.812664907651715, "grad_norm": 0.2086592467517009, "learning_rate": 0.0004249426087872778, "loss": 3.0264275074005127, "step": 9916, "token_acc": 0.295315836829382 }, { "epoch": 5.8132512459689245, "grad_norm": 0.23335396366070113, "learning_rate": 0.0004249252986810756, "loss": 3.0102686882019043, "step": 9917, "token_acc": 0.2970574787467355 }, { "epoch": 5.813837584286133, "grad_norm": 0.21996269038311886, "learning_rate": 0.00042490798693168593, "loss": 3.043732166290283, "step": 9918, "token_acc": 0.2933223435675527 }, { "epoch": 5.814423922603342, "grad_norm": 0.18530469826867668, "learning_rate": 0.0004248906735392714, "loss": 3.0129292011260986, "step": 9919, "token_acc": 0.2971605174716196 }, { "epoch": 5.815010260920551, "grad_norm": 0.2377449686025687, "learning_rate": 0.0004248733585039946, "loss": 2.9897301197052, "step": 9920, "token_acc": 0.30024687517248644 }, { "epoch": 5.81559659923776, "grad_norm": 0.27267475489364945, "learning_rate": 0.00042485604182601833, "loss": 2.972597122192383, "step": 9921, "token_acc": 0.30305584490970827 }, { "epoch": 5.816182937554969, "grad_norm": 0.1757501901623698, "learning_rate": 0.0004248387235055051, "loss": 3.0212197303771973, "step": 9922, "token_acc": 0.29612465738232174 }, { "epoch": 5.816769275872178, "grad_norm": 0.1895733874698817, "learning_rate": 0.00042482140354261776, "loss": 3.0097126960754395, "step": 9923, "token_acc": 0.29815154327853677 }, { "epoch": 5.817355614189387, "grad_norm": 0.20299232679116552, "learning_rate": 0.00042480408193751887, "loss": 3.0479159355163574, "step": 9924, "token_acc": 0.2908756202560031 }, { "epoch": 5.8179419525065965, "grad_norm": 0.17620247341020376, "learning_rate": 0.0004247867586903711, "loss": 3.021477460861206, "step": 9925, "token_acc": 0.2961721902898373 }, { "epoch": 5.818528290823806, "grad_norm": 0.2049642520910694, "learning_rate": 0.0004247694338013373, "loss": 3.039742946624756, "step": 9926, "token_acc": 0.29330553941837817 }, { "epoch": 5.819114629141015, "grad_norm": 0.19089721742243326, "learning_rate": 0.0004247521072705802, "loss": 3.023261070251465, "step": 9927, "token_acc": 0.2946545400475813 }, { "epoch": 5.819700967458223, "grad_norm": 0.1731140297627653, "learning_rate": 0.00042473477909826254, "loss": 2.9618992805480957, "step": 9928, "token_acc": 0.3030949143607262 }, { "epoch": 5.820287305775432, "grad_norm": 0.19273139677956433, "learning_rate": 0.0004247174492845471, "loss": 3.0052127838134766, "step": 9929, "token_acc": 0.2991691816613384 }, { "epoch": 5.820873644092641, "grad_norm": 0.1621394562792739, "learning_rate": 0.00042470011782959663, "loss": 3.032313346862793, "step": 9930, "token_acc": 0.29331483010714104 }, { "epoch": 5.82145998240985, "grad_norm": 0.1772828852103662, "learning_rate": 0.0004246827847335739, "loss": 3.045546531677246, "step": 9931, "token_acc": 0.2927859401389224 }, { "epoch": 5.822046320727059, "grad_norm": 0.2382340101664907, "learning_rate": 0.0004246654499966419, "loss": 2.982382297515869, "step": 9932, "token_acc": 0.3027504480922232 }, { "epoch": 5.8226326590442685, "grad_norm": 0.20607170315934709, "learning_rate": 0.0004246481136189633, "loss": 3.0268819332122803, "step": 9933, "token_acc": 0.2941412074811982 }, { "epoch": 5.823218997361478, "grad_norm": 0.16556030420120882, "learning_rate": 0.0004246307756007011, "loss": 2.988980770111084, "step": 9934, "token_acc": 0.30174394952164646 }, { "epoch": 5.823805335678687, "grad_norm": 0.17229705862178424, "learning_rate": 0.00042461343594201795, "loss": 3.0542287826538086, "step": 9935, "token_acc": 0.2906349922111995 }, { "epoch": 5.824391673995896, "grad_norm": 0.15428436541313784, "learning_rate": 0.00042459609464307697, "loss": 3.036468505859375, "step": 9936, "token_acc": 0.2939639439246826 }, { "epoch": 5.824978012313105, "grad_norm": 0.1871352827475086, "learning_rate": 0.0004245787517040409, "loss": 3.027794599533081, "step": 9937, "token_acc": 0.2960834359007579 }, { "epoch": 5.825564350630314, "grad_norm": 0.17925905677672996, "learning_rate": 0.00042456140712507275, "loss": 3.0783824920654297, "step": 9938, "token_acc": 0.2878753922008068 }, { "epoch": 5.826150688947523, "grad_norm": 0.19512006381030642, "learning_rate": 0.0004245440609063354, "loss": 2.99771785736084, "step": 9939, "token_acc": 0.2985421734271251 }, { "epoch": 5.826737027264731, "grad_norm": 0.1740642098808907, "learning_rate": 0.00042452671304799174, "loss": 3.0366663932800293, "step": 9940, "token_acc": 0.2942588325652842 }, { "epoch": 5.8273233655819405, "grad_norm": 0.1895174215358304, "learning_rate": 0.00042450936355020486, "loss": 3.0096302032470703, "step": 9941, "token_acc": 0.29793306888428006 }, { "epoch": 5.82790970389915, "grad_norm": 0.16667071548354603, "learning_rate": 0.00042449201241313763, "loss": 3.041743278503418, "step": 9942, "token_acc": 0.2920382440371146 }, { "epoch": 5.828496042216359, "grad_norm": 0.1926521572238255, "learning_rate": 0.000424474659636953, "loss": 3.0008723735809326, "step": 9943, "token_acc": 0.300528268870288 }, { "epoch": 5.829082380533568, "grad_norm": 0.27653223054714676, "learning_rate": 0.0004244573052218141, "loss": 2.988206386566162, "step": 9944, "token_acc": 0.29988626853501 }, { "epoch": 5.829668718850777, "grad_norm": 0.31921464609985134, "learning_rate": 0.00042443994916788397, "loss": 3.0417234897613525, "step": 9945, "token_acc": 0.29328729207535004 }, { "epoch": 5.830255057167986, "grad_norm": 0.18686841873703186, "learning_rate": 0.0004244225914753255, "loss": 3.0199027061462402, "step": 9946, "token_acc": 0.2958733534843021 }, { "epoch": 5.830841395485195, "grad_norm": 0.2055391020820378, "learning_rate": 0.00042440523214430187, "loss": 3.0208704471588135, "step": 9947, "token_acc": 0.2954877548365143 }, { "epoch": 5.831427733802404, "grad_norm": 0.20446241064715343, "learning_rate": 0.0004243878711749761, "loss": 2.98987078666687, "step": 9948, "token_acc": 0.30055845005062437 }, { "epoch": 5.8320140721196125, "grad_norm": 0.16865969175337753, "learning_rate": 0.00042437050856751127, "loss": 3.0320911407470703, "step": 9949, "token_acc": 0.2944001907818166 }, { "epoch": 5.832600410436822, "grad_norm": 0.20136093713224978, "learning_rate": 0.0004243531443220704, "loss": 2.9527812004089355, "step": 9950, "token_acc": 0.3060652945535865 }, { "epoch": 5.833186748754031, "grad_norm": 0.17683303919843515, "learning_rate": 0.0004243357784388168, "loss": 3.008302688598633, "step": 9951, "token_acc": 0.29778024242296813 }, { "epoch": 5.83377308707124, "grad_norm": 0.16422772292648222, "learning_rate": 0.0004243184109179134, "loss": 2.995554208755493, "step": 9952, "token_acc": 0.30057305201843354 }, { "epoch": 5.834359425388449, "grad_norm": 0.17638838422798378, "learning_rate": 0.00042430104175952344, "loss": 2.981992244720459, "step": 9953, "token_acc": 0.3014567261853168 }, { "epoch": 5.834945763705658, "grad_norm": 0.16829007990277933, "learning_rate": 0.00042428367096381003, "loss": 3.0044472217559814, "step": 9954, "token_acc": 0.2994327312684568 }, { "epoch": 5.835532102022867, "grad_norm": 0.16626726644792333, "learning_rate": 0.0004242662985309365, "loss": 2.9926421642303467, "step": 9955, "token_acc": 0.29958532796667803 }, { "epoch": 5.836118440340076, "grad_norm": 0.15885383165849284, "learning_rate": 0.0004242489244610658, "loss": 2.9899938106536865, "step": 9956, "token_acc": 0.29993209574499957 }, { "epoch": 5.836704778657285, "grad_norm": 0.15598748013134373, "learning_rate": 0.0004242315487543613, "loss": 2.981785774230957, "step": 9957, "token_acc": 0.3004575840709582 }, { "epoch": 5.8372911169744945, "grad_norm": 0.17810021420414673, "learning_rate": 0.0004242141714109862, "loss": 3.001772880554199, "step": 9958, "token_acc": 0.2987717218593155 }, { "epoch": 5.837877455291704, "grad_norm": 0.15093821765840398, "learning_rate": 0.00042419679243110376, "loss": 2.9479687213897705, "step": 9959, "token_acc": 0.3046957073465015 }, { "epoch": 5.838463793608913, "grad_norm": 0.17913905170088082, "learning_rate": 0.00042417941181487707, "loss": 2.9735822677612305, "step": 9960, "token_acc": 0.30178581266675586 }, { "epoch": 5.839050131926121, "grad_norm": 0.16544547317507688, "learning_rate": 0.0004241620295624696, "loss": 3.018216848373413, "step": 9961, "token_acc": 0.2960263276763245 }, { "epoch": 5.83963647024333, "grad_norm": 0.15202959355547385, "learning_rate": 0.00042414464567404455, "loss": 3.01279878616333, "step": 9962, "token_acc": 0.2957606236657356 }, { "epoch": 5.840222808560539, "grad_norm": 0.17288497200996936, "learning_rate": 0.00042412726014976523, "loss": 2.9706430435180664, "step": 9963, "token_acc": 0.3040054166721984 }, { "epoch": 5.840809146877748, "grad_norm": 0.1573875942851172, "learning_rate": 0.00042410987298979486, "loss": 3.0085582733154297, "step": 9964, "token_acc": 0.29829367461588674 }, { "epoch": 5.841395485194957, "grad_norm": 0.17070485609223432, "learning_rate": 0.0004240924841942969, "loss": 3.004000186920166, "step": 9965, "token_acc": 0.2989459227812 }, { "epoch": 5.8419818235121665, "grad_norm": 0.1565068737039995, "learning_rate": 0.0004240750937634347, "loss": 2.9882073402404785, "step": 9966, "token_acc": 0.3003839753297722 }, { "epoch": 5.842568161829376, "grad_norm": 0.185847502278089, "learning_rate": 0.00042405770169737147, "loss": 3.0152997970581055, "step": 9967, "token_acc": 0.29861415397156976 }, { "epoch": 5.843154500146585, "grad_norm": 0.23822248929003415, "learning_rate": 0.00042404030799627067, "loss": 3.0349934101104736, "step": 9968, "token_acc": 0.2941174888157771 }, { "epoch": 5.843740838463794, "grad_norm": 0.288853400246377, "learning_rate": 0.00042402291266029567, "loss": 3.035841464996338, "step": 9969, "token_acc": 0.29273378560606456 }, { "epoch": 5.844327176781003, "grad_norm": 0.19553709852321036, "learning_rate": 0.00042400551568960997, "loss": 3.033818244934082, "step": 9970, "token_acc": 0.2934002830245379 }, { "epoch": 5.844913515098211, "grad_norm": 0.18562729822762683, "learning_rate": 0.000423988117084377, "loss": 2.963871955871582, "step": 9971, "token_acc": 0.3052341055231444 }, { "epoch": 5.84549985341542, "grad_norm": 0.22645461006625403, "learning_rate": 0.00042397071684476006, "loss": 2.992323398590088, "step": 9972, "token_acc": 0.29948532694310886 }, { "epoch": 5.8460861917326294, "grad_norm": 0.2034308076779647, "learning_rate": 0.00042395331497092263, "loss": 2.9774386882781982, "step": 9973, "token_acc": 0.3013217704707845 }, { "epoch": 5.846672530049839, "grad_norm": 0.17630827789734335, "learning_rate": 0.0004239359114630282, "loss": 3.010213851928711, "step": 9974, "token_acc": 0.29740926004025175 }, { "epoch": 5.847258868367048, "grad_norm": 0.15202877286755886, "learning_rate": 0.00042391850632124027, "loss": 3.0469069480895996, "step": 9975, "token_acc": 0.2928263250253988 }, { "epoch": 5.847845206684257, "grad_norm": 0.16524180912842562, "learning_rate": 0.00042390109954572243, "loss": 2.9999923706054688, "step": 9976, "token_acc": 0.2977966614772531 }, { "epoch": 5.848431545001466, "grad_norm": 0.19842541139418873, "learning_rate": 0.00042388369113663805, "loss": 3.0255346298217773, "step": 9977, "token_acc": 0.29530012078258605 }, { "epoch": 5.849017883318675, "grad_norm": 0.23061938638648846, "learning_rate": 0.0004238662810941506, "loss": 2.981649875640869, "step": 9978, "token_acc": 0.3009559139756576 }, { "epoch": 5.849604221635884, "grad_norm": 0.17567699642177417, "learning_rate": 0.0004238488694184238, "loss": 2.9582037925720215, "step": 9979, "token_acc": 0.30470684935966125 }, { "epoch": 5.850190559953093, "grad_norm": 0.1849544207014276, "learning_rate": 0.00042383145610962116, "loss": 3.008603096008301, "step": 9980, "token_acc": 0.29719393427313556 }, { "epoch": 5.850776898270302, "grad_norm": 0.1571779266102812, "learning_rate": 0.0004238140411679062, "loss": 3.0345168113708496, "step": 9981, "token_acc": 0.29378139240026224 }, { "epoch": 5.8513632365875115, "grad_norm": 0.16655801763866693, "learning_rate": 0.0004237966245934426, "loss": 2.976304531097412, "step": 9982, "token_acc": 0.30085239420607507 }, { "epoch": 5.85194957490472, "grad_norm": 0.16608412857119426, "learning_rate": 0.0004237792063863938, "loss": 3.003843307495117, "step": 9983, "token_acc": 0.2980717791268346 }, { "epoch": 5.852535913221929, "grad_norm": 0.17708581616201324, "learning_rate": 0.0004237617865469236, "loss": 3.0005133152008057, "step": 9984, "token_acc": 0.2995318916745642 }, { "epoch": 5.853122251539138, "grad_norm": 0.1778489834230273, "learning_rate": 0.00042374436507519554, "loss": 3.0067050457000732, "step": 9985, "token_acc": 0.2987595986593696 }, { "epoch": 5.853708589856347, "grad_norm": 0.21598815941824, "learning_rate": 0.0004237269419713733, "loss": 3.0277063846588135, "step": 9986, "token_acc": 0.29448556050098335 }, { "epoch": 5.854294928173556, "grad_norm": 0.25153577806676974, "learning_rate": 0.0004237095172356206, "loss": 3.039735794067383, "step": 9987, "token_acc": 0.2948286796419214 }, { "epoch": 5.854881266490765, "grad_norm": 0.22169680231067176, "learning_rate": 0.0004236920908681009, "loss": 2.97993803024292, "step": 9988, "token_acc": 0.30163205244833635 }, { "epoch": 5.855467604807974, "grad_norm": 0.16037444612800486, "learning_rate": 0.00042367466286897816, "loss": 3.0701088905334473, "step": 9989, "token_acc": 0.28724421301004843 }, { "epoch": 5.8560539431251835, "grad_norm": 0.28998339505038306, "learning_rate": 0.000423657233238416, "loss": 3.080392360687256, "step": 9990, "token_acc": 0.28919700320487685 }, { "epoch": 5.856640281442393, "grad_norm": 0.31270408535737, "learning_rate": 0.00042363980197657815, "loss": 3.026762008666992, "step": 9991, "token_acc": 0.29613566786615825 }, { "epoch": 5.857226619759601, "grad_norm": 0.15025587048822409, "learning_rate": 0.00042362236908362837, "loss": 3.029123306274414, "step": 9992, "token_acc": 0.2941401161099376 }, { "epoch": 5.85781295807681, "grad_norm": 0.23840128305711616, "learning_rate": 0.00042360493455973034, "loss": 2.972878932952881, "step": 9993, "token_acc": 0.30066985595598467 }, { "epoch": 5.858399296394019, "grad_norm": 0.14763499637312424, "learning_rate": 0.0004235874984050478, "loss": 2.985220193862915, "step": 9994, "token_acc": 0.3005376444303605 }, { "epoch": 5.858985634711228, "grad_norm": 0.2336876774261463, "learning_rate": 0.0004235700606197448, "loss": 3.011638879776001, "step": 9995, "token_acc": 0.29578717383051534 }, { "epoch": 5.859571973028437, "grad_norm": 0.1623394203731384, "learning_rate": 0.00042355262120398484, "loss": 3.051365375518799, "step": 9996, "token_acc": 0.290441474598927 }, { "epoch": 5.860158311345646, "grad_norm": 0.2210879482021252, "learning_rate": 0.0004235351801579319, "loss": 3.061344623565674, "step": 9997, "token_acc": 0.289923532149744 }, { "epoch": 5.8607446496628555, "grad_norm": 0.16200233129833125, "learning_rate": 0.0004235177374817498, "loss": 3.0351545810699463, "step": 9998, "token_acc": 0.292527715173705 }, { "epoch": 5.861330987980065, "grad_norm": 0.22726253865968601, "learning_rate": 0.0004235002931756024, "loss": 3.027992010116577, "step": 9999, "token_acc": 0.29617437698990595 }, { "epoch": 5.861917326297274, "grad_norm": 0.1593521513185362, "learning_rate": 0.0004234828472396535, "loss": 2.9697537422180176, "step": 10000, "token_acc": 0.30195530430548057 }, { "epoch": 5.862503664614483, "grad_norm": 0.17330643505791402, "learning_rate": 0.00042346539967406705, "loss": 3.0172080993652344, "step": 10001, "token_acc": 0.2964358741112228 }, { "epoch": 5.863090002931692, "grad_norm": 0.15705309844577311, "learning_rate": 0.00042344795047900694, "loss": 2.997549295425415, "step": 10002, "token_acc": 0.2988051706313638 }, { "epoch": 5.863676341248901, "grad_norm": 0.18221318803017367, "learning_rate": 0.000423430499654637, "loss": 3.0182929039001465, "step": 10003, "token_acc": 0.29566994933890267 }, { "epoch": 5.86426267956611, "grad_norm": 0.15657318098032233, "learning_rate": 0.0004234130472011212, "loss": 3.0519533157348633, "step": 10004, "token_acc": 0.2934261741389128 }, { "epoch": 5.864849017883318, "grad_norm": 0.18449563535452232, "learning_rate": 0.00042339559311862357, "loss": 3.04988694190979, "step": 10005, "token_acc": 0.2916774146950483 }, { "epoch": 5.8654353562005275, "grad_norm": 0.15247236271147513, "learning_rate": 0.00042337813740730794, "loss": 2.9692835807800293, "step": 10006, "token_acc": 0.30247149945732116 }, { "epoch": 5.866021694517737, "grad_norm": 0.17167170801019946, "learning_rate": 0.0004233606800673384, "loss": 3.03755521774292, "step": 10007, "token_acc": 0.2942136385086061 }, { "epoch": 5.866608032834946, "grad_norm": 0.16569413257247906, "learning_rate": 0.0004233432210988788, "loss": 3.003567934036255, "step": 10008, "token_acc": 0.29582393977050736 }, { "epoch": 5.867194371152155, "grad_norm": 0.16403796620204583, "learning_rate": 0.00042332576050209327, "loss": 2.9730613231658936, "step": 10009, "token_acc": 0.3018683416721425 }, { "epoch": 5.867780709469364, "grad_norm": 0.15527960156900567, "learning_rate": 0.0004233082982771458, "loss": 3.0426597595214844, "step": 10010, "token_acc": 0.29251465208669314 }, { "epoch": 5.868367047786573, "grad_norm": 0.17287883552585795, "learning_rate": 0.00042329083442420036, "loss": 2.973691463470459, "step": 10011, "token_acc": 0.30258306370291305 }, { "epoch": 5.868953386103782, "grad_norm": 0.15627604500139708, "learning_rate": 0.00042327336894342106, "loss": 3.0062315464019775, "step": 10012, "token_acc": 0.29665731638138754 }, { "epoch": 5.869539724420991, "grad_norm": 0.16479267427658223, "learning_rate": 0.00042325590183497187, "loss": 3.0075550079345703, "step": 10013, "token_acc": 0.298632909246471 }, { "epoch": 5.8701260627381995, "grad_norm": 0.16848292514437746, "learning_rate": 0.00042323843309901703, "loss": 3.049063205718994, "step": 10014, "token_acc": 0.2927030130466635 }, { "epoch": 5.870712401055409, "grad_norm": 0.16466197200629742, "learning_rate": 0.0004232209627357206, "loss": 3.0021820068359375, "step": 10015, "token_acc": 0.2998244813166587 }, { "epoch": 5.871298739372618, "grad_norm": 0.250844522405624, "learning_rate": 0.00042320349074524656, "loss": 2.9807190895080566, "step": 10016, "token_acc": 0.2988374791103417 }, { "epoch": 5.871885077689827, "grad_norm": 0.30475767325698316, "learning_rate": 0.00042318601712775916, "loss": 3.0048766136169434, "step": 10017, "token_acc": 0.3003643767219022 }, { "epoch": 5.872471416007036, "grad_norm": 0.26139063510196614, "learning_rate": 0.0004231685418834225, "loss": 3.018401861190796, "step": 10018, "token_acc": 0.2953928612756288 }, { "epoch": 5.873057754324245, "grad_norm": 0.1577548289212022, "learning_rate": 0.00042315106501240066, "loss": 3.000736951828003, "step": 10019, "token_acc": 0.299025417793533 }, { "epoch": 5.873644092641454, "grad_norm": 0.22560844017948412, "learning_rate": 0.000423133586514858, "loss": 3.0134692192077637, "step": 10020, "token_acc": 0.2980477246219962 }, { "epoch": 5.874230430958663, "grad_norm": 0.16825441308293498, "learning_rate": 0.00042311610639095856, "loss": 3.022550582885742, "step": 10021, "token_acc": 0.29709109379400006 }, { "epoch": 5.874816769275872, "grad_norm": 0.2319930068238738, "learning_rate": 0.0004230986246408665, "loss": 3.000293731689453, "step": 10022, "token_acc": 0.29891590445482663 }, { "epoch": 5.8754031075930815, "grad_norm": 0.18342840000589894, "learning_rate": 0.00042308114126474617, "loss": 3.0302324295043945, "step": 10023, "token_acc": 0.2940948278062201 }, { "epoch": 5.875989445910291, "grad_norm": 0.18755149717289787, "learning_rate": 0.0004230636562627618, "loss": 3.000781536102295, "step": 10024, "token_acc": 0.3002076162789238 }, { "epoch": 5.8765757842275, "grad_norm": 0.19518953629190688, "learning_rate": 0.0004230461696350775, "loss": 3.042587995529175, "step": 10025, "token_acc": 0.292569172367593 }, { "epoch": 5.877162122544708, "grad_norm": 0.16479337513603584, "learning_rate": 0.00042302868138185766, "loss": 2.980506420135498, "step": 10026, "token_acc": 0.3002999565544279 }, { "epoch": 5.877748460861917, "grad_norm": 0.19573420565503852, "learning_rate": 0.0004230111915032665, "loss": 3.01444673538208, "step": 10027, "token_acc": 0.2961651856955868 }, { "epoch": 5.878334799179126, "grad_norm": 0.17286499789409712, "learning_rate": 0.00042299369999946836, "loss": 3.0103507041931152, "step": 10028, "token_acc": 0.29722196024186026 }, { "epoch": 5.878921137496335, "grad_norm": 0.22643062319185542, "learning_rate": 0.00042297620687062755, "loss": 3.048881769180298, "step": 10029, "token_acc": 0.29269626094369006 }, { "epoch": 5.879507475813544, "grad_norm": 0.1652456923383218, "learning_rate": 0.00042295871211690827, "loss": 3.025496006011963, "step": 10030, "token_acc": 0.29473120742412584 }, { "epoch": 5.8800938141307535, "grad_norm": 0.30059949164679156, "learning_rate": 0.0004229412157384751, "loss": 3.0132880210876465, "step": 10031, "token_acc": 0.2980761840651113 }, { "epoch": 5.880680152447963, "grad_norm": 0.15363081105578819, "learning_rate": 0.0004229237177354921, "loss": 2.975437879562378, "step": 10032, "token_acc": 0.30338283245866576 }, { "epoch": 5.881266490765172, "grad_norm": 0.31549443305675395, "learning_rate": 0.0004229062181081238, "loss": 3.012751817703247, "step": 10033, "token_acc": 0.2965051487843297 }, { "epoch": 5.881852829082381, "grad_norm": 0.1620672092408641, "learning_rate": 0.00042288871685653464, "loss": 2.9819223880767822, "step": 10034, "token_acc": 0.3025931312813659 }, { "epoch": 5.88243916739959, "grad_norm": 0.26012782705430415, "learning_rate": 0.00042287121398088895, "loss": 3.044726610183716, "step": 10035, "token_acc": 0.29393716122915853 }, { "epoch": 5.883025505716798, "grad_norm": 0.14391103455766485, "learning_rate": 0.00042285370948135116, "loss": 2.966080904006958, "step": 10036, "token_acc": 0.3045379974527441 }, { "epoch": 5.883611844034007, "grad_norm": 0.23902293422252838, "learning_rate": 0.00042283620335808564, "loss": 2.988171100616455, "step": 10037, "token_acc": 0.30130849935164444 }, { "epoch": 5.884198182351216, "grad_norm": 0.1585695493076413, "learning_rate": 0.0004228186956112569, "loss": 3.031554937362671, "step": 10038, "token_acc": 0.29391588904785293 }, { "epoch": 5.8847845206684255, "grad_norm": 0.2225791666270798, "learning_rate": 0.00042280118624102943, "loss": 3.0251078605651855, "step": 10039, "token_acc": 0.2941930616479032 }, { "epoch": 5.885370858985635, "grad_norm": 0.1670203481737096, "learning_rate": 0.0004227836752475677, "loss": 3.010450839996338, "step": 10040, "token_acc": 0.2960606648184929 }, { "epoch": 5.885957197302844, "grad_norm": 0.17366190492008451, "learning_rate": 0.00042276616263103606, "loss": 3.0037269592285156, "step": 10041, "token_acc": 0.29986869692284884 }, { "epoch": 5.886543535620053, "grad_norm": 0.2175738807681619, "learning_rate": 0.0004227486483915992, "loss": 2.995307445526123, "step": 10042, "token_acc": 0.30061494130803623 }, { "epoch": 5.887129873937262, "grad_norm": 0.16041855497488258, "learning_rate": 0.00042273113252942155, "loss": 3.0140137672424316, "step": 10043, "token_acc": 0.29779458955370497 }, { "epoch": 5.887716212254471, "grad_norm": 0.22573114688853205, "learning_rate": 0.00042271361504466766, "loss": 3.006498098373413, "step": 10044, "token_acc": 0.29725368885417497 }, { "epoch": 5.88830255057168, "grad_norm": 0.16405933175243043, "learning_rate": 0.00042269609593750216, "loss": 2.991373062133789, "step": 10045, "token_acc": 0.3015227771325332 }, { "epoch": 5.888888888888889, "grad_norm": 0.22142054993720608, "learning_rate": 0.0004226785752080895, "loss": 3.01389741897583, "step": 10046, "token_acc": 0.296225701900419 }, { "epoch": 5.889475227206098, "grad_norm": 0.17432813970734776, "learning_rate": 0.0004226610528565943, "loss": 2.981689929962158, "step": 10047, "token_acc": 0.3014212662961011 }, { "epoch": 5.890061565523307, "grad_norm": 0.20480444213756044, "learning_rate": 0.0004226435288831811, "loss": 3.0245089530944824, "step": 10048, "token_acc": 0.29495018753230007 }, { "epoch": 5.890647903840516, "grad_norm": 0.20257929713420675, "learning_rate": 0.0004226260032880147, "loss": 2.9766602516174316, "step": 10049, "token_acc": 0.3019105475175835 }, { "epoch": 5.891234242157725, "grad_norm": 0.15983918650224122, "learning_rate": 0.0004226084760712596, "loss": 2.996727466583252, "step": 10050, "token_acc": 0.30193033319832013 }, { "epoch": 5.891820580474934, "grad_norm": 0.19353037821900418, "learning_rate": 0.00042259094723308047, "loss": 3.0193777084350586, "step": 10051, "token_acc": 0.2955161937828894 }, { "epoch": 5.892406918792143, "grad_norm": 0.1612425404916145, "learning_rate": 0.0004225734167736419, "loss": 3.0128660202026367, "step": 10052, "token_acc": 0.2970894412577013 }, { "epoch": 5.892993257109352, "grad_norm": 0.21342947143734461, "learning_rate": 0.0004225558846931086, "loss": 3.012537717819214, "step": 10053, "token_acc": 0.29938055982488004 }, { "epoch": 5.893579595426561, "grad_norm": 0.1490338844860703, "learning_rate": 0.0004225383509916454, "loss": 2.994819402694702, "step": 10054, "token_acc": 0.29935380322468974 }, { "epoch": 5.89416593374377, "grad_norm": 0.1837830558151019, "learning_rate": 0.0004225208156694168, "loss": 2.9809417724609375, "step": 10055, "token_acc": 0.3014735496759157 }, { "epoch": 5.8947522720609795, "grad_norm": 0.14608987894109293, "learning_rate": 0.00042250327872658767, "loss": 3.0008561611175537, "step": 10056, "token_acc": 0.2980989520495876 }, { "epoch": 5.895338610378188, "grad_norm": 0.17734471261151086, "learning_rate": 0.0004224857401633226, "loss": 3.030193328857422, "step": 10057, "token_acc": 0.29490195069226416 }, { "epoch": 5.895924948695397, "grad_norm": 0.18912485448980312, "learning_rate": 0.00042246819997978654, "loss": 3.008021354675293, "step": 10058, "token_acc": 0.29973014447970603 }, { "epoch": 5.896511287012606, "grad_norm": 0.144317069993294, "learning_rate": 0.0004224506581761441, "loss": 2.986654281616211, "step": 10059, "token_acc": 0.3009143450943002 }, { "epoch": 5.897097625329815, "grad_norm": 0.166776700108989, "learning_rate": 0.0004224331147525601, "loss": 3.02883243560791, "step": 10060, "token_acc": 0.29305233086749016 }, { "epoch": 5.897683963647024, "grad_norm": 0.15621421055863774, "learning_rate": 0.00042241556970919934, "loss": 2.978274345397949, "step": 10061, "token_acc": 0.3023130335684859 }, { "epoch": 5.898270301964233, "grad_norm": 0.17391352353632042, "learning_rate": 0.00042239802304622665, "loss": 3.0054380893707275, "step": 10062, "token_acc": 0.29787828838926617 }, { "epoch": 5.898856640281442, "grad_norm": 0.16676243594658033, "learning_rate": 0.0004223804747638068, "loss": 3.0046634674072266, "step": 10063, "token_acc": 0.29894731329213364 }, { "epoch": 5.8994429785986515, "grad_norm": 0.21260519971642114, "learning_rate": 0.0004223629248621048, "loss": 3.021902561187744, "step": 10064, "token_acc": 0.29586290435295765 }, { "epoch": 5.900029316915861, "grad_norm": 0.18957657084814813, "learning_rate": 0.00042234537334128526, "loss": 2.99540638923645, "step": 10065, "token_acc": 0.3002756596879295 }, { "epoch": 5.90061565523307, "grad_norm": 0.17104956859457543, "learning_rate": 0.00042232782020151316, "loss": 3.010921001434326, "step": 10066, "token_acc": 0.29734299516908214 }, { "epoch": 5.901201993550279, "grad_norm": 0.18837925304786562, "learning_rate": 0.0004223102654429535, "loss": 2.971719264984131, "step": 10067, "token_acc": 0.30193314300838475 }, { "epoch": 5.901788331867488, "grad_norm": 0.18217170296890248, "learning_rate": 0.000422292709065771, "loss": 3.038811206817627, "step": 10068, "token_acc": 0.29390959617552176 }, { "epoch": 5.902374670184696, "grad_norm": 0.15696668377741466, "learning_rate": 0.0004222751510701307, "loss": 3.0397796630859375, "step": 10069, "token_acc": 0.29142475743640833 }, { "epoch": 5.902961008501905, "grad_norm": 0.20711992650186103, "learning_rate": 0.00042225759145619754, "loss": 3.0648930072784424, "step": 10070, "token_acc": 0.28867804472541797 }, { "epoch": 5.903547346819114, "grad_norm": 0.1896654278087999, "learning_rate": 0.0004222400302241364, "loss": 3.005308151245117, "step": 10071, "token_acc": 0.29679277235681417 }, { "epoch": 5.9041336851363235, "grad_norm": 0.16042788725347884, "learning_rate": 0.00042222246737411216, "loss": 2.9597105979919434, "step": 10072, "token_acc": 0.30532019781323244 }, { "epoch": 5.904720023453533, "grad_norm": 0.15987663656730164, "learning_rate": 0.0004222049029062901, "loss": 3.018336296081543, "step": 10073, "token_acc": 0.2959441717614564 }, { "epoch": 5.905306361770742, "grad_norm": 0.15060876743971716, "learning_rate": 0.0004221873368208349, "loss": 3.0405611991882324, "step": 10074, "token_acc": 0.2934697735585419 }, { "epoch": 5.905892700087951, "grad_norm": 0.1567229842625141, "learning_rate": 0.0004221697691179118, "loss": 3.0078377723693848, "step": 10075, "token_acc": 0.29613654224218394 }, { "epoch": 5.90647903840516, "grad_norm": 0.1796836867231402, "learning_rate": 0.0004221521997976856, "loss": 3.005740165710449, "step": 10076, "token_acc": 0.29786077197436983 }, { "epoch": 5.907065376722369, "grad_norm": 0.19579248019580392, "learning_rate": 0.0004221346288603215, "loss": 2.9656715393066406, "step": 10077, "token_acc": 0.3035575736431283 }, { "epoch": 5.907651715039578, "grad_norm": 0.17121102330562168, "learning_rate": 0.00042211705630598457, "loss": 3.025899648666382, "step": 10078, "token_acc": 0.29430141006463123 }, { "epoch": 5.908238053356786, "grad_norm": 0.15890179811204816, "learning_rate": 0.00042209948213483986, "loss": 2.960113525390625, "step": 10079, "token_acc": 0.30467723001520125 }, { "epoch": 5.9088243916739955, "grad_norm": 0.17909616112135726, "learning_rate": 0.00042208190634705235, "loss": 2.9747910499572754, "step": 10080, "token_acc": 0.30368928228380204 }, { "epoch": 5.909410729991205, "grad_norm": 0.1555424350502692, "learning_rate": 0.0004220643289427871, "loss": 2.9822983741760254, "step": 10081, "token_acc": 0.30000025906534405 }, { "epoch": 5.909997068308414, "grad_norm": 0.16369627275082183, "learning_rate": 0.0004220467499222095, "loss": 3.0656371116638184, "step": 10082, "token_acc": 0.2908480436155713 }, { "epoch": 5.910583406625623, "grad_norm": 0.2349843770150894, "learning_rate": 0.00042202916928548454, "loss": 3.0482265949249268, "step": 10083, "token_acc": 0.2924696736831585 }, { "epoch": 5.911169744942832, "grad_norm": 0.21026545184142623, "learning_rate": 0.00042201158703277723, "loss": 3.0417137145996094, "step": 10084, "token_acc": 0.29291762272565686 }, { "epoch": 5.911756083260041, "grad_norm": 0.1606142225559163, "learning_rate": 0.00042199400316425296, "loss": 3.072801351547241, "step": 10085, "token_acc": 0.28848822806659735 }, { "epoch": 5.91234242157725, "grad_norm": 0.19337590010188072, "learning_rate": 0.0004219764176800767, "loss": 3.010916233062744, "step": 10086, "token_acc": 0.297163927495916 }, { "epoch": 5.912928759894459, "grad_norm": 0.1988391131517098, "learning_rate": 0.00042195883058041375, "loss": 3.0219666957855225, "step": 10087, "token_acc": 0.29657923308105044 }, { "epoch": 5.913515098211668, "grad_norm": 0.16708615931787546, "learning_rate": 0.0004219412418654294, "loss": 2.984231472015381, "step": 10088, "token_acc": 0.30143759327057634 }, { "epoch": 5.9141014365288775, "grad_norm": 0.18235467398098493, "learning_rate": 0.00042192365153528866, "loss": 3.020191192626953, "step": 10089, "token_acc": 0.2960278027379883 }, { "epoch": 5.914687774846087, "grad_norm": 0.1638703920961271, "learning_rate": 0.0004219060595901569, "loss": 3.034750461578369, "step": 10090, "token_acc": 0.2942687184792448 }, { "epoch": 5.915274113163295, "grad_norm": 0.16346942959519864, "learning_rate": 0.0004218884660301994, "loss": 3.013485908508301, "step": 10091, "token_acc": 0.29581557843474576 }, { "epoch": 5.915860451480504, "grad_norm": 0.17220907253259696, "learning_rate": 0.0004218708708555814, "loss": 2.9683408737182617, "step": 10092, "token_acc": 0.30378970891766205 }, { "epoch": 5.916446789797713, "grad_norm": 0.2572340807126346, "learning_rate": 0.0004218532740664681, "loss": 3.024446487426758, "step": 10093, "token_acc": 0.29551667059489656 }, { "epoch": 5.917033128114922, "grad_norm": 0.3472846970067426, "learning_rate": 0.00042183567566302495, "loss": 3.021104097366333, "step": 10094, "token_acc": 0.29650833693036577 }, { "epoch": 5.917619466432131, "grad_norm": 0.14610202725191773, "learning_rate": 0.0004218180756454171, "loss": 2.9883360862731934, "step": 10095, "token_acc": 0.2999964331937183 }, { "epoch": 5.91820580474934, "grad_norm": 0.30741693626857425, "learning_rate": 0.0004218004740138099, "loss": 2.999293327331543, "step": 10096, "token_acc": 0.29815387438143187 }, { "epoch": 5.9187921430665495, "grad_norm": 0.18159162380598898, "learning_rate": 0.0004217828707683689, "loss": 3.000601291656494, "step": 10097, "token_acc": 0.2980950021514349 }, { "epoch": 5.919378481383759, "grad_norm": 0.24605915103767623, "learning_rate": 0.00042176526590925924, "loss": 3.0329198837280273, "step": 10098, "token_acc": 0.2942846134086822 }, { "epoch": 5.919964819700968, "grad_norm": 0.14876797430958535, "learning_rate": 0.0004217476594366464, "loss": 2.993206739425659, "step": 10099, "token_acc": 0.3004081568864428 }, { "epoch": 5.920551158018176, "grad_norm": 0.232993958363065, "learning_rate": 0.0004217300513506957, "loss": 3.0578975677490234, "step": 10100, "token_acc": 0.2902110806314792 }, { "epoch": 5.921137496335385, "grad_norm": 0.14631291090468782, "learning_rate": 0.00042171244165157255, "loss": 3.0388808250427246, "step": 10101, "token_acc": 0.29397384056775044 }, { "epoch": 5.921723834652594, "grad_norm": 0.20128754777790575, "learning_rate": 0.00042169483033944246, "loss": 2.991434097290039, "step": 10102, "token_acc": 0.2989116559411478 }, { "epoch": 5.922310172969803, "grad_norm": 0.17051339520599743, "learning_rate": 0.0004216772174144707, "loss": 3.0356264114379883, "step": 10103, "token_acc": 0.2945274381922584 }, { "epoch": 5.9228965112870124, "grad_norm": 0.21949858594867216, "learning_rate": 0.0004216596028768229, "loss": 3.0281591415405273, "step": 10104, "token_acc": 0.2961920224145608 }, { "epoch": 5.923482849604222, "grad_norm": 0.15615513171713064, "learning_rate": 0.00042164198672666446, "loss": 3.053191661834717, "step": 10105, "token_acc": 0.2919570777356787 }, { "epoch": 5.924069187921431, "grad_norm": 0.17428113614385846, "learning_rate": 0.0004216243689641608, "loss": 3.0184764862060547, "step": 10106, "token_acc": 0.296030211867119 }, { "epoch": 5.92465552623864, "grad_norm": 0.15108839836139726, "learning_rate": 0.00042160674958947755, "loss": 2.9939656257629395, "step": 10107, "token_acc": 0.2990989709674276 }, { "epoch": 5.925241864555849, "grad_norm": 0.17859686426085583, "learning_rate": 0.00042158912860278, "loss": 3.020618438720703, "step": 10108, "token_acc": 0.2948943824248984 }, { "epoch": 5.925828202873058, "grad_norm": 0.16219310528037026, "learning_rate": 0.0004215715060042339, "loss": 3.0204007625579834, "step": 10109, "token_acc": 0.29779476544816275 }, { "epoch": 5.926414541190267, "grad_norm": 0.17026218455976855, "learning_rate": 0.00042155388179400464, "loss": 3.0147199630737305, "step": 10110, "token_acc": 0.2972232189682299 }, { "epoch": 5.927000879507476, "grad_norm": 0.1664879749656846, "learning_rate": 0.00042153625597225785, "loss": 2.9834794998168945, "step": 10111, "token_acc": 0.3025141344260688 }, { "epoch": 5.927587217824685, "grad_norm": 0.1610867208063306, "learning_rate": 0.0004215186285391591, "loss": 3.011470317840576, "step": 10112, "token_acc": 0.29861466189441394 }, { "epoch": 5.928173556141894, "grad_norm": 0.16211481305449554, "learning_rate": 0.00042150099949487396, "loss": 2.9802756309509277, "step": 10113, "token_acc": 0.30140612635875225 }, { "epoch": 5.928759894459103, "grad_norm": 0.18800682230113608, "learning_rate": 0.00042148336883956805, "loss": 3.047673225402832, "step": 10114, "token_acc": 0.29372559107598045 }, { "epoch": 5.929346232776312, "grad_norm": 0.21650815408753366, "learning_rate": 0.0004214657365734069, "loss": 3.0441136360168457, "step": 10115, "token_acc": 0.29390858341860193 }, { "epoch": 5.929932571093521, "grad_norm": 0.16771390255794055, "learning_rate": 0.0004214481026965562, "loss": 2.9964442253112793, "step": 10116, "token_acc": 0.2987840838577004 }, { "epoch": 5.93051890941073, "grad_norm": 0.24865785494638254, "learning_rate": 0.00042143046720918164, "loss": 3.0219011306762695, "step": 10117, "token_acc": 0.2962811218858909 }, { "epoch": 5.931105247727939, "grad_norm": 0.3250852109243497, "learning_rate": 0.00042141283011144895, "loss": 3.0204553604125977, "step": 10118, "token_acc": 0.29671923240849507 }, { "epoch": 5.931691586045148, "grad_norm": 0.21793301030392387, "learning_rate": 0.00042139519140352354, "loss": 3.014747381210327, "step": 10119, "token_acc": 0.2974399838101743 }, { "epoch": 5.932277924362357, "grad_norm": 0.23285777578008057, "learning_rate": 0.0004213775510855713, "loss": 3.0208816528320312, "step": 10120, "token_acc": 0.29494999845832864 }, { "epoch": 5.9328642626795665, "grad_norm": 0.23156039871883682, "learning_rate": 0.000421359909157758, "loss": 3.0300447940826416, "step": 10121, "token_acc": 0.29296029166161774 }, { "epoch": 5.933450600996775, "grad_norm": 0.16926181555819733, "learning_rate": 0.0004213422656202491, "loss": 2.975583553314209, "step": 10122, "token_acc": 0.3022562912133464 }, { "epoch": 5.934036939313984, "grad_norm": 0.21167627465243666, "learning_rate": 0.0004213246204732106, "loss": 2.9940714836120605, "step": 10123, "token_acc": 0.2998531495555842 }, { "epoch": 5.934623277631193, "grad_norm": 0.19142962243362605, "learning_rate": 0.00042130697371680823, "loss": 3.028719425201416, "step": 10124, "token_acc": 0.2949912870685683 }, { "epoch": 5.935209615948402, "grad_norm": 0.19928050904484318, "learning_rate": 0.00042128932535120755, "loss": 2.958505153656006, "step": 10125, "token_acc": 0.30484854250606164 }, { "epoch": 5.935795954265611, "grad_norm": 0.17769264041902896, "learning_rate": 0.0004212716753765745, "loss": 2.967970848083496, "step": 10126, "token_acc": 0.30144919274807674 }, { "epoch": 5.93638229258282, "grad_norm": 0.22636247680469265, "learning_rate": 0.00042125402379307485, "loss": 2.9788336753845215, "step": 10127, "token_acc": 0.3019274073508645 }, { "epoch": 5.936968630900029, "grad_norm": 0.15558066191604647, "learning_rate": 0.0004212363706008745, "loss": 3.0154614448547363, "step": 10128, "token_acc": 0.29628383528805663 }, { "epoch": 5.9375549692172385, "grad_norm": 0.21101759767864942, "learning_rate": 0.00042121871580013916, "loss": 3.0050694942474365, "step": 10129, "token_acc": 0.2971880539292045 }, { "epoch": 5.938141307534448, "grad_norm": 0.15956575178319748, "learning_rate": 0.00042120105939103463, "loss": 2.990670680999756, "step": 10130, "token_acc": 0.29903663866083974 }, { "epoch": 5.938727645851657, "grad_norm": 0.18672151806483672, "learning_rate": 0.0004211834013737269, "loss": 3.01796293258667, "step": 10131, "token_acc": 0.2968043773372932 }, { "epoch": 5.939313984168866, "grad_norm": 0.18281724163599583, "learning_rate": 0.0004211657417483817, "loss": 3.025787115097046, "step": 10132, "token_acc": 0.29653590344437436 }, { "epoch": 5.939900322486075, "grad_norm": 0.17068006411827066, "learning_rate": 0.00042114808051516516, "loss": 3.026735782623291, "step": 10133, "token_acc": 0.2971053215301164 }, { "epoch": 5.940486660803283, "grad_norm": 0.16124717346598438, "learning_rate": 0.000421130417674243, "loss": 2.952767848968506, "step": 10134, "token_acc": 0.30526819825264 }, { "epoch": 5.941072999120492, "grad_norm": 0.18565891505849802, "learning_rate": 0.0004211127532257811, "loss": 3.005120277404785, "step": 10135, "token_acc": 0.2994960478991464 }, { "epoch": 5.941659337437701, "grad_norm": 0.18690578146756098, "learning_rate": 0.00042109508716994544, "loss": 2.98258113861084, "step": 10136, "token_acc": 0.3015057843208205 }, { "epoch": 5.9422456757549105, "grad_norm": 0.18339414119882347, "learning_rate": 0.00042107741950690204, "loss": 3.0450644493103027, "step": 10137, "token_acc": 0.291850049189665 }, { "epoch": 5.94283201407212, "grad_norm": 0.17149843543263027, "learning_rate": 0.0004210597502368168, "loss": 3.0039820671081543, "step": 10138, "token_acc": 0.2983694493687451 }, { "epoch": 5.943418352389329, "grad_norm": 0.16643873121497746, "learning_rate": 0.0004210420793598557, "loss": 2.9848217964172363, "step": 10139, "token_acc": 0.3014339947680725 }, { "epoch": 5.944004690706538, "grad_norm": 0.1838409624411319, "learning_rate": 0.00042102440687618475, "loss": 2.9607200622558594, "step": 10140, "token_acc": 0.30480847595762023 }, { "epoch": 5.944591029023747, "grad_norm": 0.16136157026772766, "learning_rate": 0.00042100673278596995, "loss": 3.0131280422210693, "step": 10141, "token_acc": 0.29757970524573785 }, { "epoch": 5.945177367340956, "grad_norm": 0.17821963070933206, "learning_rate": 0.0004209890570893774, "loss": 3.0361881256103516, "step": 10142, "token_acc": 0.2955748820048396 }, { "epoch": 5.945763705658165, "grad_norm": 0.1790502665619974, "learning_rate": 0.000420971379786573, "loss": 2.9963536262512207, "step": 10143, "token_acc": 0.2983562900969223 }, { "epoch": 5.946350043975373, "grad_norm": 0.17485340806531377, "learning_rate": 0.0004209537008777229, "loss": 2.980264663696289, "step": 10144, "token_acc": 0.30024540960924473 }, { "epoch": 5.9469363822925825, "grad_norm": 0.19346446317447036, "learning_rate": 0.0004209360203629931, "loss": 2.991318702697754, "step": 10145, "token_acc": 0.2998691888687903 }, { "epoch": 5.947522720609792, "grad_norm": 0.19431975203143642, "learning_rate": 0.00042091833824254974, "loss": 3.015496253967285, "step": 10146, "token_acc": 0.29626794456355043 }, { "epoch": 5.948109058927001, "grad_norm": 0.1780084962740806, "learning_rate": 0.00042090065451655894, "loss": 3.0227599143981934, "step": 10147, "token_acc": 0.29663602921800725 }, { "epoch": 5.94869539724421, "grad_norm": 0.1606896798211275, "learning_rate": 0.0004208829691851868, "loss": 3.0569238662719727, "step": 10148, "token_acc": 0.29111936419127854 }, { "epoch": 5.949281735561419, "grad_norm": 0.16162398875864414, "learning_rate": 0.0004208652822485994, "loss": 3.025092601776123, "step": 10149, "token_acc": 0.29523710779040685 }, { "epoch": 5.949868073878628, "grad_norm": 0.14706922334470088, "learning_rate": 0.0004208475937069629, "loss": 2.971788167953491, "step": 10150, "token_acc": 0.3023514430169077 }, { "epoch": 5.950454412195837, "grad_norm": 0.16040543950593195, "learning_rate": 0.0004208299035604435, "loss": 2.999964714050293, "step": 10151, "token_acc": 0.299005364907074 }, { "epoch": 5.951040750513046, "grad_norm": 0.19364090382557161, "learning_rate": 0.0004208122118092074, "loss": 3.0230162143707275, "step": 10152, "token_acc": 0.29543316356832383 }, { "epoch": 5.951627088830255, "grad_norm": 0.19574007473981236, "learning_rate": 0.00042079451845342065, "loss": 3.0151703357696533, "step": 10153, "token_acc": 0.298155662504063 }, { "epoch": 5.9522134271474645, "grad_norm": 0.17282084313920243, "learning_rate": 0.0004207768234932496, "loss": 3.0512194633483887, "step": 10154, "token_acc": 0.29242574409171257 }, { "epoch": 5.952799765464674, "grad_norm": 0.1728576488477399, "learning_rate": 0.00042075912692886037, "loss": 3.0385985374450684, "step": 10155, "token_acc": 0.2927045497805811 }, { "epoch": 5.953386103781882, "grad_norm": 0.20075972573551354, "learning_rate": 0.0004207414287604193, "loss": 2.997769832611084, "step": 10156, "token_acc": 0.2983158675742021 }, { "epoch": 5.953972442099091, "grad_norm": 0.19920310925867202, "learning_rate": 0.0004207237289880925, "loss": 3.0041701793670654, "step": 10157, "token_acc": 0.29838202745746417 }, { "epoch": 5.9545587804163, "grad_norm": 0.1703959870445464, "learning_rate": 0.00042070602761204646, "loss": 3.024268627166748, "step": 10158, "token_acc": 0.29591052898824327 }, { "epoch": 5.955145118733509, "grad_norm": 0.1689078528622826, "learning_rate": 0.00042068832463244723, "loss": 3.0222389698028564, "step": 10159, "token_acc": 0.29559484162802036 }, { "epoch": 5.955731457050718, "grad_norm": 0.28107386772482545, "learning_rate": 0.0004206706200494612, "loss": 3.0285372734069824, "step": 10160, "token_acc": 0.2936767472145854 }, { "epoch": 5.956317795367927, "grad_norm": 0.32293390818038675, "learning_rate": 0.00042065291386325464, "loss": 3.0013487339019775, "step": 10161, "token_acc": 0.29854484795392194 }, { "epoch": 5.9569041336851365, "grad_norm": 0.17980249612167645, "learning_rate": 0.00042063520607399396, "loss": 3.004537343978882, "step": 10162, "token_acc": 0.29593235528852146 }, { "epoch": 5.957490472002346, "grad_norm": 0.2835992235177475, "learning_rate": 0.0004206174966818455, "loss": 2.979393720626831, "step": 10163, "token_acc": 0.3017877687146938 }, { "epoch": 5.958076810319555, "grad_norm": 0.31382247076774583, "learning_rate": 0.00042059978568697546, "loss": 3.059453010559082, "step": 10164, "token_acc": 0.290568353575525 }, { "epoch": 5.958663148636763, "grad_norm": 0.18438570128047704, "learning_rate": 0.00042058207308955044, "loss": 3.0478997230529785, "step": 10165, "token_acc": 0.2905588197996365 }, { "epoch": 5.959249486953972, "grad_norm": 0.21976389456502318, "learning_rate": 0.0004205643588897366, "loss": 3.006145477294922, "step": 10166, "token_acc": 0.29787883912952384 }, { "epoch": 5.959835825271181, "grad_norm": 0.18165579240596308, "learning_rate": 0.00042054664308770055, "loss": 3.003462314605713, "step": 10167, "token_acc": 0.29640935719124856 }, { "epoch": 5.96042216358839, "grad_norm": 0.17607163701784415, "learning_rate": 0.0004205289256836085, "loss": 2.9869227409362793, "step": 10168, "token_acc": 0.29992468753198154 }, { "epoch": 5.961008501905599, "grad_norm": 0.16877812078672985, "learning_rate": 0.000420511206677627, "loss": 2.9978489875793457, "step": 10169, "token_acc": 0.29977983518794493 }, { "epoch": 5.9615948402228085, "grad_norm": 0.1640217566138485, "learning_rate": 0.00042049348606992257, "loss": 3.0411765575408936, "step": 10170, "token_acc": 0.29139203900812144 }, { "epoch": 5.962181178540018, "grad_norm": 0.16581981663744633, "learning_rate": 0.0004204757638606614, "loss": 3.007805824279785, "step": 10171, "token_acc": 0.29770460603447824 }, { "epoch": 5.962767516857227, "grad_norm": 0.15590179006372804, "learning_rate": 0.0004204580400500103, "loss": 3.007822036743164, "step": 10172, "token_acc": 0.29901279519987695 }, { "epoch": 5.963353855174436, "grad_norm": 0.15782226567640084, "learning_rate": 0.0004204403146381356, "loss": 3.0108377933502197, "step": 10173, "token_acc": 0.2988230263453107 }, { "epoch": 5.963940193491645, "grad_norm": 0.15488764902337943, "learning_rate": 0.0004204225876252038, "loss": 3.0299644470214844, "step": 10174, "token_acc": 0.2947123917035231 }, { "epoch": 5.964526531808854, "grad_norm": 0.16047125951682964, "learning_rate": 0.0004204048590113814, "loss": 2.986888885498047, "step": 10175, "token_acc": 0.29897792778625026 }, { "epoch": 5.965112870126063, "grad_norm": 0.148887769167955, "learning_rate": 0.000420387128796835, "loss": 3.0119035243988037, "step": 10176, "token_acc": 0.29797941348650275 }, { "epoch": 5.965699208443271, "grad_norm": 0.17127556745838854, "learning_rate": 0.00042036939698173115, "loss": 3.0405898094177246, "step": 10177, "token_acc": 0.2927879059776772 }, { "epoch": 5.9662855467604805, "grad_norm": 0.15769680984664589, "learning_rate": 0.0004203516635662363, "loss": 3.0122013092041016, "step": 10178, "token_acc": 0.2955767260590826 }, { "epoch": 5.96687188507769, "grad_norm": 0.14976668413094577, "learning_rate": 0.00042033392855051724, "loss": 2.9921820163726807, "step": 10179, "token_acc": 0.30129953683241495 }, { "epoch": 5.967458223394899, "grad_norm": 0.16479827443943812, "learning_rate": 0.00042031619193474035, "loss": 2.9297056198120117, "step": 10180, "token_acc": 0.3078925066437883 }, { "epoch": 5.968044561712108, "grad_norm": 0.16215011606989627, "learning_rate": 0.0004202984537190724, "loss": 2.997084140777588, "step": 10181, "token_acc": 0.2976976694716117 }, { "epoch": 5.968630900029317, "grad_norm": 0.16104988354849414, "learning_rate": 0.00042028071390367997, "loss": 3.022481918334961, "step": 10182, "token_acc": 0.29570990134221997 }, { "epoch": 5.969217238346526, "grad_norm": 0.16811743541530252, "learning_rate": 0.0004202629724887297, "loss": 3.023376703262329, "step": 10183, "token_acc": 0.29402472582359473 }, { "epoch": 5.969803576663735, "grad_norm": 0.16818534641799762, "learning_rate": 0.00042024522947438814, "loss": 2.993215560913086, "step": 10184, "token_acc": 0.3017783467722414 }, { "epoch": 5.970389914980944, "grad_norm": 0.1618383286085388, "learning_rate": 0.0004202274848608221, "loss": 3.0567498207092285, "step": 10185, "token_acc": 0.2902952910878935 }, { "epoch": 5.970976253298153, "grad_norm": 0.1693569419786793, "learning_rate": 0.0004202097386481982, "loss": 3.079111099243164, "step": 10186, "token_acc": 0.28904556467037873 }, { "epoch": 5.971562591615362, "grad_norm": 0.20173180485046935, "learning_rate": 0.00042019199083668325, "loss": 3.0212135314941406, "step": 10187, "token_acc": 0.29545552346405907 }, { "epoch": 5.972148929932571, "grad_norm": 0.19181097673967357, "learning_rate": 0.0004201742414264439, "loss": 2.9974780082702637, "step": 10188, "token_acc": 0.29880486415585367 }, { "epoch": 5.97273526824978, "grad_norm": 0.15255212681466096, "learning_rate": 0.00042015649041764674, "loss": 2.981656312942505, "step": 10189, "token_acc": 0.30045094467678723 }, { "epoch": 5.973321606566989, "grad_norm": 0.16750701396309994, "learning_rate": 0.0004201387378104587, "loss": 2.9705629348754883, "step": 10190, "token_acc": 0.3016007472713081 }, { "epoch": 5.973907944884198, "grad_norm": 0.1988683883181944, "learning_rate": 0.0004201209836050465, "loss": 3.028747320175171, "step": 10191, "token_acc": 0.2945310645033637 }, { "epoch": 5.974494283201407, "grad_norm": 0.23337242869419822, "learning_rate": 0.0004201032278015769, "loss": 3.009829044342041, "step": 10192, "token_acc": 0.2967162890159148 }, { "epoch": 5.975080621518616, "grad_norm": 0.19976368089196495, "learning_rate": 0.00042008547040021666, "loss": 3.0155391693115234, "step": 10193, "token_acc": 0.2975462600661399 }, { "epoch": 5.975666959835825, "grad_norm": 0.17116752011754288, "learning_rate": 0.00042006771140113265, "loss": 3.0229172706604004, "step": 10194, "token_acc": 0.29530639677794174 }, { "epoch": 5.9762532981530345, "grad_norm": 0.15143046214575706, "learning_rate": 0.0004200499508044916, "loss": 3.013610363006592, "step": 10195, "token_acc": 0.29653473215704973 }, { "epoch": 5.976839636470244, "grad_norm": 0.17577597328590638, "learning_rate": 0.00042003218861046045, "loss": 3.0347273349761963, "step": 10196, "token_acc": 0.2939757967069533 }, { "epoch": 5.977425974787453, "grad_norm": 0.18523430910513905, "learning_rate": 0.00042001442481920604, "loss": 3.039219856262207, "step": 10197, "token_acc": 0.294815289470921 }, { "epoch": 5.978012313104662, "grad_norm": 0.20451882411876415, "learning_rate": 0.0004199966594308952, "loss": 3.0615601539611816, "step": 10198, "token_acc": 0.29018676938236215 }, { "epoch": 5.97859865142187, "grad_norm": 0.15991013191907558, "learning_rate": 0.00041997889244569476, "loss": 3.0142476558685303, "step": 10199, "token_acc": 0.2987742080361775 }, { "epoch": 5.979184989739079, "grad_norm": 0.21747726175218823, "learning_rate": 0.0004199611238637717, "loss": 3.0526537895202637, "step": 10200, "token_acc": 0.29054927429949196 }, { "epoch": 5.979771328056288, "grad_norm": 0.27418772009522324, "learning_rate": 0.00041994335368529295, "loss": 2.94643235206604, "step": 10201, "token_acc": 0.3055137355364387 }, { "epoch": 5.980357666373497, "grad_norm": 0.1833981945104493, "learning_rate": 0.0004199255819104254, "loss": 3.0539803504943848, "step": 10202, "token_acc": 0.2897983020547229 }, { "epoch": 5.9809440046907065, "grad_norm": 0.22275993132558394, "learning_rate": 0.00041990780853933587, "loss": 3.0202012062072754, "step": 10203, "token_acc": 0.29544636685367226 }, { "epoch": 5.981530343007916, "grad_norm": 0.2870112044490932, "learning_rate": 0.0004198900335721916, "loss": 3.0273287296295166, "step": 10204, "token_acc": 0.2956271121187492 }, { "epoch": 5.982116681325125, "grad_norm": 0.17647883945140513, "learning_rate": 0.00041987225700915924, "loss": 2.98618745803833, "step": 10205, "token_acc": 0.301177848636689 }, { "epoch": 5.982703019642334, "grad_norm": 0.2654677882195503, "learning_rate": 0.000419854478850406, "loss": 3.0113375186920166, "step": 10206, "token_acc": 0.2971034440570135 }, { "epoch": 5.983289357959543, "grad_norm": 0.17716159808297402, "learning_rate": 0.00041983669909609886, "loss": 3.0044643878936768, "step": 10207, "token_acc": 0.298307702375499 }, { "epoch": 5.983875696276751, "grad_norm": 0.2056691154845506, "learning_rate": 0.00041981891774640467, "loss": 3.036376953125, "step": 10208, "token_acc": 0.29454017585161674 }, { "epoch": 5.98446203459396, "grad_norm": 0.16510792824010562, "learning_rate": 0.0004198011348014907, "loss": 3.0273358821868896, "step": 10209, "token_acc": 0.29516528067347964 }, { "epoch": 5.985048372911169, "grad_norm": 0.21722174258949137, "learning_rate": 0.0004197833502615238, "loss": 3.0159785747528076, "step": 10210, "token_acc": 0.2960712583193087 }, { "epoch": 5.9856347112283785, "grad_norm": 0.19078106039372017, "learning_rate": 0.00041976556412667116, "loss": 3.020984411239624, "step": 10211, "token_acc": 0.29661512656216193 }, { "epoch": 5.986221049545588, "grad_norm": 0.2225403146966675, "learning_rate": 0.0004197477763970998, "loss": 3.043126106262207, "step": 10212, "token_acc": 0.2935525175575876 }, { "epoch": 5.986807387862797, "grad_norm": 0.19659209912739298, "learning_rate": 0.0004197299870729768, "loss": 2.9881772994995117, "step": 10213, "token_acc": 0.3005287119611131 }, { "epoch": 5.987393726180006, "grad_norm": 0.17777550847436593, "learning_rate": 0.0004197121961544693, "loss": 3.027704954147339, "step": 10214, "token_acc": 0.29740364995270124 }, { "epoch": 5.987980064497215, "grad_norm": 0.1668465078725287, "learning_rate": 0.0004196944036417444, "loss": 3.013617753982544, "step": 10215, "token_acc": 0.2955202540029648 }, { "epoch": 5.988566402814424, "grad_norm": 0.19592716842153865, "learning_rate": 0.0004196766095349692, "loss": 3.01521372795105, "step": 10216, "token_acc": 0.29671640869898397 }, { "epoch": 5.989152741131633, "grad_norm": 0.1666962793766802, "learning_rate": 0.000419658813834311, "loss": 3.039407730102539, "step": 10217, "token_acc": 0.2947220447953425 }, { "epoch": 5.989739079448842, "grad_norm": 0.19626625633076203, "learning_rate": 0.0004196410165399367, "loss": 2.996209144592285, "step": 10218, "token_acc": 0.2984555349208961 }, { "epoch": 5.990325417766051, "grad_norm": 0.16703075487887545, "learning_rate": 0.00041962321765201375, "loss": 3.010937213897705, "step": 10219, "token_acc": 0.2978081515384177 }, { "epoch": 5.99091175608326, "grad_norm": 0.16751913684880004, "learning_rate": 0.00041960541717070925, "loss": 2.983689069747925, "step": 10220, "token_acc": 0.301797365184583 }, { "epoch": 5.991498094400469, "grad_norm": 0.18181719918290598, "learning_rate": 0.00041958761509619036, "loss": 2.9922404289245605, "step": 10221, "token_acc": 0.29966833616167454 }, { "epoch": 5.992084432717678, "grad_norm": 0.16532789154897104, "learning_rate": 0.00041956981142862444, "loss": 3.0589213371276855, "step": 10222, "token_acc": 0.29111120215920555 }, { "epoch": 5.992670771034887, "grad_norm": 0.18308493192581085, "learning_rate": 0.00041955200616817855, "loss": 2.977748394012451, "step": 10223, "token_acc": 0.30077948412418437 }, { "epoch": 5.993257109352096, "grad_norm": 0.18598531445637626, "learning_rate": 0.00041953419931502005, "loss": 2.9840216636657715, "step": 10224, "token_acc": 0.3001503278928953 }, { "epoch": 5.993843447669305, "grad_norm": 0.17486518973025356, "learning_rate": 0.00041951639086931623, "loss": 2.999925374984741, "step": 10225, "token_acc": 0.29937232384885126 }, { "epoch": 5.994429785986514, "grad_norm": 0.18042436118828006, "learning_rate": 0.0004194985808312343, "loss": 3.027662515640259, "step": 10226, "token_acc": 0.2963155760754411 }, { "epoch": 5.995016124303723, "grad_norm": 0.1998489285459203, "learning_rate": 0.00041948076920094167, "loss": 3.0158166885375977, "step": 10227, "token_acc": 0.29682251475261007 }, { "epoch": 5.9956024626209325, "grad_norm": 0.15512991124024128, "learning_rate": 0.0004194629559786055, "loss": 2.97990083694458, "step": 10228, "token_acc": 0.3021337693097738 }, { "epoch": 5.996188800938142, "grad_norm": 0.21406460170670397, "learning_rate": 0.0004194451411643933, "loss": 3.0143604278564453, "step": 10229, "token_acc": 0.29750987980842875 }, { "epoch": 5.99677513925535, "grad_norm": 0.18499744631889353, "learning_rate": 0.0004194273247584722, "loss": 3.0046916007995605, "step": 10230, "token_acc": 0.29774000594061434 }, { "epoch": 5.997361477572559, "grad_norm": 0.15606292161293817, "learning_rate": 0.0004194095067610099, "loss": 3.0212793350219727, "step": 10231, "token_acc": 0.2963970001104033 }, { "epoch": 5.997947815889768, "grad_norm": 0.16688105034440717, "learning_rate": 0.0004193916871721734, "loss": 3.0289816856384277, "step": 10232, "token_acc": 0.2940353026094184 }, { "epoch": 5.998534154206977, "grad_norm": 0.18476601808057938, "learning_rate": 0.0004193738659921303, "loss": 2.9925098419189453, "step": 10233, "token_acc": 0.3007105849472005 }, { "epoch": 5.999120492524186, "grad_norm": 0.17962035332060558, "learning_rate": 0.0004193560432210479, "loss": 3.032536745071411, "step": 10234, "token_acc": 0.29506226324381724 }, { "epoch": 5.999706830841395, "grad_norm": 0.16718451680398844, "learning_rate": 0.00041933821885909383, "loss": 3.0617456436157227, "step": 10235, "token_acc": 0.29062680935172797 }, { "epoch": 6.0, "grad_norm": 0.21063145652853174, "learning_rate": 0.0004193203929064353, "loss": 3.056640625, "step": 10236, "token_acc": 0.2909936897151622 }, { "epoch": 6.0, "eval_loss": 3.0649304389953613, "eval_runtime": 6.4806, "eval_samples_per_second": 39.503, "eval_steps_per_second": 4.938, "eval_token_acc": 0.29070042328995666, "step": 10236 }, { "epoch": 6.000586338317209, "grad_norm": 0.1732140922757181, "learning_rate": 0.00041930256536323987, "loss": 2.9602222442626953, "step": 10237, "token_acc": 0.3034973715878143 }, { "epoch": 6.001172676634418, "grad_norm": 0.23073474418582104, "learning_rate": 0.0004192847362296749, "loss": 2.8903746604919434, "step": 10238, "token_acc": 0.3131769465357366 }, { "epoch": 6.001759014951627, "grad_norm": 0.3609871729948302, "learning_rate": 0.00041926690550590795, "loss": 2.9143409729003906, "step": 10239, "token_acc": 0.3083252396049039 }, { "epoch": 6.0023453532688364, "grad_norm": 0.3052491497679607, "learning_rate": 0.0004192490731921066, "loss": 2.9281511306762695, "step": 10240, "token_acc": 0.30790565018847943 }, { "epoch": 6.002931691586046, "grad_norm": 0.1671651248234393, "learning_rate": 0.0004192312392884382, "loss": 2.920990228652954, "step": 10241, "token_acc": 0.30904523285274377 }, { "epoch": 6.003518029903254, "grad_norm": 0.25736012696735594, "learning_rate": 0.00041921340379507045, "loss": 2.9117870330810547, "step": 10242, "token_acc": 0.3114368728339256 }, { "epoch": 6.004104368220463, "grad_norm": 0.20279930532446444, "learning_rate": 0.0004191955667121707, "loss": 2.939326047897339, "step": 10243, "token_acc": 0.3078686079619528 }, { "epoch": 6.004690706537672, "grad_norm": 0.22137151923084947, "learning_rate": 0.0004191777280399066, "loss": 2.911097526550293, "step": 10244, "token_acc": 0.3088605357291407 }, { "epoch": 6.005277044854881, "grad_norm": 0.1829015509180036, "learning_rate": 0.0004191598877784457, "loss": 2.9374403953552246, "step": 10245, "token_acc": 0.30779441422095577 }, { "epoch": 6.00586338317209, "grad_norm": 0.20160660293676944, "learning_rate": 0.00041914204592795567, "loss": 2.9247970581054688, "step": 10246, "token_acc": 0.3088559747792386 }, { "epoch": 6.006449721489299, "grad_norm": 0.20762467217449807, "learning_rate": 0.00041912420248860395, "loss": 2.9592928886413574, "step": 10247, "token_acc": 0.30244030319147597 }, { "epoch": 6.0070360598065085, "grad_norm": 0.19888383328183237, "learning_rate": 0.00041910635746055837, "loss": 2.8911755084991455, "step": 10248, "token_acc": 0.31474624841752874 }, { "epoch": 6.007622398123718, "grad_norm": 0.18415535820961962, "learning_rate": 0.00041908851084398633, "loss": 2.9320225715637207, "step": 10249, "token_acc": 0.3070092871485944 }, { "epoch": 6.008208736440927, "grad_norm": 0.20120319718821617, "learning_rate": 0.00041907066263905556, "loss": 2.929243564605713, "step": 10250, "token_acc": 0.30765369736030607 }, { "epoch": 6.008795074758136, "grad_norm": 0.1818432437470063, "learning_rate": 0.0004190528128459339, "loss": 2.9245409965515137, "step": 10251, "token_acc": 0.3074367499610369 }, { "epoch": 6.009381413075345, "grad_norm": 0.20991402729349298, "learning_rate": 0.00041903496146478863, "loss": 2.9130775928497314, "step": 10252, "token_acc": 0.30931196994038246 }, { "epoch": 6.009967751392553, "grad_norm": 0.18934900162091142, "learning_rate": 0.0004190171084957878, "loss": 2.9686741828918457, "step": 10253, "token_acc": 0.30122390583338754 }, { "epoch": 6.010554089709762, "grad_norm": 0.20253345313546836, "learning_rate": 0.00041899925393909906, "loss": 2.9493823051452637, "step": 10254, "token_acc": 0.3041237941691618 }, { "epoch": 6.011140428026971, "grad_norm": 0.1700873310626552, "learning_rate": 0.00041898139779489, "loss": 2.949160575866699, "step": 10255, "token_acc": 0.3051789116863703 }, { "epoch": 6.0117267663441805, "grad_norm": 0.18633794896729666, "learning_rate": 0.0004189635400633284, "loss": 2.900892496109009, "step": 10256, "token_acc": 0.31239673819057373 }, { "epoch": 6.01231310466139, "grad_norm": 0.18970582546479434, "learning_rate": 0.00041894568074458196, "loss": 2.8754091262817383, "step": 10257, "token_acc": 0.3153083741832259 }, { "epoch": 6.012899442978599, "grad_norm": 0.18164607035165228, "learning_rate": 0.00041892781983881856, "loss": 2.9724740982055664, "step": 10258, "token_acc": 0.300861901301041 }, { "epoch": 6.013485781295808, "grad_norm": 0.1899247818119006, "learning_rate": 0.00041890995734620597, "loss": 2.908720016479492, "step": 10259, "token_acc": 0.31182330349692394 }, { "epoch": 6.014072119613017, "grad_norm": 0.16423428634503887, "learning_rate": 0.0004188920932669119, "loss": 2.943769931793213, "step": 10260, "token_acc": 0.30470131059553685 }, { "epoch": 6.014658457930226, "grad_norm": 0.15987445710260936, "learning_rate": 0.0004188742276011042, "loss": 2.970900774002075, "step": 10261, "token_acc": 0.3021199469882295 }, { "epoch": 6.015244796247435, "grad_norm": 0.180316375322308, "learning_rate": 0.0004188563603489507, "loss": 2.9571022987365723, "step": 10262, "token_acc": 0.30539840561949644 }, { "epoch": 6.015831134564644, "grad_norm": 0.18304974203301286, "learning_rate": 0.00041883849151061925, "loss": 2.9161009788513184, "step": 10263, "token_acc": 0.31026708882378445 }, { "epoch": 6.0164174728818525, "grad_norm": 0.16850110187330536, "learning_rate": 0.0004188206210862776, "loss": 2.971259117126465, "step": 10264, "token_acc": 0.3013107725368899 }, { "epoch": 6.017003811199062, "grad_norm": 0.20797054182622418, "learning_rate": 0.0004188027490760938, "loss": 2.919766902923584, "step": 10265, "token_acc": 0.30772082018927444 }, { "epoch": 6.017590149516271, "grad_norm": 0.17961505033452962, "learning_rate": 0.00041878487548023557, "loss": 2.9463272094726562, "step": 10266, "token_acc": 0.30418695861796236 }, { "epoch": 6.01817648783348, "grad_norm": 0.18104506752635316, "learning_rate": 0.0004187670002988709, "loss": 2.926412582397461, "step": 10267, "token_acc": 0.30650189840204967 }, { "epoch": 6.018762826150689, "grad_norm": 0.1771857256890836, "learning_rate": 0.0004187491235321678, "loss": 2.954050302505493, "step": 10268, "token_acc": 0.30280952002644734 }, { "epoch": 6.019349164467898, "grad_norm": 0.16411790394919737, "learning_rate": 0.0004187312451802939, "loss": 2.9356212615966797, "step": 10269, "token_acc": 0.30725775640603387 }, { "epoch": 6.019935502785107, "grad_norm": 0.15502051172818598, "learning_rate": 0.0004187133652434174, "loss": 2.9147231578826904, "step": 10270, "token_acc": 0.30916826583830875 }, { "epoch": 6.020521841102316, "grad_norm": 0.16839230176558526, "learning_rate": 0.0004186954837217062, "loss": 2.9511795043945312, "step": 10271, "token_acc": 0.30318921901351265 }, { "epoch": 6.021108179419525, "grad_norm": 0.17475458469242688, "learning_rate": 0.0004186776006153281, "loss": 2.899600028991699, "step": 10272, "token_acc": 0.3110405490186036 }, { "epoch": 6.0216945177367345, "grad_norm": 0.22619432671172965, "learning_rate": 0.0004186597159244514, "loss": 2.8895201683044434, "step": 10273, "token_acc": 0.3129109439648905 }, { "epoch": 6.022280856053943, "grad_norm": 0.19186210027787526, "learning_rate": 0.0004186418296492439, "loss": 2.899771213531494, "step": 10274, "token_acc": 0.31199427490776815 }, { "epoch": 6.022867194371152, "grad_norm": 0.16170970079912325, "learning_rate": 0.0004186239417898736, "loss": 2.9532153606414795, "step": 10275, "token_acc": 0.30277884424439 }, { "epoch": 6.023453532688361, "grad_norm": 0.16659249526852862, "learning_rate": 0.0004186060523465087, "loss": 2.9661145210266113, "step": 10276, "token_acc": 0.30138699195281704 }, { "epoch": 6.02403987100557, "grad_norm": 0.1550768826486709, "learning_rate": 0.00041858816131931697, "loss": 2.931694507598877, "step": 10277, "token_acc": 0.30713176000591846 }, { "epoch": 6.024626209322779, "grad_norm": 0.2023897108379906, "learning_rate": 0.0004185702687084668, "loss": 2.952353000640869, "step": 10278, "token_acc": 0.3042075776010866 }, { "epoch": 6.025212547639988, "grad_norm": 0.2737210806263657, "learning_rate": 0.0004185523745141261, "loss": 2.9173951148986816, "step": 10279, "token_acc": 0.3083536401011936 }, { "epoch": 6.025798885957197, "grad_norm": 0.24692656329680757, "learning_rate": 0.0004185344787364629, "loss": 2.926464080810547, "step": 10280, "token_acc": 0.30737464055580044 }, { "epoch": 6.0263852242744065, "grad_norm": 0.1766469610697834, "learning_rate": 0.0004185165813756454, "loss": 2.9276132583618164, "step": 10281, "token_acc": 0.30764688815478036 }, { "epoch": 6.026971562591616, "grad_norm": 0.20095621938661906, "learning_rate": 0.0004184986824318416, "loss": 2.943746328353882, "step": 10282, "token_acc": 0.30490956730965296 }, { "epoch": 6.027557900908825, "grad_norm": 0.20193416637082162, "learning_rate": 0.00041848078190521987, "loss": 2.946881055831909, "step": 10283, "token_acc": 0.30474138720848964 }, { "epoch": 6.028144239226034, "grad_norm": 0.17148757224654243, "learning_rate": 0.0004184628797959482, "loss": 2.9447832107543945, "step": 10284, "token_acc": 0.3054198110436394 }, { "epoch": 6.028730577543242, "grad_norm": 0.22323579976070798, "learning_rate": 0.0004184449761041947, "loss": 2.942300796508789, "step": 10285, "token_acc": 0.3051546712345452 }, { "epoch": 6.029316915860451, "grad_norm": 0.15810423850894004, "learning_rate": 0.00041842707083012776, "loss": 2.872720718383789, "step": 10286, "token_acc": 0.3152277343563658 }, { "epoch": 6.02990325417766, "grad_norm": 0.25053218335349375, "learning_rate": 0.00041840916397391535, "loss": 2.9371237754821777, "step": 10287, "token_acc": 0.30627491885652564 }, { "epoch": 6.030489592494869, "grad_norm": 0.2464954453440709, "learning_rate": 0.0004183912555357259, "loss": 2.939788818359375, "step": 10288, "token_acc": 0.30507402905417785 }, { "epoch": 6.0310759308120785, "grad_norm": 0.14457853428184936, "learning_rate": 0.00041837334551572734, "loss": 2.9254703521728516, "step": 10289, "token_acc": 0.3081608453305416 }, { "epoch": 6.031662269129288, "grad_norm": 0.1888141336440917, "learning_rate": 0.0004183554339140882, "loss": 2.918151378631592, "step": 10290, "token_acc": 0.3088473701709576 }, { "epoch": 6.032248607446497, "grad_norm": 0.15168523100534667, "learning_rate": 0.0004183375207309766, "loss": 2.971987724304199, "step": 10291, "token_acc": 0.30259228974709307 }, { "epoch": 6.032834945763706, "grad_norm": 0.1994691072720343, "learning_rate": 0.00041831960596656084, "loss": 2.924440860748291, "step": 10292, "token_acc": 0.30785049546177956 }, { "epoch": 6.033421284080915, "grad_norm": 0.16615673005090645, "learning_rate": 0.0004183016896210091, "loss": 2.9166624546051025, "step": 10293, "token_acc": 0.3078486012521779 }, { "epoch": 6.034007622398124, "grad_norm": 0.157124867727336, "learning_rate": 0.0004182837716944899, "loss": 2.941511631011963, "step": 10294, "token_acc": 0.3049785646619318 }, { "epoch": 6.034593960715333, "grad_norm": 0.1539725454567096, "learning_rate": 0.0004182658521871714, "loss": 2.9362523555755615, "step": 10295, "token_acc": 0.3079132987746913 }, { "epoch": 6.035180299032541, "grad_norm": 0.1719964590210007, "learning_rate": 0.0004182479310992219, "loss": 2.915773391723633, "step": 10296, "token_acc": 0.3092828439606234 }, { "epoch": 6.0357666373497505, "grad_norm": 0.15213747989747783, "learning_rate": 0.0004182300084308099, "loss": 2.931304454803467, "step": 10297, "token_acc": 0.307035988370769 }, { "epoch": 6.03635297566696, "grad_norm": 0.1650825013505614, "learning_rate": 0.00041821208418210356, "loss": 2.979233980178833, "step": 10298, "token_acc": 0.30156367523059263 }, { "epoch": 6.036939313984169, "grad_norm": 0.1728601046774603, "learning_rate": 0.00041819415835327146, "loss": 2.950711488723755, "step": 10299, "token_acc": 0.30277904863654603 }, { "epoch": 6.037525652301378, "grad_norm": 0.16217665300653067, "learning_rate": 0.00041817623094448183, "loss": 2.903207778930664, "step": 10300, "token_acc": 0.3144722782231533 }, { "epoch": 6.038111990618587, "grad_norm": 0.1684995085779338, "learning_rate": 0.0004181583019559031, "loss": 2.93789005279541, "step": 10301, "token_acc": 0.30763651164732403 }, { "epoch": 6.038698328935796, "grad_norm": 0.15511565110013817, "learning_rate": 0.0004181403713877038, "loss": 2.90281343460083, "step": 10302, "token_acc": 0.31053696589342794 }, { "epoch": 6.039284667253005, "grad_norm": 0.1619090760156598, "learning_rate": 0.0004181224392400522, "loss": 2.907012462615967, "step": 10303, "token_acc": 0.3120054145446404 }, { "epoch": 6.039871005570214, "grad_norm": 0.15782127629841022, "learning_rate": 0.00041810450551311686, "loss": 2.9243760108947754, "step": 10304, "token_acc": 0.3067929490908171 }, { "epoch": 6.040457343887423, "grad_norm": 0.15300743518901036, "learning_rate": 0.0004180865702070662, "loss": 2.9212276935577393, "step": 10305, "token_acc": 0.30850843719963017 }, { "epoch": 6.0410436822046325, "grad_norm": 0.1648611663087758, "learning_rate": 0.00041806863332206873, "loss": 2.9567737579345703, "step": 10306, "token_acc": 0.30360432281049854 }, { "epoch": 6.041630020521841, "grad_norm": 0.18277209953764542, "learning_rate": 0.00041805069485829297, "loss": 2.9483766555786133, "step": 10307, "token_acc": 0.30584636001693316 }, { "epoch": 6.04221635883905, "grad_norm": 0.1774457935808168, "learning_rate": 0.0004180327548159073, "loss": 2.8721115589141846, "step": 10308, "token_acc": 0.3162860942198818 }, { "epoch": 6.042802697156259, "grad_norm": 0.17867244998495968, "learning_rate": 0.00041801481319508036, "loss": 2.9132578372955322, "step": 10309, "token_acc": 0.3100189566446484 }, { "epoch": 6.043389035473468, "grad_norm": 0.1950571612596303, "learning_rate": 0.00041799686999598076, "loss": 2.916032314300537, "step": 10310, "token_acc": 0.30868813833762354 }, { "epoch": 6.043975373790677, "grad_norm": 0.16812822607809844, "learning_rate": 0.00041797892521877676, "loss": 2.9665660858154297, "step": 10311, "token_acc": 0.3012493996859704 }, { "epoch": 6.044561712107886, "grad_norm": 0.1854890423740394, "learning_rate": 0.00041796097886363727, "loss": 2.933084011077881, "step": 10312, "token_acc": 0.3065175802237407 }, { "epoch": 6.045148050425095, "grad_norm": 0.17313592687949653, "learning_rate": 0.0004179430309307306, "loss": 2.929915428161621, "step": 10313, "token_acc": 0.3061842188521651 }, { "epoch": 6.0457343887423045, "grad_norm": 0.16677884034130305, "learning_rate": 0.0004179250814202255, "loss": 2.9196548461914062, "step": 10314, "token_acc": 0.3095116587890463 }, { "epoch": 6.046320727059514, "grad_norm": 0.18826194550535516, "learning_rate": 0.00041790713033229056, "loss": 2.9299111366271973, "step": 10315, "token_acc": 0.3077982785001814 }, { "epoch": 6.046907065376723, "grad_norm": 0.20063663884843969, "learning_rate": 0.0004178891776670943, "loss": 2.9062259197235107, "step": 10316, "token_acc": 0.31244908691572404 }, { "epoch": 6.047493403693931, "grad_norm": 0.1699002414670016, "learning_rate": 0.00041787122342480556, "loss": 2.9566714763641357, "step": 10317, "token_acc": 0.3038845317340012 }, { "epoch": 6.04807974201114, "grad_norm": 0.16554430243884574, "learning_rate": 0.00041785326760559284, "loss": 2.92295241355896, "step": 10318, "token_acc": 0.3102327570730059 }, { "epoch": 6.048666080328349, "grad_norm": 0.16112018530797267, "learning_rate": 0.00041783531020962483, "loss": 2.944362163543701, "step": 10319, "token_acc": 0.30557263190117856 }, { "epoch": 6.049252418645558, "grad_norm": 0.1651824161066362, "learning_rate": 0.0004178173512370703, "loss": 2.947007656097412, "step": 10320, "token_acc": 0.3061213710561273 }, { "epoch": 6.049838756962767, "grad_norm": 0.1795722350359318, "learning_rate": 0.00041779939068809787, "loss": 2.9627013206481934, "step": 10321, "token_acc": 0.30487512487512486 }, { "epoch": 6.0504250952799765, "grad_norm": 0.16799045230146586, "learning_rate": 0.00041778142856287626, "loss": 2.9076757431030273, "step": 10322, "token_acc": 0.31302323825141326 }, { "epoch": 6.051011433597186, "grad_norm": 0.1806317288147836, "learning_rate": 0.00041776346486157423, "loss": 2.9344029426574707, "step": 10323, "token_acc": 0.3085921505200229 }, { "epoch": 6.051597771914395, "grad_norm": 0.23655106092417819, "learning_rate": 0.00041774549958436055, "loss": 2.932159423828125, "step": 10324, "token_acc": 0.306933621110085 }, { "epoch": 6.052184110231604, "grad_norm": 0.4308092768756055, "learning_rate": 0.000417727532731404, "loss": 2.9882700443267822, "step": 10325, "token_acc": 0.30047418307904256 }, { "epoch": 6.052770448548813, "grad_norm": 0.3698731130131215, "learning_rate": 0.0004177095643028731, "loss": 2.929323196411133, "step": 10326, "token_acc": 0.3084223367440934 }, { "epoch": 6.053356786866022, "grad_norm": 0.25251449455407415, "learning_rate": 0.0004176915942989369, "loss": 2.947962760925293, "step": 10327, "token_acc": 0.3062000812237715 }, { "epoch": 6.05394312518323, "grad_norm": 0.3089726113668326, "learning_rate": 0.0004176736227197642, "loss": 2.9194841384887695, "step": 10328, "token_acc": 0.31022529384672826 }, { "epoch": 6.054529463500439, "grad_norm": 0.24805624562202344, "learning_rate": 0.0004176556495655237, "loss": 2.907470703125, "step": 10329, "token_acc": 0.3106066336124942 }, { "epoch": 6.0551158018176485, "grad_norm": 0.23731230430167546, "learning_rate": 0.00041763767483638436, "loss": 2.908590793609619, "step": 10330, "token_acc": 0.3115572770701053 }, { "epoch": 6.055702140134858, "grad_norm": 0.26702752980083244, "learning_rate": 0.0004176196985325149, "loss": 2.92924427986145, "step": 10331, "token_acc": 0.30931783750796293 }, { "epoch": 6.056288478452067, "grad_norm": 0.16294038506958036, "learning_rate": 0.0004176017206540843, "loss": 2.961214780807495, "step": 10332, "token_acc": 0.3022245555521242 }, { "epoch": 6.056874816769276, "grad_norm": 0.22398987850303756, "learning_rate": 0.0004175837412012613, "loss": 2.931196689605713, "step": 10333, "token_acc": 0.30635985657908643 }, { "epoch": 6.057461155086485, "grad_norm": 0.2242662464076655, "learning_rate": 0.0004175657601742149, "loss": 2.9264841079711914, "step": 10334, "token_acc": 0.30820708072235004 }, { "epoch": 6.058047493403694, "grad_norm": 0.16316157263405973, "learning_rate": 0.000417547777573114, "loss": 2.9220733642578125, "step": 10335, "token_acc": 0.30918539949331436 }, { "epoch": 6.058633831720903, "grad_norm": 0.21982210638704597, "learning_rate": 0.0004175297933981275, "loss": 2.9503307342529297, "step": 10336, "token_acc": 0.30366494941675787 }, { "epoch": 6.059220170038112, "grad_norm": 0.21920984189241072, "learning_rate": 0.00041751180764942436, "loss": 2.9321038722991943, "step": 10337, "token_acc": 0.30667895003045037 }, { "epoch": 6.059806508355321, "grad_norm": 0.20396125926540085, "learning_rate": 0.00041749382032717355, "loss": 2.965158700942993, "step": 10338, "token_acc": 0.302707999106472 }, { "epoch": 6.06039284667253, "grad_norm": 0.25614332075158935, "learning_rate": 0.0004174758314315439, "loss": 2.96305513381958, "step": 10339, "token_acc": 0.30326771216863657 }, { "epoch": 6.060979184989739, "grad_norm": 0.15565924371182874, "learning_rate": 0.0004174578409627045, "loss": 2.932305335998535, "step": 10340, "token_acc": 0.30747541279436974 }, { "epoch": 6.061565523306948, "grad_norm": 0.1746454932040459, "learning_rate": 0.0004174398489208244, "loss": 2.8862733840942383, "step": 10341, "token_acc": 0.3145143114717151 }, { "epoch": 6.062151861624157, "grad_norm": 0.16985274150991805, "learning_rate": 0.0004174218553060725, "loss": 2.9634270668029785, "step": 10342, "token_acc": 0.30349763832839866 }, { "epoch": 6.062738199941366, "grad_norm": 0.15919040365585663, "learning_rate": 0.00041740386011861793, "loss": 2.939575672149658, "step": 10343, "token_acc": 0.3073319695217765 }, { "epoch": 6.063324538258575, "grad_norm": 0.16570205638267985, "learning_rate": 0.0004173858633586296, "loss": 2.9637322425842285, "step": 10344, "token_acc": 0.3025522231457888 }, { "epoch": 6.063910876575784, "grad_norm": 0.20100833698627005, "learning_rate": 0.00041736786502627666, "loss": 2.9333088397979736, "step": 10345, "token_acc": 0.30603469721767596 }, { "epoch": 6.064497214892993, "grad_norm": 0.16385173446042253, "learning_rate": 0.0004173498651217282, "loss": 2.9404385089874268, "step": 10346, "token_acc": 0.30659420157379136 }, { "epoch": 6.0650835532102025, "grad_norm": 0.17535650844877715, "learning_rate": 0.00041733186364515323, "loss": 2.934835433959961, "step": 10347, "token_acc": 0.3061625921362644 }, { "epoch": 6.065669891527412, "grad_norm": 0.15514254421292142, "learning_rate": 0.0004173138605967209, "loss": 2.907177209854126, "step": 10348, "token_acc": 0.3100863605638361 }, { "epoch": 6.066256229844621, "grad_norm": 0.17387340040836205, "learning_rate": 0.00041729585597660024, "loss": 2.894984483718872, "step": 10349, "token_acc": 0.31431665397935005 }, { "epoch": 6.066842568161829, "grad_norm": 0.17624445673799993, "learning_rate": 0.0004172778497849605, "loss": 2.9011363983154297, "step": 10350, "token_acc": 0.3123098260848731 }, { "epoch": 6.067428906479038, "grad_norm": 0.1726879628423072, "learning_rate": 0.0004172598420219708, "loss": 2.950467109680176, "step": 10351, "token_acc": 0.3032352006777507 }, { "epoch": 6.068015244796247, "grad_norm": 0.16052206973811742, "learning_rate": 0.0004172418326878003, "loss": 2.9330697059631348, "step": 10352, "token_acc": 0.3067943193240513 }, { "epoch": 6.068601583113456, "grad_norm": 0.2174646690809696, "learning_rate": 0.00041722382178261807, "loss": 2.950131416320801, "step": 10353, "token_acc": 0.304539651201053 }, { "epoch": 6.069187921430665, "grad_norm": 0.15493196486403835, "learning_rate": 0.00041720580930659334, "loss": 2.9184255599975586, "step": 10354, "token_acc": 0.3103981455803172 }, { "epoch": 6.0697742597478745, "grad_norm": 0.19493495426098145, "learning_rate": 0.00041718779525989544, "loss": 2.9730288982391357, "step": 10355, "token_acc": 0.30046266465263183 }, { "epoch": 6.070360598065084, "grad_norm": 0.1537075069181297, "learning_rate": 0.0004171697796426934, "loss": 2.9841341972351074, "step": 10356, "token_acc": 0.3002169006273792 }, { "epoch": 6.070946936382293, "grad_norm": 0.2060060782229896, "learning_rate": 0.0004171517624551566, "loss": 2.953420639038086, "step": 10357, "token_acc": 0.30366265047648167 }, { "epoch": 6.071533274699502, "grad_norm": 0.19142239605354902, "learning_rate": 0.0004171337436974543, "loss": 2.934462785720825, "step": 10358, "token_acc": 0.3072923409963075 }, { "epoch": 6.072119613016711, "grad_norm": 0.18570545252263554, "learning_rate": 0.00041711572336975555, "loss": 2.9109275341033936, "step": 10359, "token_acc": 0.31031673550717115 }, { "epoch": 6.07270595133392, "grad_norm": 0.20572328910303564, "learning_rate": 0.0004170977014722298, "loss": 2.9132065773010254, "step": 10360, "token_acc": 0.3096357130351046 }, { "epoch": 6.073292289651128, "grad_norm": 0.15045101696421187, "learning_rate": 0.0004170796780050464, "loss": 2.898057460784912, "step": 10361, "token_acc": 0.31256209265915225 }, { "epoch": 6.073878627968337, "grad_norm": 0.23070075014343516, "learning_rate": 0.0004170616529683745, "loss": 2.9632296562194824, "step": 10362, "token_acc": 0.30218249604658226 }, { "epoch": 6.0744649662855466, "grad_norm": 0.15220079606623535, "learning_rate": 0.0004170436263623835, "loss": 2.9829108715057373, "step": 10363, "token_acc": 0.2989781144208137 }, { "epoch": 6.075051304602756, "grad_norm": 0.21366120440288014, "learning_rate": 0.00041702559818724275, "loss": 2.951282024383545, "step": 10364, "token_acc": 0.30401181340001726 }, { "epoch": 6.075637642919965, "grad_norm": 0.1655978200246757, "learning_rate": 0.0004170075684431215, "loss": 2.9437506198883057, "step": 10365, "token_acc": 0.30710179209426564 }, { "epoch": 6.076223981237174, "grad_norm": 0.18383866862705062, "learning_rate": 0.0004169895371301893, "loss": 2.9523637294769287, "step": 10366, "token_acc": 0.303704137015428 }, { "epoch": 6.076810319554383, "grad_norm": 0.1698849773799001, "learning_rate": 0.0004169715042486153, "loss": 2.9248464107513428, "step": 10367, "token_acc": 0.3083811245901197 }, { "epoch": 6.077396657871592, "grad_norm": 0.20629911334996334, "learning_rate": 0.0004169534697985691, "loss": 2.964049816131592, "step": 10368, "token_acc": 0.3036072736618904 }, { "epoch": 6.077982996188801, "grad_norm": 0.16738523168024455, "learning_rate": 0.00041693543378022003, "loss": 2.9208626747131348, "step": 10369, "token_acc": 0.30869069113229797 }, { "epoch": 6.07856933450601, "grad_norm": 0.1963817078039234, "learning_rate": 0.0004169173961937375, "loss": 2.990365505218506, "step": 10370, "token_acc": 0.2982556032920959 }, { "epoch": 6.0791556728232194, "grad_norm": 0.18358411341470202, "learning_rate": 0.00041689935703929096, "loss": 2.9318721294403076, "step": 10371, "token_acc": 0.30747021872800434 }, { "epoch": 6.079742011140428, "grad_norm": 0.20610675953206628, "learning_rate": 0.0004168813163170498, "loss": 2.9484333992004395, "step": 10372, "token_acc": 0.30545186844179006 }, { "epoch": 6.080328349457637, "grad_norm": 0.22195504045360578, "learning_rate": 0.00041686327402718355, "loss": 2.940018653869629, "step": 10373, "token_acc": 0.3061286020164649 }, { "epoch": 6.080914687774846, "grad_norm": 0.16776124936257816, "learning_rate": 0.00041684523016986174, "loss": 2.930710554122925, "step": 10374, "token_acc": 0.3078344163298432 }, { "epoch": 6.081501026092055, "grad_norm": 0.21464859329026204, "learning_rate": 0.00041682718474525375, "loss": 2.886188507080078, "step": 10375, "token_acc": 0.3129126386034356 }, { "epoch": 6.082087364409264, "grad_norm": 0.1554091487789094, "learning_rate": 0.00041680913775352926, "loss": 2.9119367599487305, "step": 10376, "token_acc": 0.30948510774386745 }, { "epoch": 6.082673702726473, "grad_norm": 0.17676620794112669, "learning_rate": 0.0004167910891948576, "loss": 2.939983606338501, "step": 10377, "token_acc": 0.30554569211944094 }, { "epoch": 6.083260041043682, "grad_norm": 0.1913897392675056, "learning_rate": 0.00041677303906940857, "loss": 2.921018600463867, "step": 10378, "token_acc": 0.3108177373032812 }, { "epoch": 6.0838463793608915, "grad_norm": 0.1551338293577213, "learning_rate": 0.0004167549873773514, "loss": 2.90962553024292, "step": 10379, "token_acc": 0.31186792676034125 }, { "epoch": 6.084432717678101, "grad_norm": 0.17885400981708033, "learning_rate": 0.0004167369341188559, "loss": 2.9341928958892822, "step": 10380, "token_acc": 0.3062773951158422 }, { "epoch": 6.08501905599531, "grad_norm": 0.16633794368815003, "learning_rate": 0.0004167188792940916, "loss": 2.9201507568359375, "step": 10381, "token_acc": 0.3093780642462118 }, { "epoch": 6.085605394312518, "grad_norm": 0.1621370149843927, "learning_rate": 0.0004167008229032281, "loss": 2.9466941356658936, "step": 10382, "token_acc": 0.30603684210526316 }, { "epoch": 6.086191732629727, "grad_norm": 0.15176935183733528, "learning_rate": 0.0004166827649464349, "loss": 2.954894542694092, "step": 10383, "token_acc": 0.3041822516445738 }, { "epoch": 6.086778070946936, "grad_norm": 0.18001307935781927, "learning_rate": 0.0004166647054238818, "loss": 2.9423749446868896, "step": 10384, "token_acc": 0.3037615480318712 }, { "epoch": 6.087364409264145, "grad_norm": 0.1546156823247517, "learning_rate": 0.0004166466443357384, "loss": 2.9306800365448, "step": 10385, "token_acc": 0.30816204543863523 }, { "epoch": 6.087950747581354, "grad_norm": 0.16072157698909545, "learning_rate": 0.0004166285816821743, "loss": 2.947432518005371, "step": 10386, "token_acc": 0.3060800033913214 }, { "epoch": 6.0885370858985635, "grad_norm": 0.16588454146279294, "learning_rate": 0.0004166105174633592, "loss": 2.9169092178344727, "step": 10387, "token_acc": 0.3093053525940477 }, { "epoch": 6.089123424215773, "grad_norm": 0.17697315716621784, "learning_rate": 0.0004165924516794628, "loss": 2.971461534500122, "step": 10388, "token_acc": 0.3019210127905279 }, { "epoch": 6.089709762532982, "grad_norm": 0.17075973054790755, "learning_rate": 0.0004165743843306548, "loss": 2.9590089321136475, "step": 10389, "token_acc": 0.30229917455204347 }, { "epoch": 6.090296100850191, "grad_norm": 0.17848769234985856, "learning_rate": 0.0004165563154171049, "loss": 2.9149913787841797, "step": 10390, "token_acc": 0.3095460271543881 }, { "epoch": 6.0908824391674, "grad_norm": 0.16891302500068286, "learning_rate": 0.000416538244938983, "loss": 2.939793109893799, "step": 10391, "token_acc": 0.30691706590654405 }, { "epoch": 6.091468777484609, "grad_norm": 0.16493776937037627, "learning_rate": 0.00041652017289645863, "loss": 2.9628772735595703, "step": 10392, "token_acc": 0.3013392620262971 }, { "epoch": 6.092055115801817, "grad_norm": 0.17433678963184748, "learning_rate": 0.00041650209928970153, "loss": 2.9630610942840576, "step": 10393, "token_acc": 0.3026333887849352 }, { "epoch": 6.092641454119026, "grad_norm": 0.14871093322053644, "learning_rate": 0.00041648402411888165, "loss": 2.914952516555786, "step": 10394, "token_acc": 0.30964090867988225 }, { "epoch": 6.0932277924362355, "grad_norm": 0.1495034650058218, "learning_rate": 0.00041646594738416865, "loss": 2.9515929222106934, "step": 10395, "token_acc": 0.30394859311233624 }, { "epoch": 6.093814130753445, "grad_norm": 0.1610707294777987, "learning_rate": 0.0004164478690857324, "loss": 3.0026893615722656, "step": 10396, "token_acc": 0.2969383530073514 }, { "epoch": 6.094400469070654, "grad_norm": 0.1574988176923822, "learning_rate": 0.00041642978922374274, "loss": 2.929046869277954, "step": 10397, "token_acc": 0.3087426751522527 }, { "epoch": 6.094986807387863, "grad_norm": 0.16448617471376137, "learning_rate": 0.0004164117077983695, "loss": 2.924130916595459, "step": 10398, "token_acc": 0.3075816856321824 }, { "epoch": 6.095573145705072, "grad_norm": 0.16510430189383757, "learning_rate": 0.0004163936248097825, "loss": 2.952380418777466, "step": 10399, "token_acc": 0.3034254320567603 }, { "epoch": 6.096159484022281, "grad_norm": 0.181306744805994, "learning_rate": 0.00041637554025815155, "loss": 2.958345890045166, "step": 10400, "token_acc": 0.3029982194771097 }, { "epoch": 6.09674582233949, "grad_norm": 0.23725446881122828, "learning_rate": 0.0004163574541436467, "loss": 2.929875373840332, "step": 10401, "token_acc": 0.30899364431502 }, { "epoch": 6.097332160656699, "grad_norm": 0.3648114968090372, "learning_rate": 0.00041633936646643765, "loss": 2.9426755905151367, "step": 10402, "token_acc": 0.30700762813576526 }, { "epoch": 6.097918498973908, "grad_norm": 0.4005656859160203, "learning_rate": 0.00041632127722669444, "loss": 2.942589044570923, "step": 10403, "token_acc": 0.30582858653408673 }, { "epoch": 6.098504837291117, "grad_norm": 0.17847403506218268, "learning_rate": 0.00041630318642458684, "loss": 2.963380813598633, "step": 10404, "token_acc": 0.3020968904352968 }, { "epoch": 6.099091175608326, "grad_norm": 0.3123065790070527, "learning_rate": 0.000416285094060285, "loss": 2.9427084922790527, "step": 10405, "token_acc": 0.30541245892704605 }, { "epoch": 6.099677513925535, "grad_norm": 0.17612652484812036, "learning_rate": 0.0004162670001339588, "loss": 2.8996973037719727, "step": 10406, "token_acc": 0.31091194414808215 }, { "epoch": 6.100263852242744, "grad_norm": 0.23617927244769796, "learning_rate": 0.00041624890464577813, "loss": 2.955127716064453, "step": 10407, "token_acc": 0.30449148514032987 }, { "epoch": 6.100850190559953, "grad_norm": 0.14860805524360848, "learning_rate": 0.00041623080759591307, "loss": 2.9312400817871094, "step": 10408, "token_acc": 0.3069817165830432 }, { "epoch": 6.101436528877162, "grad_norm": 0.18863507958330297, "learning_rate": 0.0004162127089845336, "loss": 2.9177653789520264, "step": 10409, "token_acc": 0.3097544634059827 }, { "epoch": 6.102022867194371, "grad_norm": 0.16664255285207497, "learning_rate": 0.0004161946088118096, "loss": 2.922409772872925, "step": 10410, "token_acc": 0.3086878244299888 }, { "epoch": 6.10260920551158, "grad_norm": 0.19018691709468127, "learning_rate": 0.0004161765070779112, "loss": 2.981858253479004, "step": 10411, "token_acc": 0.3005881660351508 }, { "epoch": 6.1031955438287895, "grad_norm": 0.17707820506394414, "learning_rate": 0.0004161584037830085, "loss": 2.9327139854431152, "step": 10412, "token_acc": 0.3072470969766712 }, { "epoch": 6.103781882145999, "grad_norm": 0.16876120101255232, "learning_rate": 0.00041614029892727156, "loss": 2.935960531234741, "step": 10413, "token_acc": 0.3085030505482777 }, { "epoch": 6.104368220463208, "grad_norm": 0.16341499004660603, "learning_rate": 0.00041612219251087025, "loss": 2.972388744354248, "step": 10414, "token_acc": 0.3020122672442619 }, { "epoch": 6.104954558780416, "grad_norm": 0.1605948534220989, "learning_rate": 0.00041610408453397486, "loss": 2.972555637359619, "step": 10415, "token_acc": 0.299198486179631 }, { "epoch": 6.105540897097625, "grad_norm": 0.15588455452603286, "learning_rate": 0.0004160859749967555, "loss": 2.9388070106506348, "step": 10416, "token_acc": 0.30679257034577767 }, { "epoch": 6.106127235414834, "grad_norm": 0.17067469574548513, "learning_rate": 0.0004160678638993821, "loss": 2.928025245666504, "step": 10417, "token_acc": 0.3081767046590163 }, { "epoch": 6.106713573732043, "grad_norm": 0.16536639626237284, "learning_rate": 0.00041604975124202495, "loss": 2.9623336791992188, "step": 10418, "token_acc": 0.3037119750781387 }, { "epoch": 6.107299912049252, "grad_norm": 0.16178444705512532, "learning_rate": 0.000416031637024854, "loss": 2.943077802658081, "step": 10419, "token_acc": 0.3056953026033453 }, { "epoch": 6.1078862503664615, "grad_norm": 0.15830505530652575, "learning_rate": 0.00041601352124803976, "loss": 2.9410009384155273, "step": 10420, "token_acc": 0.30542754328055155 }, { "epoch": 6.108472588683671, "grad_norm": 0.16561414011616415, "learning_rate": 0.00041599540391175214, "loss": 2.8999428749084473, "step": 10421, "token_acc": 0.3130207093549155 }, { "epoch": 6.10905892700088, "grad_norm": 0.163444004215403, "learning_rate": 0.00041597728501616127, "loss": 2.975067615509033, "step": 10422, "token_acc": 0.30048044973245447 }, { "epoch": 6.109645265318089, "grad_norm": 0.16056654286590044, "learning_rate": 0.0004159591645614376, "loss": 2.928494453430176, "step": 10423, "token_acc": 0.30758260546859467 }, { "epoch": 6.110231603635298, "grad_norm": 0.18785863947086578, "learning_rate": 0.0004159410425477511, "loss": 2.9437694549560547, "step": 10424, "token_acc": 0.30490544939622427 }, { "epoch": 6.110817941952506, "grad_norm": 0.1505626054166065, "learning_rate": 0.00041592291897527223, "loss": 2.9447240829467773, "step": 10425, "token_acc": 0.30535251120018864 }, { "epoch": 6.111404280269715, "grad_norm": 0.18227095075713756, "learning_rate": 0.000415904793844171, "loss": 2.9433984756469727, "step": 10426, "token_acc": 0.3053795151474672 }, { "epoch": 6.111990618586924, "grad_norm": 0.16627628629075097, "learning_rate": 0.00041588666715461787, "loss": 2.896047353744507, "step": 10427, "token_acc": 0.31125370464281255 }, { "epoch": 6.1125769569041335, "grad_norm": 0.2025082858542502, "learning_rate": 0.000415868538906783, "loss": 2.935548782348633, "step": 10428, "token_acc": 0.30552157981609324 }, { "epoch": 6.113163295221343, "grad_norm": 0.27137076450341086, "learning_rate": 0.0004158504091008367, "loss": 2.9595813751220703, "step": 10429, "token_acc": 0.3024967119624162 }, { "epoch": 6.113749633538552, "grad_norm": 0.21093432711283527, "learning_rate": 0.00041583227773694936, "loss": 2.9636895656585693, "step": 10430, "token_acc": 0.3020448981876848 }, { "epoch": 6.114335971855761, "grad_norm": 0.16684935143641036, "learning_rate": 0.00041581414481529124, "loss": 2.8368446826934814, "step": 10431, "token_acc": 0.32148063265333315 }, { "epoch": 6.11492231017297, "grad_norm": 0.1962838716864451, "learning_rate": 0.0004157960103360326, "loss": 2.94777774810791, "step": 10432, "token_acc": 0.3046489913794975 }, { "epoch": 6.115508648490179, "grad_norm": 0.16862376808320972, "learning_rate": 0.0004157778742993439, "loss": 2.923959970474243, "step": 10433, "token_acc": 0.3085891864105799 }, { "epoch": 6.116094986807388, "grad_norm": 0.19066300799687697, "learning_rate": 0.0004157597367053954, "loss": 2.9415836334228516, "step": 10434, "token_acc": 0.30624595867022214 }, { "epoch": 6.116681325124597, "grad_norm": 0.21738615505634146, "learning_rate": 0.0004157415975543576, "loss": 2.9260506629943848, "step": 10435, "token_acc": 0.30755254056930037 }, { "epoch": 6.1172676634418055, "grad_norm": 0.1580646757013759, "learning_rate": 0.0004157234568464008, "loss": 2.895148754119873, "step": 10436, "token_acc": 0.31291539243032523 }, { "epoch": 6.117854001759015, "grad_norm": 0.227311014506415, "learning_rate": 0.0004157053145816955, "loss": 2.929617166519165, "step": 10437, "token_acc": 0.3068810105366432 }, { "epoch": 6.118440340076224, "grad_norm": 0.16883044052799437, "learning_rate": 0.000415687170760412, "loss": 2.9710538387298584, "step": 10438, "token_acc": 0.3027455672045928 }, { "epoch": 6.119026678393433, "grad_norm": 0.20366480542445012, "learning_rate": 0.0004156690253827208, "loss": 2.8886756896972656, "step": 10439, "token_acc": 0.3138385969981374 }, { "epoch": 6.119613016710642, "grad_norm": 0.19595579540082303, "learning_rate": 0.0004156508784487923, "loss": 2.952850103378296, "step": 10440, "token_acc": 0.30421357814650063 }, { "epoch": 6.120199355027851, "grad_norm": 0.1811051002729143, "learning_rate": 0.0004156327299587971, "loss": 2.922309398651123, "step": 10441, "token_acc": 0.30829966334296066 }, { "epoch": 6.12078569334506, "grad_norm": 0.20561917908929286, "learning_rate": 0.00041561457991290553, "loss": 2.9297471046447754, "step": 10442, "token_acc": 0.30786297136164403 }, { "epoch": 6.121372031662269, "grad_norm": 0.15780783618226757, "learning_rate": 0.0004155964283112882, "loss": 2.9822909832000732, "step": 10443, "token_acc": 0.30000659221854525 }, { "epoch": 6.121958369979478, "grad_norm": 0.19662208076033094, "learning_rate": 0.00041557827515411553, "loss": 2.9561009407043457, "step": 10444, "token_acc": 0.3045006728311329 }, { "epoch": 6.1225447082966875, "grad_norm": 0.16968830696216852, "learning_rate": 0.00041556012044155813, "loss": 2.9536328315734863, "step": 10445, "token_acc": 0.30562220026866965 }, { "epoch": 6.123131046613897, "grad_norm": 0.14749029166195343, "learning_rate": 0.0004155419641737865, "loss": 2.9082694053649902, "step": 10446, "token_acc": 0.3103008273705883 }, { "epoch": 6.123717384931105, "grad_norm": 0.16137758711028985, "learning_rate": 0.0004155238063509711, "loss": 2.9429492950439453, "step": 10447, "token_acc": 0.30485365648576823 }, { "epoch": 6.124303723248314, "grad_norm": 0.15868052175954908, "learning_rate": 0.00041550564697328266, "loss": 2.941316604614258, "step": 10448, "token_acc": 0.3039662722107625 }, { "epoch": 6.124890061565523, "grad_norm": 0.1756635081284907, "learning_rate": 0.00041548748604089164, "loss": 2.9978253841400146, "step": 10449, "token_acc": 0.2979858400325896 }, { "epoch": 6.125476399882732, "grad_norm": 0.1557158814790115, "learning_rate": 0.0004154693235539688, "loss": 2.9624266624450684, "step": 10450, "token_acc": 0.3028324842312598 }, { "epoch": 6.126062738199941, "grad_norm": 0.16639843939532703, "learning_rate": 0.00041545115951268453, "loss": 2.912062406539917, "step": 10451, "token_acc": 0.3103679441736234 }, { "epoch": 6.12664907651715, "grad_norm": 0.16494038756588472, "learning_rate": 0.0004154329939172096, "loss": 2.9039361476898193, "step": 10452, "token_acc": 0.3128679222926574 }, { "epoch": 6.1272354148343595, "grad_norm": 0.15824868073976797, "learning_rate": 0.0004154148267677146, "loss": 2.9260544776916504, "step": 10453, "token_acc": 0.30908763867319616 }, { "epoch": 6.127821753151569, "grad_norm": 0.1666834313703923, "learning_rate": 0.00041539665806437015, "loss": 2.9439620971679688, "step": 10454, "token_acc": 0.30692399979692664 }, { "epoch": 6.128408091468778, "grad_norm": 0.1594622401797017, "learning_rate": 0.000415378487807347, "loss": 2.9661808013916016, "step": 10455, "token_acc": 0.3019629455098254 }, { "epoch": 6.128994429785987, "grad_norm": 0.15216042026501475, "learning_rate": 0.0004153603159968159, "loss": 2.919219493865967, "step": 10456, "token_acc": 0.30898766249474796 }, { "epoch": 6.129580768103196, "grad_norm": 0.15332796688813455, "learning_rate": 0.0004153421426329474, "loss": 2.966148853302002, "step": 10457, "token_acc": 0.30279541276559363 }, { "epoch": 6.130167106420404, "grad_norm": 0.20204300457221028, "learning_rate": 0.0004153239677159122, "loss": 2.9437081813812256, "step": 10458, "token_acc": 0.3043364575188619 }, { "epoch": 6.130753444737613, "grad_norm": 0.3072558255041798, "learning_rate": 0.0004153057912458812, "loss": 2.9648566246032715, "step": 10459, "token_acc": 0.30356991319534743 }, { "epoch": 6.131339783054822, "grad_norm": 0.23521640693762688, "learning_rate": 0.000415287613223025, "loss": 2.90767240524292, "step": 10460, "token_acc": 0.30989104471031276 }, { "epoch": 6.1319261213720315, "grad_norm": 0.15682580678508576, "learning_rate": 0.00041526943364751445, "loss": 2.9724202156066895, "step": 10461, "token_acc": 0.3031968720032636 }, { "epoch": 6.132512459689241, "grad_norm": 0.23129569301531008, "learning_rate": 0.0004152512525195202, "loss": 2.927483558654785, "step": 10462, "token_acc": 0.30769992294190673 }, { "epoch": 6.13309879800645, "grad_norm": 0.17371185396455913, "learning_rate": 0.0004152330698392132, "loss": 2.91770601272583, "step": 10463, "token_acc": 0.30873325446813954 }, { "epoch": 6.133685136323659, "grad_norm": 0.2386006605877323, "learning_rate": 0.0004152148856067641, "loss": 2.950911521911621, "step": 10464, "token_acc": 0.3046612943842861 }, { "epoch": 6.134271474640868, "grad_norm": 0.3034959128654779, "learning_rate": 0.00041519669982234374, "loss": 2.9836935997009277, "step": 10465, "token_acc": 0.2996892957717207 }, { "epoch": 6.134857812958077, "grad_norm": 0.16925752192189283, "learning_rate": 0.000415178512486123, "loss": 2.9244637489318848, "step": 10466, "token_acc": 0.30740564289873484 }, { "epoch": 6.135444151275286, "grad_norm": 0.20227191017184934, "learning_rate": 0.00041516032359827283, "loss": 2.9298830032348633, "step": 10467, "token_acc": 0.3075545952619801 }, { "epoch": 6.136030489592494, "grad_norm": 0.16884120129399502, "learning_rate": 0.00041514213315896385, "loss": 2.947310209274292, "step": 10468, "token_acc": 0.3046397108416165 }, { "epoch": 6.1366168279097035, "grad_norm": 0.16223727692594062, "learning_rate": 0.00041512394116836713, "loss": 2.944086790084839, "step": 10469, "token_acc": 0.30537660506753356 }, { "epoch": 6.137203166226913, "grad_norm": 0.1662959516848505, "learning_rate": 0.00041510574762665354, "loss": 2.9615859985351562, "step": 10470, "token_acc": 0.3018576526026203 }, { "epoch": 6.137789504544122, "grad_norm": 0.15726566868467232, "learning_rate": 0.00041508755253399384, "loss": 2.907604694366455, "step": 10471, "token_acc": 0.30987845881858855 }, { "epoch": 6.138375842861331, "grad_norm": 0.1940583384552843, "learning_rate": 0.00041506935589055905, "loss": 2.957211494445801, "step": 10472, "token_acc": 0.30282178637239743 }, { "epoch": 6.13896218117854, "grad_norm": 0.16130884560368403, "learning_rate": 0.0004150511576965201, "loss": 2.940397024154663, "step": 10473, "token_acc": 0.30500466182354996 }, { "epoch": 6.139548519495749, "grad_norm": 0.17157180505395575, "learning_rate": 0.00041503295795204795, "loss": 2.913651943206787, "step": 10474, "token_acc": 0.3099711038040271 }, { "epoch": 6.140134857812958, "grad_norm": 0.16033968612033972, "learning_rate": 0.0004150147566573136, "loss": 2.9319586753845215, "step": 10475, "token_acc": 0.30749353338783425 }, { "epoch": 6.140721196130167, "grad_norm": 0.15857423117067557, "learning_rate": 0.00041499655381248784, "loss": 2.942533016204834, "step": 10476, "token_acc": 0.3065783033108068 }, { "epoch": 6.141307534447376, "grad_norm": 0.166747210589665, "learning_rate": 0.00041497834941774185, "loss": 2.9234941005706787, "step": 10477, "token_acc": 0.3093667923547003 }, { "epoch": 6.1418938727645855, "grad_norm": 0.17077343685105867, "learning_rate": 0.0004149601434732466, "loss": 2.9386777877807617, "step": 10478, "token_acc": 0.3074772027133957 }, { "epoch": 6.142480211081795, "grad_norm": 0.15781378144997013, "learning_rate": 0.00041494193597917307, "loss": 2.921415090560913, "step": 10479, "token_acc": 0.30959037762472524 }, { "epoch": 6.143066549399003, "grad_norm": 0.1599309841549574, "learning_rate": 0.00041492372693569234, "loss": 2.9854774475097656, "step": 10480, "token_acc": 0.29958764686549993 }, { "epoch": 6.143652887716212, "grad_norm": 0.17385564690899588, "learning_rate": 0.00041490551634297546, "loss": 2.9373931884765625, "step": 10481, "token_acc": 0.3066968108084338 }, { "epoch": 6.144239226033421, "grad_norm": 0.16403678017085097, "learning_rate": 0.0004148873042011934, "loss": 2.947756290435791, "step": 10482, "token_acc": 0.30470552955135144 }, { "epoch": 6.14482556435063, "grad_norm": 0.163869681078955, "learning_rate": 0.0004148690905105174, "loss": 2.926081895828247, "step": 10483, "token_acc": 0.3080807262117037 }, { "epoch": 6.145411902667839, "grad_norm": 0.17349128005486922, "learning_rate": 0.0004148508752711183, "loss": 2.9466233253479004, "step": 10484, "token_acc": 0.30614646246478633 }, { "epoch": 6.145998240985048, "grad_norm": 0.18661846379164745, "learning_rate": 0.0004148326584831675, "loss": 2.896618366241455, "step": 10485, "token_acc": 0.3123967068034516 }, { "epoch": 6.1465845793022575, "grad_norm": 0.14698230726421643, "learning_rate": 0.00041481444014683587, "loss": 2.9340977668762207, "step": 10486, "token_acc": 0.3076336968427068 }, { "epoch": 6.147170917619467, "grad_norm": 0.17944177546704151, "learning_rate": 0.0004147962202622948, "loss": 2.939802408218384, "step": 10487, "token_acc": 0.30672235161834394 }, { "epoch": 6.147757255936676, "grad_norm": 0.1567711650362066, "learning_rate": 0.00041477799882971524, "loss": 2.9326446056365967, "step": 10488, "token_acc": 0.3060327679120402 }, { "epoch": 6.148343594253885, "grad_norm": 0.2073171422380734, "learning_rate": 0.00041475977584926846, "loss": 2.957396984100342, "step": 10489, "token_acc": 0.30252747281114556 }, { "epoch": 6.148929932571093, "grad_norm": 0.24004030897757322, "learning_rate": 0.00041474155132112557, "loss": 2.9332456588745117, "step": 10490, "token_acc": 0.3063213460394012 }, { "epoch": 6.149516270888302, "grad_norm": 0.2554601458258769, "learning_rate": 0.00041472332524545777, "loss": 2.933067560195923, "step": 10491, "token_acc": 0.30784441186630146 }, { "epoch": 6.150102609205511, "grad_norm": 0.16863235777922414, "learning_rate": 0.0004147050976224363, "loss": 2.9686851501464844, "step": 10492, "token_acc": 0.30329630445772354 }, { "epoch": 6.15068894752272, "grad_norm": 0.2094739609225327, "learning_rate": 0.00041468686845223243, "loss": 2.936225652694702, "step": 10493, "token_acc": 0.3056676728962453 }, { "epoch": 6.1512752858399296, "grad_norm": 0.308046893252181, "learning_rate": 0.0004146686377350173, "loss": 2.958745002746582, "step": 10494, "token_acc": 0.3023653874437688 }, { "epoch": 6.151861624157139, "grad_norm": 0.244956779167131, "learning_rate": 0.0004146504054709622, "loss": 2.8869080543518066, "step": 10495, "token_acc": 0.31447863392366543 }, { "epoch": 6.152447962474348, "grad_norm": 0.17940980179641128, "learning_rate": 0.0004146321716602385, "loss": 2.9498534202575684, "step": 10496, "token_acc": 0.3067870646413622 }, { "epoch": 6.153034300791557, "grad_norm": 0.2329151701338742, "learning_rate": 0.00041461393630301736, "loss": 2.958310127258301, "step": 10497, "token_acc": 0.3040309385690912 }, { "epoch": 6.153620639108766, "grad_norm": 0.15179560881939755, "learning_rate": 0.00041459569939947006, "loss": 2.938229560852051, "step": 10498, "token_acc": 0.30802733816688754 }, { "epoch": 6.154206977425975, "grad_norm": 0.21381106397551344, "learning_rate": 0.000414577460949768, "loss": 2.9201273918151855, "step": 10499, "token_acc": 0.30766649978900024 }, { "epoch": 6.154793315743184, "grad_norm": 0.154464085089828, "learning_rate": 0.00041455922095408245, "loss": 2.9637928009033203, "step": 10500, "token_acc": 0.30151563153088634 }, { "epoch": 6.1553796540603924, "grad_norm": 0.21430677562982106, "learning_rate": 0.00041454097941258475, "loss": 2.930187225341797, "step": 10501, "token_acc": 0.3069289449496025 }, { "epoch": 6.155965992377602, "grad_norm": 0.15350299040524493, "learning_rate": 0.0004145227363254462, "loss": 2.920713424682617, "step": 10502, "token_acc": 0.3081558745550163 }, { "epoch": 6.156552330694811, "grad_norm": 0.1984802493298596, "learning_rate": 0.00041450449169283846, "loss": 2.915069103240967, "step": 10503, "token_acc": 0.3094154735321161 }, { "epoch": 6.15713866901202, "grad_norm": 0.16795478819239112, "learning_rate": 0.00041448624551493254, "loss": 2.9451258182525635, "step": 10504, "token_acc": 0.30570178947649146 }, { "epoch": 6.157725007329229, "grad_norm": 0.2139972699911268, "learning_rate": 0.00041446799779189995, "loss": 2.972809314727783, "step": 10505, "token_acc": 0.2998525210921411 }, { "epoch": 6.158311345646438, "grad_norm": 0.17299874943001023, "learning_rate": 0.0004144497485239123, "loss": 2.920882225036621, "step": 10506, "token_acc": 0.30833366075989155 }, { "epoch": 6.158897683963647, "grad_norm": 0.1965185103617262, "learning_rate": 0.0004144314977111407, "loss": 2.9171862602233887, "step": 10507, "token_acc": 0.307525509528676 }, { "epoch": 6.159484022280856, "grad_norm": 0.16348901491062998, "learning_rate": 0.0004144132453537568, "loss": 2.967710494995117, "step": 10508, "token_acc": 0.3029505257152172 }, { "epoch": 6.160070360598065, "grad_norm": 0.2332032975357556, "learning_rate": 0.00041439499145193206, "loss": 2.9047937393188477, "step": 10509, "token_acc": 0.3110390114009462 }, { "epoch": 6.1606566989152745, "grad_norm": 0.1705785540066222, "learning_rate": 0.00041437673600583786, "loss": 2.9172589778900146, "step": 10510, "token_acc": 0.31025348579352247 }, { "epoch": 6.161243037232484, "grad_norm": 0.16366281468649038, "learning_rate": 0.0004143584790156457, "loss": 2.9317102432250977, "step": 10511, "token_acc": 0.3067437460805961 }, { "epoch": 6.161829375549692, "grad_norm": 0.1601560742302561, "learning_rate": 0.0004143402204815272, "loss": 2.933523654937744, "step": 10512, "token_acc": 0.3063418092618516 }, { "epoch": 6.162415713866901, "grad_norm": 0.17373931166236636, "learning_rate": 0.0004143219604036537, "loss": 2.9422945976257324, "step": 10513, "token_acc": 0.30532538504942425 }, { "epoch": 6.16300205218411, "grad_norm": 0.18616943477253542, "learning_rate": 0.00041430369878219685, "loss": 2.95859956741333, "step": 10514, "token_acc": 0.30294241288961393 }, { "epoch": 6.163588390501319, "grad_norm": 0.1566410030301233, "learning_rate": 0.00041428543561732815, "loss": 2.941256523132324, "step": 10515, "token_acc": 0.30742792721085416 }, { "epoch": 6.164174728818528, "grad_norm": 0.21408578938545805, "learning_rate": 0.0004142671709092191, "loss": 2.9786064624786377, "step": 10516, "token_acc": 0.3022998965762136 }, { "epoch": 6.164761067135737, "grad_norm": 0.1868261068765775, "learning_rate": 0.00041424890465804133, "loss": 2.9580087661743164, "step": 10517, "token_acc": 0.30444521162742527 }, { "epoch": 6.1653474054529465, "grad_norm": 0.21312871740695563, "learning_rate": 0.0004142306368639665, "loss": 2.9835939407348633, "step": 10518, "token_acc": 0.3015483143205429 }, { "epoch": 6.165933743770156, "grad_norm": 0.2112837939078349, "learning_rate": 0.0004142123675271661, "loss": 2.9283437728881836, "step": 10519, "token_acc": 0.3074960853946843 }, { "epoch": 6.166520082087365, "grad_norm": 0.16353946420247256, "learning_rate": 0.0004141940966478118, "loss": 2.9730257987976074, "step": 10520, "token_acc": 0.30188471062186467 }, { "epoch": 6.167106420404574, "grad_norm": 0.17402030661932746, "learning_rate": 0.0004141758242260753, "loss": 2.939260721206665, "step": 10521, "token_acc": 0.3064985550241682 }, { "epoch": 6.167692758721783, "grad_norm": 0.1685335056651273, "learning_rate": 0.000414157550262128, "loss": 2.963682174682617, "step": 10522, "token_acc": 0.30210048982909454 }, { "epoch": 6.168279097038991, "grad_norm": 0.18375339375630045, "learning_rate": 0.0004141392747561418, "loss": 2.926851749420166, "step": 10523, "token_acc": 0.3093289078670122 }, { "epoch": 6.1688654353562, "grad_norm": 0.18060563101544722, "learning_rate": 0.00041412099770828825, "loss": 2.9130144119262695, "step": 10524, "token_acc": 0.3111232264670292 }, { "epoch": 6.169451773673409, "grad_norm": 0.1770623020561256, "learning_rate": 0.0004141027191187392, "loss": 2.9247725009918213, "step": 10525, "token_acc": 0.30822450766088366 }, { "epoch": 6.1700381119906185, "grad_norm": 0.18831891316628133, "learning_rate": 0.0004140844389876661, "loss": 2.9035959243774414, "step": 10526, "token_acc": 0.31152345572999307 }, { "epoch": 6.170624450307828, "grad_norm": 0.1958413440426652, "learning_rate": 0.0004140661573152409, "loss": 2.9531126022338867, "step": 10527, "token_acc": 0.30398385490381086 }, { "epoch": 6.171210788625037, "grad_norm": 0.1602918303491583, "learning_rate": 0.0004140478741016351, "loss": 2.9519577026367188, "step": 10528, "token_acc": 0.30397546605559556 }, { "epoch": 6.171797126942246, "grad_norm": 0.1682345823071401, "learning_rate": 0.00041402958934702073, "loss": 2.9233193397521973, "step": 10529, "token_acc": 0.3095600904587551 }, { "epoch": 6.172383465259455, "grad_norm": 0.18582547280023443, "learning_rate": 0.00041401130305156934, "loss": 2.950817108154297, "step": 10530, "token_acc": 0.30554172443818 }, { "epoch": 6.172969803576664, "grad_norm": 0.17912075198689428, "learning_rate": 0.00041399301521545274, "loss": 2.9918973445892334, "step": 10531, "token_acc": 0.2991281791914751 }, { "epoch": 6.173556141893873, "grad_norm": 0.16451893217338048, "learning_rate": 0.00041397472583884276, "loss": 2.9460551738739014, "step": 10532, "token_acc": 0.30513219035838157 }, { "epoch": 6.174142480211081, "grad_norm": 0.17925949619925596, "learning_rate": 0.00041395643492191125, "loss": 2.9483227729797363, "step": 10533, "token_acc": 0.3051188218984975 }, { "epoch": 6.1747288185282905, "grad_norm": 0.22341550213614858, "learning_rate": 0.00041393814246482995, "loss": 2.9327199459075928, "step": 10534, "token_acc": 0.3048220704249459 }, { "epoch": 6.1753151568455, "grad_norm": 0.1973700569113499, "learning_rate": 0.00041391984846777066, "loss": 2.9637317657470703, "step": 10535, "token_acc": 0.3035690698227353 }, { "epoch": 6.175901495162709, "grad_norm": 0.1477637010128084, "learning_rate": 0.0004139015529309053, "loss": 2.9454855918884277, "step": 10536, "token_acc": 0.3064243155511895 }, { "epoch": 6.176487833479918, "grad_norm": 0.20260139194917687, "learning_rate": 0.00041388325585440573, "loss": 2.9584174156188965, "step": 10537, "token_acc": 0.3043248903413332 }, { "epoch": 6.177074171797127, "grad_norm": 0.23760680729405217, "learning_rate": 0.00041386495723844374, "loss": 2.9367425441741943, "step": 10538, "token_acc": 0.3080889637794784 }, { "epoch": 6.177660510114336, "grad_norm": 0.17734900985459684, "learning_rate": 0.00041384665708319135, "loss": 2.9404971599578857, "step": 10539, "token_acc": 0.3058731413824277 }, { "epoch": 6.178246848431545, "grad_norm": 0.16519324863706866, "learning_rate": 0.00041382835538882046, "loss": 2.978525161743164, "step": 10540, "token_acc": 0.3001695669738085 }, { "epoch": 6.178833186748754, "grad_norm": 0.2582589652827132, "learning_rate": 0.0004138100521555028, "loss": 2.920891761779785, "step": 10541, "token_acc": 0.31047757483615146 }, { "epoch": 6.179419525065963, "grad_norm": 0.1746934332268986, "learning_rate": 0.0004137917473834105, "loss": 2.9548802375793457, "step": 10542, "token_acc": 0.3042654589586478 }, { "epoch": 6.1800058633831725, "grad_norm": 0.1754999240315258, "learning_rate": 0.00041377344107271544, "loss": 2.9370555877685547, "step": 10543, "token_acc": 0.30774912339353605 }, { "epoch": 6.180592201700381, "grad_norm": 0.21744993996331352, "learning_rate": 0.0004137551332235896, "loss": 2.9531164169311523, "step": 10544, "token_acc": 0.30436384401422073 }, { "epoch": 6.18117854001759, "grad_norm": 0.15339051426044142, "learning_rate": 0.000413736823836205, "loss": 3.021946907043457, "step": 10545, "token_acc": 0.2954817197858734 }, { "epoch": 6.181764878334799, "grad_norm": 0.19900483077986467, "learning_rate": 0.00041371851291073346, "loss": 2.948702096939087, "step": 10546, "token_acc": 0.3047502672979991 }, { "epoch": 6.182351216652008, "grad_norm": 0.17602034189120178, "learning_rate": 0.0004137002004473471, "loss": 2.926104784011841, "step": 10547, "token_acc": 0.3096696221181726 }, { "epoch": 6.182937554969217, "grad_norm": 0.17336696643789493, "learning_rate": 0.0004136818864462181, "loss": 2.9257566928863525, "step": 10548, "token_acc": 0.3082824773927578 }, { "epoch": 6.183523893286426, "grad_norm": 0.19256714152817334, "learning_rate": 0.0004136635709075183, "loss": 2.921236276626587, "step": 10549, "token_acc": 0.30967753994702213 }, { "epoch": 6.184110231603635, "grad_norm": 0.1544146454721351, "learning_rate": 0.00041364525383141967, "loss": 2.9522242546081543, "step": 10550, "token_acc": 0.3029366706672508 }, { "epoch": 6.1846965699208445, "grad_norm": 0.21067754109916811, "learning_rate": 0.0004136269352180945, "loss": 2.9091482162475586, "step": 10551, "token_acc": 0.3100873637993352 }, { "epoch": 6.185282908238054, "grad_norm": 0.1795930068297206, "learning_rate": 0.0004136086150677147, "loss": 2.9366633892059326, "step": 10552, "token_acc": 0.3061824401063588 }, { "epoch": 6.185869246555263, "grad_norm": 0.1670100388762526, "learning_rate": 0.00041359029338045245, "loss": 2.993654727935791, "step": 10553, "token_acc": 0.2977459915829697 }, { "epoch": 6.186455584872472, "grad_norm": 0.1858505244011271, "learning_rate": 0.00041357197015647984, "loss": 2.9274837970733643, "step": 10554, "token_acc": 0.30759988758859486 }, { "epoch": 6.18704192318968, "grad_norm": 0.17439971124781023, "learning_rate": 0.00041355364539596897, "loss": 2.893908739089966, "step": 10555, "token_acc": 0.31203162064119017 }, { "epoch": 6.187628261506889, "grad_norm": 0.1650632333940499, "learning_rate": 0.000413535319099092, "loss": 2.9777870178222656, "step": 10556, "token_acc": 0.29979753386740343 }, { "epoch": 6.188214599824098, "grad_norm": 0.1831232880811513, "learning_rate": 0.0004135169912660211, "loss": 2.92812180519104, "step": 10557, "token_acc": 0.3076560578976051 }, { "epoch": 6.188800938141307, "grad_norm": 0.16066796612015022, "learning_rate": 0.0004134986618969284, "loss": 2.896663188934326, "step": 10558, "token_acc": 0.31246574033563157 }, { "epoch": 6.1893872764585165, "grad_norm": 0.17697673798360594, "learning_rate": 0.00041348033099198613, "loss": 2.9662580490112305, "step": 10559, "token_acc": 0.3008626017789482 }, { "epoch": 6.189973614775726, "grad_norm": 0.18479492554503982, "learning_rate": 0.0004134619985513663, "loss": 2.9578285217285156, "step": 10560, "token_acc": 0.30268922817764965 }, { "epoch": 6.190559953092935, "grad_norm": 0.16264772367238586, "learning_rate": 0.0004134436645752414, "loss": 2.9407780170440674, "step": 10561, "token_acc": 0.3058838613117868 }, { "epoch": 6.191146291410144, "grad_norm": 0.1652355056553404, "learning_rate": 0.0004134253290637834, "loss": 2.9497134685516357, "step": 10562, "token_acc": 0.30572287996054426 }, { "epoch": 6.191732629727353, "grad_norm": 0.21199181328029393, "learning_rate": 0.0004134069920171648, "loss": 2.969010829925537, "step": 10563, "token_acc": 0.3008147145171735 }, { "epoch": 6.192318968044562, "grad_norm": 0.1907498254757952, "learning_rate": 0.00041338865343555766, "loss": 2.9849185943603516, "step": 10564, "token_acc": 0.2993372721873144 }, { "epoch": 6.192905306361771, "grad_norm": 0.1561922181528007, "learning_rate": 0.00041337031331913424, "loss": 2.944082021713257, "step": 10565, "token_acc": 0.30492502131501076 }, { "epoch": 6.193491644678979, "grad_norm": 0.17282277258781387, "learning_rate": 0.0004133519716680668, "loss": 2.9318013191223145, "step": 10566, "token_acc": 0.30631725372943414 }, { "epoch": 6.1940779829961885, "grad_norm": 0.19454831251353058, "learning_rate": 0.0004133336284825278, "loss": 2.950530767440796, "step": 10567, "token_acc": 0.30722735291065684 }, { "epoch": 6.194664321313398, "grad_norm": 0.16003830194876, "learning_rate": 0.00041331528376268943, "loss": 2.8945841789245605, "step": 10568, "token_acc": 0.3139257667019766 }, { "epoch": 6.195250659630607, "grad_norm": 0.2213236382389851, "learning_rate": 0.0004132969375087241, "loss": 2.9743778705596924, "step": 10569, "token_acc": 0.30164500662418775 }, { "epoch": 6.195836997947816, "grad_norm": 0.25683636217242495, "learning_rate": 0.00041327858972080403, "loss": 2.9589996337890625, "step": 10570, "token_acc": 0.30354978077553657 }, { "epoch": 6.196423336265025, "grad_norm": 0.15495366191015236, "learning_rate": 0.00041326024039910165, "loss": 2.9925193786621094, "step": 10571, "token_acc": 0.29781304726250774 }, { "epoch": 6.197009674582234, "grad_norm": 0.20917463335259337, "learning_rate": 0.0004132418895437893, "loss": 2.926042079925537, "step": 10572, "token_acc": 0.307583912527547 }, { "epoch": 6.197596012899443, "grad_norm": 0.20123856536472745, "learning_rate": 0.00041322353715503936, "loss": 2.9659156799316406, "step": 10573, "token_acc": 0.3021300096318941 }, { "epoch": 6.198182351216652, "grad_norm": 0.17292716563372482, "learning_rate": 0.0004132051832330242, "loss": 2.9990503787994385, "step": 10574, "token_acc": 0.2991860229031586 }, { "epoch": 6.198768689533861, "grad_norm": 0.2244204564596068, "learning_rate": 0.0004131868277779164, "loss": 2.947277545928955, "step": 10575, "token_acc": 0.30520025242221416 }, { "epoch": 6.19935502785107, "grad_norm": 0.14752679539506808, "learning_rate": 0.00041316847078988814, "loss": 2.9605541229248047, "step": 10576, "token_acc": 0.30307335322252216 }, { "epoch": 6.199941366168279, "grad_norm": 0.2431009581495613, "learning_rate": 0.0004131501122691119, "loss": 2.9222159385681152, "step": 10577, "token_acc": 0.3088159066441606 }, { "epoch": 6.200527704485488, "grad_norm": 0.2851418853037055, "learning_rate": 0.0004131317522157604, "loss": 2.9670753479003906, "step": 10578, "token_acc": 0.3025158314359568 }, { "epoch": 6.201114042802697, "grad_norm": 0.16928258551544348, "learning_rate": 0.00041311339063000577, "loss": 2.9220614433288574, "step": 10579, "token_acc": 0.30965163005073143 }, { "epoch": 6.201700381119906, "grad_norm": 0.24645415675124518, "learning_rate": 0.00041309502751202064, "loss": 2.925625801086426, "step": 10580, "token_acc": 0.30825324029469736 }, { "epoch": 6.202286719437115, "grad_norm": 0.194864823867627, "learning_rate": 0.00041307666286197756, "loss": 2.945146083831787, "step": 10581, "token_acc": 0.3050396420441165 }, { "epoch": 6.202873057754324, "grad_norm": 0.18122752535097372, "learning_rate": 0.0004130582966800489, "loss": 2.9606857299804688, "step": 10582, "token_acc": 0.3025630579803757 }, { "epoch": 6.203459396071533, "grad_norm": 0.1665079446212871, "learning_rate": 0.00041303992896640735, "loss": 3.0012216567993164, "step": 10583, "token_acc": 0.2971267719930008 }, { "epoch": 6.2040457343887425, "grad_norm": 0.17074151084204162, "learning_rate": 0.00041302155972122533, "loss": 2.9974000453948975, "step": 10584, "token_acc": 0.29681415701746494 }, { "epoch": 6.204632072705952, "grad_norm": 0.15207429357068372, "learning_rate": 0.0004130031889446754, "loss": 2.977205276489258, "step": 10585, "token_acc": 0.30183684191733445 }, { "epoch": 6.205218411023161, "grad_norm": 0.1935889226728444, "learning_rate": 0.0004129848166369302, "loss": 2.9002203941345215, "step": 10586, "token_acc": 0.3133992650896384 }, { "epoch": 6.205804749340369, "grad_norm": 0.23314623253247724, "learning_rate": 0.00041296644279816225, "loss": 2.931007146835327, "step": 10587, "token_acc": 0.30650158792403764 }, { "epoch": 6.206391087657578, "grad_norm": 0.16494521807394452, "learning_rate": 0.0004129480674285442, "loss": 2.9425692558288574, "step": 10588, "token_acc": 0.30667345369927906 }, { "epoch": 6.206977425974787, "grad_norm": 0.20479280777771355, "learning_rate": 0.0004129296905282486, "loss": 2.9431004524230957, "step": 10589, "token_acc": 0.30562295327915073 }, { "epoch": 6.207563764291996, "grad_norm": 0.20022152686696465, "learning_rate": 0.00041291131209744816, "loss": 2.919126510620117, "step": 10590, "token_acc": 0.3093269473810386 }, { "epoch": 6.208150102609205, "grad_norm": 0.20575951902433143, "learning_rate": 0.0004128929321363154, "loss": 2.9182581901550293, "step": 10591, "token_acc": 0.30933843129586364 }, { "epoch": 6.2087364409264145, "grad_norm": 0.24894864085792323, "learning_rate": 0.00041287455064502305, "loss": 2.976844310760498, "step": 10592, "token_acc": 0.30072805610592196 }, { "epoch": 6.209322779243624, "grad_norm": 0.19535920025598794, "learning_rate": 0.0004128561676237438, "loss": 2.9612884521484375, "step": 10593, "token_acc": 0.3036801475684263 }, { "epoch": 6.209909117560833, "grad_norm": 0.2622808499768753, "learning_rate": 0.0004128377830726503, "loss": 2.941671371459961, "step": 10594, "token_acc": 0.30617319031043544 }, { "epoch": 6.210495455878042, "grad_norm": 0.18015495440228485, "learning_rate": 0.0004128193969919153, "loss": 2.906928300857544, "step": 10595, "token_acc": 0.31053157034773327 }, { "epoch": 6.211081794195251, "grad_norm": 0.2505207807565127, "learning_rate": 0.0004128010093817114, "loss": 2.9427521228790283, "step": 10596, "token_acc": 0.3061699265975271 }, { "epoch": 6.21166813251246, "grad_norm": 0.18876674250551867, "learning_rate": 0.0004127826202422114, "loss": 2.934694290161133, "step": 10597, "token_acc": 0.30615599615234035 }, { "epoch": 6.212254470829668, "grad_norm": 0.21086203283000965, "learning_rate": 0.00041276422957358804, "loss": 2.927766799926758, "step": 10598, "token_acc": 0.3086313902646248 }, { "epoch": 6.212840809146877, "grad_norm": 0.16388671288980408, "learning_rate": 0.00041274583737601413, "loss": 2.9911603927612305, "step": 10599, "token_acc": 0.2996580600502761 }, { "epoch": 6.2134271474640865, "grad_norm": 0.21516643860499476, "learning_rate": 0.0004127274436496623, "loss": 2.9499707221984863, "step": 10600, "token_acc": 0.3044990610364821 }, { "epoch": 6.214013485781296, "grad_norm": 0.19291350989032158, "learning_rate": 0.0004127090483947055, "loss": 2.9630448818206787, "step": 10601, "token_acc": 0.3031496271924006 }, { "epoch": 6.214599824098505, "grad_norm": 0.19917281929130404, "learning_rate": 0.00041269065161131634, "loss": 2.949312210083008, "step": 10602, "token_acc": 0.3035276077552496 }, { "epoch": 6.215186162415714, "grad_norm": 0.18042766201609928, "learning_rate": 0.00041267225329966786, "loss": 2.9870777130126953, "step": 10603, "token_acc": 0.300208843265676 }, { "epoch": 6.215772500732923, "grad_norm": 0.20520671314327915, "learning_rate": 0.00041265385345993263, "loss": 2.910634994506836, "step": 10604, "token_acc": 0.3110755708468289 }, { "epoch": 6.216358839050132, "grad_norm": 0.14879723562835254, "learning_rate": 0.0004126354520922837, "loss": 2.9380698204040527, "step": 10605, "token_acc": 0.30740860889820143 }, { "epoch": 6.216945177367341, "grad_norm": 0.21555875902024238, "learning_rate": 0.0004126170491968938, "loss": 2.920599937438965, "step": 10606, "token_acc": 0.3099641573424179 }, { "epoch": 6.21753151568455, "grad_norm": 0.17596552856219855, "learning_rate": 0.0004125986447739359, "loss": 2.9261152744293213, "step": 10607, "token_acc": 0.3083567676268006 }, { "epoch": 6.218117854001759, "grad_norm": 0.16433946264449972, "learning_rate": 0.0004125802388235829, "loss": 2.9523723125457764, "step": 10608, "token_acc": 0.3043990141854734 }, { "epoch": 6.218704192318968, "grad_norm": 0.1678183359377147, "learning_rate": 0.00041256183134600753, "loss": 2.951498508453369, "step": 10609, "token_acc": 0.30636950727718854 }, { "epoch": 6.219290530636177, "grad_norm": 0.15843412406420335, "learning_rate": 0.00041254342234138287, "loss": 2.981130599975586, "step": 10610, "token_acc": 0.3007553274146913 }, { "epoch": 6.219876868953386, "grad_norm": 0.19014444058062469, "learning_rate": 0.0004125250118098817, "loss": 2.962453842163086, "step": 10611, "token_acc": 0.3039856029672954 }, { "epoch": 6.220463207270595, "grad_norm": 0.1506582300448767, "learning_rate": 0.0004125065997516771, "loss": 2.93302583694458, "step": 10612, "token_acc": 0.3062402545242633 }, { "epoch": 6.221049545587804, "grad_norm": 0.18270628448042128, "learning_rate": 0.0004124881861669421, "loss": 2.9435229301452637, "step": 10613, "token_acc": 0.3059334028509107 }, { "epoch": 6.221635883905013, "grad_norm": 0.17059894715104043, "learning_rate": 0.0004124697710558494, "loss": 2.951679229736328, "step": 10614, "token_acc": 0.3046258878113629 }, { "epoch": 6.222222222222222, "grad_norm": 0.16600993091200414, "learning_rate": 0.0004124513544185721, "loss": 2.9631874561309814, "step": 10615, "token_acc": 0.30221542218204017 }, { "epoch": 6.222808560539431, "grad_norm": 0.1605004662713218, "learning_rate": 0.00041243293625528333, "loss": 2.976900577545166, "step": 10616, "token_acc": 0.3017416221163647 }, { "epoch": 6.2233948988566405, "grad_norm": 0.15122863828851124, "learning_rate": 0.00041241451656615595, "loss": 2.92988920211792, "step": 10617, "token_acc": 0.3089455894884698 }, { "epoch": 6.22398123717385, "grad_norm": 0.16539822499096626, "learning_rate": 0.0004123960953513631, "loss": 2.9358394145965576, "step": 10618, "token_acc": 0.30534026218206506 }, { "epoch": 6.224567575491059, "grad_norm": 0.18911295606837114, "learning_rate": 0.00041237767261107774, "loss": 2.9532246589660645, "step": 10619, "token_acc": 0.3032199429377201 }, { "epoch": 6.225153913808267, "grad_norm": 0.1907595861406025, "learning_rate": 0.0004123592483454729, "loss": 2.9623312950134277, "step": 10620, "token_acc": 0.3025152458311192 }, { "epoch": 6.225740252125476, "grad_norm": 0.16165371094321157, "learning_rate": 0.00041234082255472174, "loss": 2.8954315185546875, "step": 10621, "token_acc": 0.3123182535726155 }, { "epoch": 6.226326590442685, "grad_norm": 0.21737210165567597, "learning_rate": 0.00041232239523899736, "loss": 2.916842460632324, "step": 10622, "token_acc": 0.3094878119943768 }, { "epoch": 6.226912928759894, "grad_norm": 0.19976287297061474, "learning_rate": 0.00041230396639847273, "loss": 2.973588466644287, "step": 10623, "token_acc": 0.3016687033510994 }, { "epoch": 6.227499267077103, "grad_norm": 0.15725086569039393, "learning_rate": 0.0004122855360333211, "loss": 2.949235677719116, "step": 10624, "token_acc": 0.3049606790662839 }, { "epoch": 6.2280856053943126, "grad_norm": 0.20081187673899056, "learning_rate": 0.0004122671041437155, "loss": 2.9417974948883057, "step": 10625, "token_acc": 0.3069205860234089 }, { "epoch": 6.228671943711522, "grad_norm": 0.16556509350637266, "learning_rate": 0.0004122486707298291, "loss": 2.9606781005859375, "step": 10626, "token_acc": 0.30288652978791514 }, { "epoch": 6.229258282028731, "grad_norm": 0.18386168839143108, "learning_rate": 0.00041223023579183514, "loss": 2.932673454284668, "step": 10627, "token_acc": 0.30660960090147366 }, { "epoch": 6.22984462034594, "grad_norm": 0.18310642398555058, "learning_rate": 0.00041221179932990674, "loss": 2.942288398742676, "step": 10628, "token_acc": 0.3054085415157147 }, { "epoch": 6.230430958663149, "grad_norm": 0.15822318892666487, "learning_rate": 0.0004121933613442169, "loss": 2.953800678253174, "step": 10629, "token_acc": 0.30380770478982344 }, { "epoch": 6.231017296980358, "grad_norm": 0.1878142654466012, "learning_rate": 0.0004121749218349392, "loss": 2.9623899459838867, "step": 10630, "token_acc": 0.3034656016490595 }, { "epoch": 6.231603635297566, "grad_norm": 0.15155605011227305, "learning_rate": 0.0004121564808022465, "loss": 2.967517852783203, "step": 10631, "token_acc": 0.30249601037303014 }, { "epoch": 6.2321899736147754, "grad_norm": 0.20014022833391606, "learning_rate": 0.00041213803824631225, "loss": 2.956653118133545, "step": 10632, "token_acc": 0.3047242866245238 }, { "epoch": 6.232776311931985, "grad_norm": 0.18032777017946183, "learning_rate": 0.0004121195941673096, "loss": 2.9396469593048096, "step": 10633, "token_acc": 0.3061231566116595 }, { "epoch": 6.233362650249194, "grad_norm": 0.15933292269745034, "learning_rate": 0.00041210114856541177, "loss": 2.9592173099517822, "step": 10634, "token_acc": 0.3035840988995512 }, { "epoch": 6.233948988566403, "grad_norm": 0.2050048742268512, "learning_rate": 0.0004120827014407921, "loss": 2.911478042602539, "step": 10635, "token_acc": 0.3101621514834859 }, { "epoch": 6.234535326883612, "grad_norm": 0.17723124510921007, "learning_rate": 0.0004120642527936239, "loss": 2.9554505348205566, "step": 10636, "token_acc": 0.30478401881148076 }, { "epoch": 6.235121665200821, "grad_norm": 0.15100640746872449, "learning_rate": 0.0004120458026240804, "loss": 2.9596121311187744, "step": 10637, "token_acc": 0.3047194994676862 }, { "epoch": 6.23570800351803, "grad_norm": 0.16579914506750182, "learning_rate": 0.00041202735093233487, "loss": 2.9411568641662598, "step": 10638, "token_acc": 0.3059126640542553 }, { "epoch": 6.236294341835239, "grad_norm": 0.14967973179232463, "learning_rate": 0.0004120088977185609, "loss": 2.9596283435821533, "step": 10639, "token_acc": 0.3033594265877202 }, { "epoch": 6.236880680152448, "grad_norm": 0.2377987444772846, "learning_rate": 0.00041199044298293147, "loss": 2.903804302215576, "step": 10640, "token_acc": 0.31147248179389964 }, { "epoch": 6.237467018469657, "grad_norm": 0.2167525194078, "learning_rate": 0.0004119719867256202, "loss": 2.950406312942505, "step": 10641, "token_acc": 0.304936425247544 }, { "epoch": 6.238053356786866, "grad_norm": 0.14901019535474866, "learning_rate": 0.00041195352894680037, "loss": 2.9378597736358643, "step": 10642, "token_acc": 0.30805022156573114 }, { "epoch": 6.238639695104075, "grad_norm": 0.2271325239192477, "learning_rate": 0.00041193506964664527, "loss": 2.9547629356384277, "step": 10643, "token_acc": 0.30463851782238033 }, { "epoch": 6.239226033421284, "grad_norm": 0.19032938534746327, "learning_rate": 0.00041191660882532855, "loss": 2.9310765266418457, "step": 10644, "token_acc": 0.3073967238243222 }, { "epoch": 6.239812371738493, "grad_norm": 0.15363592005239932, "learning_rate": 0.00041189814648302336, "loss": 2.9882822036743164, "step": 10645, "token_acc": 0.2987873127127606 }, { "epoch": 6.240398710055702, "grad_norm": 0.15949628038650054, "learning_rate": 0.0004118796826199034, "loss": 2.9734439849853516, "step": 10646, "token_acc": 0.3001459846179443 }, { "epoch": 6.240985048372911, "grad_norm": 0.15152112932088116, "learning_rate": 0.0004118612172361418, "loss": 2.9163129329681396, "step": 10647, "token_acc": 0.31020423412204234 }, { "epoch": 6.24157138669012, "grad_norm": 0.15445323110352266, "learning_rate": 0.0004118427503319122, "loss": 2.8911545276641846, "step": 10648, "token_acc": 0.3149703208933219 }, { "epoch": 6.2421577250073295, "grad_norm": 0.15214329116770345, "learning_rate": 0.0004118242819073881, "loss": 2.919991970062256, "step": 10649, "token_acc": 0.3093935395277483 }, { "epoch": 6.242744063324539, "grad_norm": 0.15624040839653897, "learning_rate": 0.0004118058119627429, "loss": 2.9900355339050293, "step": 10650, "token_acc": 0.29949567778311853 }, { "epoch": 6.243330401641748, "grad_norm": 0.149668742436633, "learning_rate": 0.0004117873404981501, "loss": 2.9700136184692383, "step": 10651, "token_acc": 0.3019914859790601 }, { "epoch": 6.243916739958956, "grad_norm": 0.15648229810442482, "learning_rate": 0.0004117688675137834, "loss": 2.958305597305298, "step": 10652, "token_acc": 0.30462669618737004 }, { "epoch": 6.244503078276165, "grad_norm": 0.15091441042687145, "learning_rate": 0.000411750393009816, "loss": 2.927462577819824, "step": 10653, "token_acc": 0.30887810883893435 }, { "epoch": 6.245089416593374, "grad_norm": 0.15837125224753454, "learning_rate": 0.00041173191698642167, "loss": 2.957767963409424, "step": 10654, "token_acc": 0.3044582767721982 }, { "epoch": 6.245675754910583, "grad_norm": 0.16857482643157404, "learning_rate": 0.0004117134394437739, "loss": 2.948213577270508, "step": 10655, "token_acc": 0.30410502498818964 }, { "epoch": 6.246262093227792, "grad_norm": 0.16149726185990995, "learning_rate": 0.00041169496038204634, "loss": 2.927992105484009, "step": 10656, "token_acc": 0.3076125056979833 }, { "epoch": 6.2468484315450015, "grad_norm": 0.20331302161744527, "learning_rate": 0.00041167647980141256, "loss": 2.9670023918151855, "step": 10657, "token_acc": 0.3004794871794872 }, { "epoch": 6.247434769862211, "grad_norm": 0.2307344995926057, "learning_rate": 0.00041165799770204593, "loss": 2.954352378845215, "step": 10658, "token_acc": 0.303768237091992 }, { "epoch": 6.24802110817942, "grad_norm": 0.1846547138070567, "learning_rate": 0.0004116395140841204, "loss": 2.945906162261963, "step": 10659, "token_acc": 0.3035366356109036 }, { "epoch": 6.248607446496629, "grad_norm": 0.16546477917520447, "learning_rate": 0.0004116210289478094, "loss": 2.9402894973754883, "step": 10660, "token_acc": 0.30738529512870805 }, { "epoch": 6.249193784813838, "grad_norm": 0.21702884761918026, "learning_rate": 0.0004116025422932866, "loss": 3.002927780151367, "step": 10661, "token_acc": 0.29701584284231025 }, { "epoch": 6.249780123131047, "grad_norm": 0.2038536774505332, "learning_rate": 0.00041158405412072575, "loss": 2.921492338180542, "step": 10662, "token_acc": 0.3101951371131409 }, { "epoch": 6.250366461448255, "grad_norm": 0.15536977333799726, "learning_rate": 0.00041156556443030037, "loss": 2.936476230621338, "step": 10663, "token_acc": 0.3072034285386167 }, { "epoch": 6.250952799765464, "grad_norm": 0.18730158787331286, "learning_rate": 0.0004115470732221842, "loss": 2.9516639709472656, "step": 10664, "token_acc": 0.3054843397564818 }, { "epoch": 6.2515391380826735, "grad_norm": 0.1457630167677713, "learning_rate": 0.0004115285804965511, "loss": 2.9442296028137207, "step": 10665, "token_acc": 0.3063626443917673 }, { "epoch": 6.252125476399883, "grad_norm": 0.20472423990636346, "learning_rate": 0.00041151008625357456, "loss": 2.912977933883667, "step": 10666, "token_acc": 0.3107814258247451 }, { "epoch": 6.252711814717092, "grad_norm": 0.15686404190526013, "learning_rate": 0.0004114915904934284, "loss": 2.972034454345703, "step": 10667, "token_acc": 0.3011597360158903 }, { "epoch": 6.253298153034301, "grad_norm": 0.19565923117806938, "learning_rate": 0.00041147309321628634, "loss": 2.9297335147857666, "step": 10668, "token_acc": 0.30848329048843187 }, { "epoch": 6.25388449135151, "grad_norm": 0.2373732982714425, "learning_rate": 0.00041145459442232214, "loss": 2.9422566890716553, "step": 10669, "token_acc": 0.30595433342563205 }, { "epoch": 6.254470829668719, "grad_norm": 0.16385294194391983, "learning_rate": 0.0004114360941117097, "loss": 2.9481852054595947, "step": 10670, "token_acc": 0.30433493711665294 }, { "epoch": 6.255057167985928, "grad_norm": 0.18714660397597585, "learning_rate": 0.00041141759228462263, "loss": 2.9513778686523438, "step": 10671, "token_acc": 0.30406270533634255 }, { "epoch": 6.255643506303137, "grad_norm": 0.21346380241059978, "learning_rate": 0.00041139908894123476, "loss": 2.947744607925415, "step": 10672, "token_acc": 0.30515592504463224 }, { "epoch": 6.256229844620346, "grad_norm": 0.1500678347499334, "learning_rate": 0.00041138058408171993, "loss": 2.9296913146972656, "step": 10673, "token_acc": 0.3067953499015513 }, { "epoch": 6.256816182937555, "grad_norm": 0.23487004917199197, "learning_rate": 0.000411362077706252, "loss": 2.913830280303955, "step": 10674, "token_acc": 0.31146577579885587 }, { "epoch": 6.257402521254764, "grad_norm": 0.15953763185650172, "learning_rate": 0.00041134356981500475, "loss": 2.967125654220581, "step": 10675, "token_acc": 0.3017391736440657 }, { "epoch": 6.257988859571973, "grad_norm": 0.2107512876926024, "learning_rate": 0.0004113250604081522, "loss": 2.9537878036499023, "step": 10676, "token_acc": 0.3058582905667146 }, { "epoch": 6.258575197889182, "grad_norm": 0.16089644847551893, "learning_rate": 0.000411306549485868, "loss": 2.953906297683716, "step": 10677, "token_acc": 0.30430261626654026 }, { "epoch": 6.259161536206391, "grad_norm": 0.2097340429890355, "learning_rate": 0.0004112880370483261, "loss": 2.949415922164917, "step": 10678, "token_acc": 0.3052664324877834 }, { "epoch": 6.2597478745236, "grad_norm": 0.18834356685945014, "learning_rate": 0.0004112695230957005, "loss": 2.979276180267334, "step": 10679, "token_acc": 0.2990369016043056 }, { "epoch": 6.260334212840809, "grad_norm": 0.18317155903659374, "learning_rate": 0.00041125100762816504, "loss": 2.935314655303955, "step": 10680, "token_acc": 0.3066216549050684 }, { "epoch": 6.260920551158018, "grad_norm": 0.16897686751445973, "learning_rate": 0.00041123249064589363, "loss": 2.944854259490967, "step": 10681, "token_acc": 0.3066739259803239 }, { "epoch": 6.2615068894752275, "grad_norm": 0.17778404515846574, "learning_rate": 0.0004112139721490602, "loss": 2.9273746013641357, "step": 10682, "token_acc": 0.30693585655827643 }, { "epoch": 6.262093227792437, "grad_norm": 0.1755344305416732, "learning_rate": 0.00041119545213783883, "loss": 2.9078211784362793, "step": 10683, "token_acc": 0.3113049211772463 }, { "epoch": 6.262679566109645, "grad_norm": 0.16409451095068597, "learning_rate": 0.00041117693061240335, "loss": 2.943479061126709, "step": 10684, "token_acc": 0.30657008104666605 }, { "epoch": 6.263265904426854, "grad_norm": 0.15835245487741179, "learning_rate": 0.0004111584075729278, "loss": 2.9448561668395996, "step": 10685, "token_acc": 0.30608135742525455 }, { "epoch": 6.263852242744063, "grad_norm": 0.1818783039714962, "learning_rate": 0.00041113988301958614, "loss": 2.989250659942627, "step": 10686, "token_acc": 0.29814601686021824 }, { "epoch": 6.264438581061272, "grad_norm": 0.16556651322131954, "learning_rate": 0.0004111213569525524, "loss": 2.9533286094665527, "step": 10687, "token_acc": 0.3042867452736906 }, { "epoch": 6.265024919378481, "grad_norm": 0.15965665324936243, "learning_rate": 0.0004111028293720007, "loss": 2.949902057647705, "step": 10688, "token_acc": 0.305798246243946 }, { "epoch": 6.26561125769569, "grad_norm": 0.17584770587275117, "learning_rate": 0.00041108430027810494, "loss": 2.9338154792785645, "step": 10689, "token_acc": 0.3059811133402233 }, { "epoch": 6.2661975960128995, "grad_norm": 0.16018187994820884, "learning_rate": 0.0004110657696710393, "loss": 2.9734272956848145, "step": 10690, "token_acc": 0.30139277387735225 }, { "epoch": 6.266783934330109, "grad_norm": 0.18175213791119027, "learning_rate": 0.00041104723755097785, "loss": 3.0040438175201416, "step": 10691, "token_acc": 0.29737426935792494 }, { "epoch": 6.267370272647318, "grad_norm": 0.16634845346889324, "learning_rate": 0.00041102870391809446, "loss": 2.945624351501465, "step": 10692, "token_acc": 0.30524247122808595 }, { "epoch": 6.267956610964527, "grad_norm": 0.1659187729207687, "learning_rate": 0.0004110101687725635, "loss": 2.9529051780700684, "step": 10693, "token_acc": 0.3056869242565604 }, { "epoch": 6.268542949281736, "grad_norm": 0.1493977641897921, "learning_rate": 0.0004109916321145589, "loss": 2.944582223892212, "step": 10694, "token_acc": 0.3062965597777211 }, { "epoch": 6.269129287598945, "grad_norm": 0.1640755280303279, "learning_rate": 0.0004109730939442549, "loss": 2.9829139709472656, "step": 10695, "token_acc": 0.29930684898607823 }, { "epoch": 6.269715625916153, "grad_norm": 0.16639141992061351, "learning_rate": 0.0004109545542618256, "loss": 2.9654757976531982, "step": 10696, "token_acc": 0.30261252961305607 }, { "epoch": 6.270301964233362, "grad_norm": 0.17800215813250558, "learning_rate": 0.0004109360130674451, "loss": 2.956394672393799, "step": 10697, "token_acc": 0.303410135894554 }, { "epoch": 6.2708883025505715, "grad_norm": 0.17035056023812367, "learning_rate": 0.0004109174703612877, "loss": 2.9148378372192383, "step": 10698, "token_acc": 0.3102014983208473 }, { "epoch": 6.271474640867781, "grad_norm": 0.15090712339991302, "learning_rate": 0.0004108989261435274, "loss": 2.9530391693115234, "step": 10699, "token_acc": 0.3048058362740207 }, { "epoch": 6.27206097918499, "grad_norm": 0.17085833154876076, "learning_rate": 0.0004108803804143386, "loss": 2.9764904975891113, "step": 10700, "token_acc": 0.30132786648633914 }, { "epoch": 6.272647317502199, "grad_norm": 0.15338950763181258, "learning_rate": 0.00041086183317389543, "loss": 2.974369525909424, "step": 10701, "token_acc": 0.3027741707000354 }, { "epoch": 6.273233655819408, "grad_norm": 0.15494470286132764, "learning_rate": 0.000410843284422372, "loss": 2.964383602142334, "step": 10702, "token_acc": 0.3028221314519124 }, { "epoch": 6.273819994136617, "grad_norm": 0.17371430507974794, "learning_rate": 0.0004108247341599427, "loss": 2.9476237297058105, "step": 10703, "token_acc": 0.3035151559640724 }, { "epoch": 6.274406332453826, "grad_norm": 0.1867314652725611, "learning_rate": 0.0004108061823867818, "loss": 2.9776952266693115, "step": 10704, "token_acc": 0.3020551805327689 }, { "epoch": 6.274992670771035, "grad_norm": 0.25109485237791007, "learning_rate": 0.00041078762910306345, "loss": 2.941995143890381, "step": 10705, "token_acc": 0.30628548895899055 }, { "epoch": 6.2755790090882435, "grad_norm": 0.22445036432298882, "learning_rate": 0.00041076907430896193, "loss": 2.9733471870422363, "step": 10706, "token_acc": 0.3021375593126641 }, { "epoch": 6.276165347405453, "grad_norm": 0.16614532392500536, "learning_rate": 0.00041075051800465163, "loss": 2.9882400035858154, "step": 10707, "token_acc": 0.3001606803084854 }, { "epoch": 6.276751685722662, "grad_norm": 0.16250963895791565, "learning_rate": 0.0004107319601903069, "loss": 2.941617488861084, "step": 10708, "token_acc": 0.3065080348830877 }, { "epoch": 6.277338024039871, "grad_norm": 0.18435238114858168, "learning_rate": 0.0004107134008661019, "loss": 2.944436550140381, "step": 10709, "token_acc": 0.3057469335088736 }, { "epoch": 6.27792436235708, "grad_norm": 0.19967445659517602, "learning_rate": 0.00041069484003221113, "loss": 2.905338764190674, "step": 10710, "token_acc": 0.3113947012084515 }, { "epoch": 6.278510700674289, "grad_norm": 0.16198568081202638, "learning_rate": 0.00041067627768880886, "loss": 2.9634203910827637, "step": 10711, "token_acc": 0.30309914765605417 }, { "epoch": 6.279097038991498, "grad_norm": 0.1792011208820571, "learning_rate": 0.00041065771383606945, "loss": 2.968311309814453, "step": 10712, "token_acc": 0.3022901860535549 }, { "epoch": 6.279683377308707, "grad_norm": 0.17346103028731594, "learning_rate": 0.00041063914847416734, "loss": 2.9342784881591797, "step": 10713, "token_acc": 0.3077182751168892 }, { "epoch": 6.280269715625916, "grad_norm": 0.22581804042184764, "learning_rate": 0.0004106205816032769, "loss": 2.973550796508789, "step": 10714, "token_acc": 0.30108425021815766 }, { "epoch": 6.2808560539431255, "grad_norm": 0.21867694455752734, "learning_rate": 0.0004106020132235725, "loss": 2.9506702423095703, "step": 10715, "token_acc": 0.30548125299553114 }, { "epoch": 6.281442392260335, "grad_norm": 0.18094367586678523, "learning_rate": 0.00041058344333522856, "loss": 2.921837091445923, "step": 10716, "token_acc": 0.30944779165942615 }, { "epoch": 6.282028730577543, "grad_norm": 0.18854522143815203, "learning_rate": 0.0004105648719384196, "loss": 2.9769506454467773, "step": 10717, "token_acc": 0.30264637819808404 }, { "epoch": 6.282615068894752, "grad_norm": 0.18390104987635805, "learning_rate": 0.0004105462990333201, "loss": 2.9837560653686523, "step": 10718, "token_acc": 0.3003359462486002 }, { "epoch": 6.283201407211961, "grad_norm": 0.21556799366533075, "learning_rate": 0.00041052772462010437, "loss": 2.9653615951538086, "step": 10719, "token_acc": 0.3020806044284232 }, { "epoch": 6.28378774552917, "grad_norm": 0.1676256073899917, "learning_rate": 0.00041050914869894707, "loss": 2.9772818088531494, "step": 10720, "token_acc": 0.30162542869532083 }, { "epoch": 6.284374083846379, "grad_norm": 0.17275367495038868, "learning_rate": 0.00041049057127002256, "loss": 2.991433620452881, "step": 10721, "token_acc": 0.29839881285930725 }, { "epoch": 6.284960422163588, "grad_norm": 0.18609523295302807, "learning_rate": 0.0004104719923335053, "loss": 2.952453136444092, "step": 10722, "token_acc": 0.30565136362443235 }, { "epoch": 6.2855467604807975, "grad_norm": 0.16171305880854078, "learning_rate": 0.00041045341188957005, "loss": 2.9374561309814453, "step": 10723, "token_acc": 0.30677385220398945 }, { "epoch": 6.286133098798007, "grad_norm": 0.17842961758578577, "learning_rate": 0.0004104348299383911, "loss": 2.9456706047058105, "step": 10724, "token_acc": 0.30553190394731766 }, { "epoch": 6.286719437115216, "grad_norm": 0.2428224842586848, "learning_rate": 0.0004104162464801432, "loss": 2.9306259155273438, "step": 10725, "token_acc": 0.30736897805755303 }, { "epoch": 6.287305775432425, "grad_norm": 0.2378052573823697, "learning_rate": 0.00041039766151500077, "loss": 2.939516067504883, "step": 10726, "token_acc": 0.30643698309731404 }, { "epoch": 6.287892113749633, "grad_norm": 0.23035767689268827, "learning_rate": 0.00041037907504313853, "loss": 3.0056021213531494, "step": 10727, "token_acc": 0.29878050315890725 }, { "epoch": 6.288478452066842, "grad_norm": 0.15496644939193877, "learning_rate": 0.00041036048706473096, "loss": 2.8970255851745605, "step": 10728, "token_acc": 0.31402825969365544 }, { "epoch": 6.289064790384051, "grad_norm": 0.20608017110062515, "learning_rate": 0.0004103418975799527, "loss": 2.9943814277648926, "step": 10729, "token_acc": 0.2984552578933856 }, { "epoch": 6.28965112870126, "grad_norm": 0.26899912828276584, "learning_rate": 0.0004103233065889783, "loss": 2.965994358062744, "step": 10730, "token_acc": 0.30234542376236034 }, { "epoch": 6.2902374670184695, "grad_norm": 0.17017776867976034, "learning_rate": 0.0004103047140919825, "loss": 2.9503173828125, "step": 10731, "token_acc": 0.303994812810691 }, { "epoch": 6.290823805335679, "grad_norm": 0.18525437790137422, "learning_rate": 0.00041028612008914, "loss": 2.987330198287964, "step": 10732, "token_acc": 0.29802851438739636 }, { "epoch": 6.291410143652888, "grad_norm": 0.21629953656711082, "learning_rate": 0.0004102675245806253, "loss": 2.9833006858825684, "step": 10733, "token_acc": 0.3003798509313527 }, { "epoch": 6.291996481970097, "grad_norm": 0.15273965632863779, "learning_rate": 0.00041024892756661325, "loss": 2.9671432971954346, "step": 10734, "token_acc": 0.3008568336622318 }, { "epoch": 6.292582820287306, "grad_norm": 0.20384874288048652, "learning_rate": 0.0004102303290472784, "loss": 2.973121166229248, "step": 10735, "token_acc": 0.30207967149138226 }, { "epoch": 6.293169158604515, "grad_norm": 0.16719179953947944, "learning_rate": 0.00041021172902279553, "loss": 2.938772678375244, "step": 10736, "token_acc": 0.3071587031384009 }, { "epoch": 6.293755496921724, "grad_norm": 0.2056027587014875, "learning_rate": 0.0004101931274933394, "loss": 2.9587855339050293, "step": 10737, "token_acc": 0.30437942797166095 }, { "epoch": 6.294341835238933, "grad_norm": 0.19024847996486047, "learning_rate": 0.00041017452445908463, "loss": 2.9432830810546875, "step": 10738, "token_acc": 0.3062739482972946 }, { "epoch": 6.2949281735561415, "grad_norm": 0.18658931745565918, "learning_rate": 0.00041015591992020614, "loss": 2.933835029602051, "step": 10739, "token_acc": 0.3055280905892883 }, { "epoch": 6.295514511873351, "grad_norm": 0.21594335753156196, "learning_rate": 0.00041013731387687857, "loss": 2.98113751411438, "step": 10740, "token_acc": 0.3006611882246285 }, { "epoch": 6.29610085019056, "grad_norm": 0.1716717836494016, "learning_rate": 0.0004101187063292766, "loss": 2.9529056549072266, "step": 10741, "token_acc": 0.30346752739282407 }, { "epoch": 6.296687188507769, "grad_norm": 0.2432535300491896, "learning_rate": 0.00041010009727757526, "loss": 2.987607955932617, "step": 10742, "token_acc": 0.3003444897278479 }, { "epoch": 6.297273526824978, "grad_norm": 0.16032287542900392, "learning_rate": 0.0004100814867219492, "loss": 2.9829154014587402, "step": 10743, "token_acc": 0.3025430167358644 }, { "epoch": 6.297859865142187, "grad_norm": 0.294085684663191, "learning_rate": 0.00041006287466257337, "loss": 2.992262601852417, "step": 10744, "token_acc": 0.29858235432450947 }, { "epoch": 6.298446203459396, "grad_norm": 0.1593908889941105, "learning_rate": 0.0004100442610996224, "loss": 2.9359092712402344, "step": 10745, "token_acc": 0.3088082039767249 }, { "epoch": 6.299032541776605, "grad_norm": 0.2248581368980056, "learning_rate": 0.0004100256460332713, "loss": 2.978719711303711, "step": 10746, "token_acc": 0.3001815203897472 }, { "epoch": 6.299618880093814, "grad_norm": 0.1814648435840269, "learning_rate": 0.00041000702946369493, "loss": 2.9123282432556152, "step": 10747, "token_acc": 0.3106841949447507 }, { "epoch": 6.3002052184110235, "grad_norm": 0.22385824081681255, "learning_rate": 0.00040998841139106815, "loss": 2.9500255584716797, "step": 10748, "token_acc": 0.3046547691013746 }, { "epoch": 6.300791556728232, "grad_norm": 0.1639904790435948, "learning_rate": 0.0004099697918155658, "loss": 2.966064453125, "step": 10749, "token_acc": 0.30261989688634133 }, { "epoch": 6.301377895045441, "grad_norm": 0.1943748910450793, "learning_rate": 0.0004099511707373628, "loss": 2.9536399841308594, "step": 10750, "token_acc": 0.3056278753174933 }, { "epoch": 6.30196423336265, "grad_norm": 0.1649119386355125, "learning_rate": 0.0004099325481566341, "loss": 2.944915771484375, "step": 10751, "token_acc": 0.30660730261540153 }, { "epoch": 6.302550571679859, "grad_norm": 0.2000970836482914, "learning_rate": 0.0004099139240735546, "loss": 2.96683406829834, "step": 10752, "token_acc": 0.3023114666775932 }, { "epoch": 6.303136909997068, "grad_norm": 0.1568036186972172, "learning_rate": 0.0004098952984882993, "loss": 2.9767005443573, "step": 10753, "token_acc": 0.30112432190068533 }, { "epoch": 6.303723248314277, "grad_norm": 0.1849768311965063, "learning_rate": 0.0004098766714010431, "loss": 2.910928249359131, "step": 10754, "token_acc": 0.31102051305388323 }, { "epoch": 6.304309586631486, "grad_norm": 0.160824079004354, "learning_rate": 0.00040985804281196104, "loss": 2.996994972229004, "step": 10755, "token_acc": 0.29731959309805167 }, { "epoch": 6.3048959249486956, "grad_norm": 0.16811320855697715, "learning_rate": 0.000409839412721228, "loss": 2.9906435012817383, "step": 10756, "token_acc": 0.30185384569139373 }, { "epoch": 6.305482263265905, "grad_norm": 0.15207529638282197, "learning_rate": 0.0004098207811290192, "loss": 2.9579620361328125, "step": 10757, "token_acc": 0.3035878659739322 }, { "epoch": 6.306068601583114, "grad_norm": 0.19288553176804735, "learning_rate": 0.0004098021480355095, "loss": 2.99324893951416, "step": 10758, "token_acc": 0.2994201955947556 }, { "epoch": 6.306654939900323, "grad_norm": 0.20553617397124965, "learning_rate": 0.0004097835134408739, "loss": 2.9699487686157227, "step": 10759, "token_acc": 0.30162164960461774 }, { "epoch": 6.307241278217531, "grad_norm": 0.15684226834572326, "learning_rate": 0.0004097648773452876, "loss": 2.9380526542663574, "step": 10760, "token_acc": 0.30702303521597507 }, { "epoch": 6.30782761653474, "grad_norm": 0.15898458431356147, "learning_rate": 0.0004097462397489253, "loss": 3.0039005279541016, "step": 10761, "token_acc": 0.29803386889360584 }, { "epoch": 6.308413954851949, "grad_norm": 0.17580300484744313, "learning_rate": 0.00040972760065196255, "loss": 2.9903974533081055, "step": 10762, "token_acc": 0.2973094996536004 }, { "epoch": 6.3090002931691584, "grad_norm": 0.25844011839613906, "learning_rate": 0.00040970896005457425, "loss": 2.925718307495117, "step": 10763, "token_acc": 0.3072864578018165 }, { "epoch": 6.309586631486368, "grad_norm": 0.2181514226615147, "learning_rate": 0.0004096903179569354, "loss": 2.9795279502868652, "step": 10764, "token_acc": 0.2998364532948829 }, { "epoch": 6.310172969803577, "grad_norm": 0.16606603364458433, "learning_rate": 0.00040967167435922126, "loss": 2.9574387073516846, "step": 10765, "token_acc": 0.30466992445521823 }, { "epoch": 6.310759308120786, "grad_norm": 0.17189873451296944, "learning_rate": 0.0004096530292616068, "loss": 2.946988105773926, "step": 10766, "token_acc": 0.30562167909265414 }, { "epoch": 6.311345646437995, "grad_norm": 0.1541542443566039, "learning_rate": 0.0004096343826642673, "loss": 2.9323983192443848, "step": 10767, "token_acc": 0.3067842168624684 }, { "epoch": 6.311931984755204, "grad_norm": 0.16921138473562003, "learning_rate": 0.000409615734567378, "loss": 2.953695774078369, "step": 10768, "token_acc": 0.30343248952813 }, { "epoch": 6.312518323072413, "grad_norm": 0.16116794866834921, "learning_rate": 0.0004095970849711138, "loss": 2.958869457244873, "step": 10769, "token_acc": 0.30621989926436816 }, { "epoch": 6.313104661389621, "grad_norm": 0.16758321234128207, "learning_rate": 0.00040957843387565016, "loss": 2.9617886543273926, "step": 10770, "token_acc": 0.30238406253140054 }, { "epoch": 6.3136909997068305, "grad_norm": 0.1783437112168568, "learning_rate": 0.0004095597812811621, "loss": 2.952045202255249, "step": 10771, "token_acc": 0.30430610216865195 }, { "epoch": 6.31427733802404, "grad_norm": 0.15884957519110054, "learning_rate": 0.0004095411271878249, "loss": 2.9516546726226807, "step": 10772, "token_acc": 0.305449233951361 }, { "epoch": 6.314863676341249, "grad_norm": 0.15640376890595595, "learning_rate": 0.00040952247159581383, "loss": 2.9838786125183105, "step": 10773, "token_acc": 0.2995808277929284 }, { "epoch": 6.315450014658458, "grad_norm": 0.1877203457120048, "learning_rate": 0.00040950381450530416, "loss": 2.944627285003662, "step": 10774, "token_acc": 0.30475273216431986 }, { "epoch": 6.316036352975667, "grad_norm": 0.1810460021781974, "learning_rate": 0.00040948515591647094, "loss": 3.0127995014190674, "step": 10775, "token_acc": 0.2967367517250767 }, { "epoch": 6.316622691292876, "grad_norm": 0.17636068801339932, "learning_rate": 0.0004094664958294897, "loss": 2.928328514099121, "step": 10776, "token_acc": 0.30906427956368254 }, { "epoch": 6.317209029610085, "grad_norm": 0.17743338094801225, "learning_rate": 0.00040944783424453555, "loss": 2.993743896484375, "step": 10777, "token_acc": 0.29896734306298123 }, { "epoch": 6.317795367927294, "grad_norm": 0.1611740609120004, "learning_rate": 0.0004094291711617839, "loss": 2.9498472213745117, "step": 10778, "token_acc": 0.30496136786378597 }, { "epoch": 6.318381706244503, "grad_norm": 0.25534089475162786, "learning_rate": 0.00040941050658141004, "loss": 2.963923692703247, "step": 10779, "token_acc": 0.3022430900817393 }, { "epoch": 6.3189680445617125, "grad_norm": 0.21271098988981793, "learning_rate": 0.00040939184050358925, "loss": 2.9459118843078613, "step": 10780, "token_acc": 0.3060134207182 }, { "epoch": 6.319554382878922, "grad_norm": 0.21128522894272903, "learning_rate": 0.00040937317292849687, "loss": 2.965941905975342, "step": 10781, "token_acc": 0.3023571986021842 }, { "epoch": 6.32014072119613, "grad_norm": 0.21093245312582917, "learning_rate": 0.00040935450385630836, "loss": 2.956766128540039, "step": 10782, "token_acc": 0.30266745190818767 }, { "epoch": 6.320727059513339, "grad_norm": 0.2275620704213413, "learning_rate": 0.000409335833287199, "loss": 2.9423980712890625, "step": 10783, "token_acc": 0.306085447433278 }, { "epoch": 6.321313397830548, "grad_norm": 0.2927460585484521, "learning_rate": 0.0004093171612213441, "loss": 2.919191360473633, "step": 10784, "token_acc": 0.3102671628824848 }, { "epoch": 6.321899736147757, "grad_norm": 0.1822316931657284, "learning_rate": 0.00040929848765891926, "loss": 3.0211868286132812, "step": 10785, "token_acc": 0.2941877710641926 }, { "epoch": 6.322486074464966, "grad_norm": 0.27725910245655755, "learning_rate": 0.0004092798126000997, "loss": 2.9302666187286377, "step": 10786, "token_acc": 0.3082778714427775 }, { "epoch": 6.323072412782175, "grad_norm": 0.18790672437481956, "learning_rate": 0.000409261136045061, "loss": 2.978792905807495, "step": 10787, "token_acc": 0.30181798695122114 }, { "epoch": 6.3236587510993845, "grad_norm": 0.2329379330505155, "learning_rate": 0.0004092424579939785, "loss": 2.9792137145996094, "step": 10788, "token_acc": 0.3018024509489955 }, { "epoch": 6.324245089416594, "grad_norm": 0.1906342970821725, "learning_rate": 0.0004092237784470277, "loss": 2.944913864135742, "step": 10789, "token_acc": 0.307067015681557 }, { "epoch": 6.324831427733803, "grad_norm": 0.24182390121189637, "learning_rate": 0.0004092050974043841, "loss": 2.964467763900757, "step": 10790, "token_acc": 0.3020452356201916 }, { "epoch": 6.325417766051012, "grad_norm": 0.18254085837019518, "learning_rate": 0.0004091864148662231, "loss": 2.9857306480407715, "step": 10791, "token_acc": 0.29925223483313756 }, { "epoch": 6.32600410436822, "grad_norm": 0.21948705277912348, "learning_rate": 0.0004091677308327203, "loss": 2.978860378265381, "step": 10792, "token_acc": 0.30232393868757507 }, { "epoch": 6.326590442685429, "grad_norm": 0.1741871495638311, "learning_rate": 0.0004091490453040511, "loss": 2.9424777030944824, "step": 10793, "token_acc": 0.3054735118897828 }, { "epoch": 6.327176781002638, "grad_norm": 0.20933469060022383, "learning_rate": 0.00040913035828039104, "loss": 2.9874415397644043, "step": 10794, "token_acc": 0.2989740478953476 }, { "epoch": 6.327763119319847, "grad_norm": 0.1822761139502352, "learning_rate": 0.0004091116697619157, "loss": 2.967315196990967, "step": 10795, "token_acc": 0.30227283604630756 }, { "epoch": 6.3283494576370565, "grad_norm": 0.17049090060171704, "learning_rate": 0.00040909297974880065, "loss": 2.94614839553833, "step": 10796, "token_acc": 0.3047003139790993 }, { "epoch": 6.328935795954266, "grad_norm": 0.17771488769610239, "learning_rate": 0.0004090742882412215, "loss": 2.950713634490967, "step": 10797, "token_acc": 0.3043460049269208 }, { "epoch": 6.329522134271475, "grad_norm": 0.17986301762646423, "learning_rate": 0.00040905559523935365, "loss": 2.962371826171875, "step": 10798, "token_acc": 0.3024503940871592 }, { "epoch": 6.330108472588684, "grad_norm": 0.19717726009794678, "learning_rate": 0.0004090369007433729, "loss": 2.9636647701263428, "step": 10799, "token_acc": 0.30354625597003937 }, { "epoch": 6.330694810905893, "grad_norm": 0.18564921448974336, "learning_rate": 0.00040901820475345473, "loss": 2.923710346221924, "step": 10800, "token_acc": 0.3088022216855832 }, { "epoch": 6.331281149223102, "grad_norm": 0.17776704863023265, "learning_rate": 0.00040899950726977486, "loss": 2.9480600357055664, "step": 10801, "token_acc": 0.30531636190425787 }, { "epoch": 6.331867487540311, "grad_norm": 0.1734457013896376, "learning_rate": 0.00040898080829250886, "loss": 2.974599838256836, "step": 10802, "token_acc": 0.30180354435673584 }, { "epoch": 6.33245382585752, "grad_norm": 0.15816398986472657, "learning_rate": 0.0004089621078218323, "loss": 2.9576454162597656, "step": 10803, "token_acc": 0.303144851639038 }, { "epoch": 6.3330401641747285, "grad_norm": 0.15317147080163063, "learning_rate": 0.000408943405857921, "loss": 2.9852488040924072, "step": 10804, "token_acc": 0.2987735754478596 }, { "epoch": 6.333626502491938, "grad_norm": 0.15577659438689084, "learning_rate": 0.00040892470240095057, "loss": 2.9559624195098877, "step": 10805, "token_acc": 0.30403510183111787 }, { "epoch": 6.334212840809147, "grad_norm": 0.15648671109347695, "learning_rate": 0.0004089059974510968, "loss": 2.9770126342773438, "step": 10806, "token_acc": 0.30069942778810504 }, { "epoch": 6.334799179126356, "grad_norm": 0.18132125651048198, "learning_rate": 0.0004088872910085352, "loss": 2.956864595413208, "step": 10807, "token_acc": 0.30368590977451326 }, { "epoch": 6.335385517443565, "grad_norm": 0.19737683119097826, "learning_rate": 0.00040886858307344164, "loss": 2.9990720748901367, "step": 10808, "token_acc": 0.29925698180886495 }, { "epoch": 6.335971855760774, "grad_norm": 0.1546178639370173, "learning_rate": 0.0004088498736459918, "loss": 2.935983180999756, "step": 10809, "token_acc": 0.3057188953886586 }, { "epoch": 6.336558194077983, "grad_norm": 0.20306841301271356, "learning_rate": 0.0004088311627263615, "loss": 2.9671216011047363, "step": 10810, "token_acc": 0.3014625050085788 }, { "epoch": 6.337144532395192, "grad_norm": 0.14823643827571703, "learning_rate": 0.00040881245031472647, "loss": 2.935427665710449, "step": 10811, "token_acc": 0.3076261662702872 }, { "epoch": 6.337730870712401, "grad_norm": 0.18450799358697834, "learning_rate": 0.00040879373641126244, "loss": 2.957122325897217, "step": 10812, "token_acc": 0.30556591827405566 }, { "epoch": 6.3383172090296105, "grad_norm": 0.19014421271930487, "learning_rate": 0.0004087750210161452, "loss": 2.9455313682556152, "step": 10813, "token_acc": 0.3043840184798358 }, { "epoch": 6.338903547346819, "grad_norm": 0.1923612830543601, "learning_rate": 0.0004087563041295506, "loss": 2.9569859504699707, "step": 10814, "token_acc": 0.30666536869231786 }, { "epoch": 6.339489885664028, "grad_norm": 0.28448137683699515, "learning_rate": 0.0004087375857516545, "loss": 2.96341609954834, "step": 10815, "token_acc": 0.3042254049391051 }, { "epoch": 6.340076223981237, "grad_norm": 0.18397853657627436, "learning_rate": 0.0004087188658826326, "loss": 2.9583353996276855, "step": 10816, "token_acc": 0.30391012862691713 }, { "epoch": 6.340662562298446, "grad_norm": 0.2942493319439405, "learning_rate": 0.0004087001445226609, "loss": 2.974437952041626, "step": 10817, "token_acc": 0.3010475423045931 }, { "epoch": 6.341248900615655, "grad_norm": 0.17362676824394588, "learning_rate": 0.00040868142167191525, "loss": 2.936406373977661, "step": 10818, "token_acc": 0.3073456145232333 }, { "epoch": 6.341835238932864, "grad_norm": 0.2838777418632287, "learning_rate": 0.0004086626973305714, "loss": 2.9297938346862793, "step": 10819, "token_acc": 0.30684104358262054 }, { "epoch": 6.342421577250073, "grad_norm": 0.190440661248757, "learning_rate": 0.00040864397149880527, "loss": 3.0005035400390625, "step": 10820, "token_acc": 0.29801646126111464 }, { "epoch": 6.3430079155672825, "grad_norm": 0.2651197462610228, "learning_rate": 0.0004086252441767928, "loss": 2.974175214767456, "step": 10821, "token_acc": 0.30242416790897586 }, { "epoch": 6.343594253884492, "grad_norm": 0.2036625697360712, "learning_rate": 0.00040860651536471, "loss": 2.9158129692077637, "step": 10822, "token_acc": 0.3097171734847822 }, { "epoch": 6.344180592201701, "grad_norm": 0.21847400739862124, "learning_rate": 0.0004085877850627326, "loss": 2.9157142639160156, "step": 10823, "token_acc": 0.3093898545219346 }, { "epoch": 6.34476693051891, "grad_norm": 0.16971870620343465, "learning_rate": 0.0004085690532710368, "loss": 2.9479660987854004, "step": 10824, "token_acc": 0.30397138445920224 }, { "epoch": 6.345353268836118, "grad_norm": 0.23650637259409174, "learning_rate": 0.0004085503199897983, "loss": 2.945699691772461, "step": 10825, "token_acc": 0.3046584130669763 }, { "epoch": 6.345939607153327, "grad_norm": 0.15974963529213068, "learning_rate": 0.00040853158521919326, "loss": 2.938174247741699, "step": 10826, "token_acc": 0.3070989762851862 }, { "epoch": 6.346525945470536, "grad_norm": 0.23098417142956673, "learning_rate": 0.00040851284895939755, "loss": 2.9526498317718506, "step": 10827, "token_acc": 0.30498470783924453 }, { "epoch": 6.347112283787745, "grad_norm": 0.1847132406253998, "learning_rate": 0.00040849411121058726, "loss": 2.9610795974731445, "step": 10828, "token_acc": 0.303367884830593 }, { "epoch": 6.3476986221049545, "grad_norm": 0.20438673346664957, "learning_rate": 0.00040847537197293834, "loss": 2.970602035522461, "step": 10829, "token_acc": 0.30247527423818443 }, { "epoch": 6.348284960422164, "grad_norm": 0.1735034025965014, "learning_rate": 0.0004084566312466269, "loss": 2.9392409324645996, "step": 10830, "token_acc": 0.30602609500111655 }, { "epoch": 6.348871298739373, "grad_norm": 0.169675159898126, "learning_rate": 0.0004084378890318289, "loss": 2.9660706520080566, "step": 10831, "token_acc": 0.30317148614480255 }, { "epoch": 6.349457637056582, "grad_norm": 0.17645184364705807, "learning_rate": 0.0004084191453287204, "loss": 2.962535858154297, "step": 10832, "token_acc": 0.30366989433908015 }, { "epoch": 6.350043975373791, "grad_norm": 0.1638831989949693, "learning_rate": 0.0004084004001374775, "loss": 2.9805829524993896, "step": 10833, "token_acc": 0.30172266802374537 }, { "epoch": 6.350630313691, "grad_norm": 0.1684463481529548, "learning_rate": 0.00040838165345827635, "loss": 2.986117362976074, "step": 10834, "token_acc": 0.2994642451875142 }, { "epoch": 6.351216652008208, "grad_norm": 0.1660768017385439, "learning_rate": 0.00040836290529129294, "loss": 2.9212846755981445, "step": 10835, "token_acc": 0.3096131433128182 }, { "epoch": 6.351802990325417, "grad_norm": 0.17403147724622514, "learning_rate": 0.00040834415563670344, "loss": 2.953073024749756, "step": 10836, "token_acc": 0.30453788885591526 }, { "epoch": 6.3523893286426265, "grad_norm": 0.16854161116773753, "learning_rate": 0.000408325404494684, "loss": 2.9603171348571777, "step": 10837, "token_acc": 0.30508944847669206 }, { "epoch": 6.352975666959836, "grad_norm": 0.16632996679032383, "learning_rate": 0.0004083066518654107, "loss": 2.959322690963745, "step": 10838, "token_acc": 0.30260860376959753 }, { "epoch": 6.353562005277045, "grad_norm": 0.15416111578109223, "learning_rate": 0.00040828789774905973, "loss": 2.9420764446258545, "step": 10839, "token_acc": 0.30680719345819935 }, { "epoch": 6.354148343594254, "grad_norm": 0.18643884886268097, "learning_rate": 0.00040826914214580723, "loss": 2.9215569496154785, "step": 10840, "token_acc": 0.30880543400169813 }, { "epoch": 6.354734681911463, "grad_norm": 0.15989325044506605, "learning_rate": 0.00040825038505582943, "loss": 2.9394874572753906, "step": 10841, "token_acc": 0.3070223503965393 }, { "epoch": 6.355321020228672, "grad_norm": 0.18561601282956042, "learning_rate": 0.00040823162647930257, "loss": 2.9898955821990967, "step": 10842, "token_acc": 0.2988686151775287 }, { "epoch": 6.355907358545881, "grad_norm": 0.15419911008065623, "learning_rate": 0.00040821286641640273, "loss": 2.936950445175171, "step": 10843, "token_acc": 0.30715273995982106 }, { "epoch": 6.35649369686309, "grad_norm": 0.15529703422324756, "learning_rate": 0.00040819410486730626, "loss": 2.9549407958984375, "step": 10844, "token_acc": 0.30446500412727096 }, { "epoch": 6.357080035180299, "grad_norm": 0.15018961002272643, "learning_rate": 0.0004081753418321893, "loss": 2.9974050521850586, "step": 10845, "token_acc": 0.2985105271501974 }, { "epoch": 6.3576663734975085, "grad_norm": 0.16635531336148643, "learning_rate": 0.00040815657731122813, "loss": 2.942469358444214, "step": 10846, "token_acc": 0.3047651272779611 }, { "epoch": 6.358252711814717, "grad_norm": 0.188006068034605, "learning_rate": 0.00040813781130459906, "loss": 2.989856004714966, "step": 10847, "token_acc": 0.2989411634028461 }, { "epoch": 6.358839050131926, "grad_norm": 0.14892789592477348, "learning_rate": 0.0004081190438124783, "loss": 2.961075782775879, "step": 10848, "token_acc": 0.30411342417198556 }, { "epoch": 6.359425388449135, "grad_norm": 0.18338071795135955, "learning_rate": 0.00040810027483504226, "loss": 2.936551570892334, "step": 10849, "token_acc": 0.3042010289289315 }, { "epoch": 6.360011726766344, "grad_norm": 0.17708582958768235, "learning_rate": 0.00040808150437246716, "loss": 2.945286273956299, "step": 10850, "token_acc": 0.30593272234466684 }, { "epoch": 6.360598065083553, "grad_norm": 0.16148340883805717, "learning_rate": 0.00040806273242492935, "loss": 2.996377468109131, "step": 10851, "token_acc": 0.30021760875117054 }, { "epoch": 6.361184403400762, "grad_norm": 0.19331688261037655, "learning_rate": 0.00040804395899260513, "loss": 2.9838149547576904, "step": 10852, "token_acc": 0.3001470534932972 }, { "epoch": 6.361770741717971, "grad_norm": 0.17344990666641738, "learning_rate": 0.0004080251840756709, "loss": 2.957798957824707, "step": 10853, "token_acc": 0.3030363135805016 }, { "epoch": 6.3623570800351805, "grad_norm": 0.1570562756866253, "learning_rate": 0.000408006407674303, "loss": 2.986405849456787, "step": 10854, "token_acc": 0.29971788671502897 }, { "epoch": 6.36294341835239, "grad_norm": 0.17383835423386193, "learning_rate": 0.00040798762978867787, "loss": 2.9317831993103027, "step": 10855, "token_acc": 0.30786995146273544 }, { "epoch": 6.363529756669599, "grad_norm": 0.15056905926299197, "learning_rate": 0.0004079688504189718, "loss": 2.944356918334961, "step": 10856, "token_acc": 0.3055736354483418 }, { "epoch": 6.364116094986807, "grad_norm": 0.18679124920399348, "learning_rate": 0.00040795006956536125, "loss": 2.9912161827087402, "step": 10857, "token_acc": 0.29985641122564965 }, { "epoch": 6.364702433304016, "grad_norm": 0.21837390433273712, "learning_rate": 0.00040793128722802267, "loss": 2.990884304046631, "step": 10858, "token_acc": 0.2994574378423936 }, { "epoch": 6.365288771621225, "grad_norm": 0.16203994532035224, "learning_rate": 0.00040791250340713235, "loss": 2.9752655029296875, "step": 10859, "token_acc": 0.30146775015310506 }, { "epoch": 6.365875109938434, "grad_norm": 0.2068809153540409, "learning_rate": 0.000407893718102867, "loss": 2.933103322982788, "step": 10860, "token_acc": 0.30650429881073576 }, { "epoch": 6.366461448255643, "grad_norm": 0.32050532498582485, "learning_rate": 0.0004078749313154028, "loss": 2.973238945007324, "step": 10861, "token_acc": 0.3005563483028972 }, { "epoch": 6.3670477865728525, "grad_norm": 0.19336847793540174, "learning_rate": 0.00040785614304491647, "loss": 2.95566725730896, "step": 10862, "token_acc": 0.3036985276545339 }, { "epoch": 6.367634124890062, "grad_norm": 0.2630405707313785, "learning_rate": 0.00040783735329158433, "loss": 2.953232765197754, "step": 10863, "token_acc": 0.3042019102513968 }, { "epoch": 6.368220463207271, "grad_norm": 0.2316992143845289, "learning_rate": 0.0004078185620555829, "loss": 2.9680933952331543, "step": 10864, "token_acc": 0.30257090709883083 }, { "epoch": 6.36880680152448, "grad_norm": 0.1831685546451254, "learning_rate": 0.00040779976933708886, "loss": 2.942295551300049, "step": 10865, "token_acc": 0.3059243500330582 }, { "epoch": 6.369393139841689, "grad_norm": 0.1840300298439578, "learning_rate": 0.00040778097513627853, "loss": 2.9506733417510986, "step": 10866, "token_acc": 0.3060844126736971 }, { "epoch": 6.369979478158898, "grad_norm": 0.19895509226032182, "learning_rate": 0.0004077621794533286, "loss": 2.9362006187438965, "step": 10867, "token_acc": 0.30948858946736557 }, { "epoch": 6.370565816476106, "grad_norm": 0.17036374163993093, "learning_rate": 0.00040774338228841557, "loss": 2.987150192260742, "step": 10868, "token_acc": 0.29975110974264235 }, { "epoch": 6.371152154793315, "grad_norm": 0.19105024764681075, "learning_rate": 0.000407724583641716, "loss": 3.0011301040649414, "step": 10869, "token_acc": 0.29667671881640995 }, { "epoch": 6.3717384931105245, "grad_norm": 0.16941502393887453, "learning_rate": 0.0004077057835134065, "loss": 2.9660325050354004, "step": 10870, "token_acc": 0.3042255296685287 }, { "epoch": 6.372324831427734, "grad_norm": 0.20297351491449264, "learning_rate": 0.0004076869819036637, "loss": 2.9667439460754395, "step": 10871, "token_acc": 0.3017885362209658 }, { "epoch": 6.372911169744943, "grad_norm": 0.15621510987112286, "learning_rate": 0.00040766817881266425, "loss": 2.9813055992126465, "step": 10872, "token_acc": 0.3009944215377153 }, { "epoch": 6.373497508062152, "grad_norm": 0.1879396645837518, "learning_rate": 0.00040764937424058465, "loss": 2.9365572929382324, "step": 10873, "token_acc": 0.3072442572294036 }, { "epoch": 6.374083846379361, "grad_norm": 0.15944379709442696, "learning_rate": 0.0004076305681876016, "loss": 2.928072214126587, "step": 10874, "token_acc": 0.3082971599295034 }, { "epoch": 6.37467018469657, "grad_norm": 0.16772170958770188, "learning_rate": 0.0004076117606538918, "loss": 2.955548048019409, "step": 10875, "token_acc": 0.30468142036308593 }, { "epoch": 6.375256523013779, "grad_norm": 0.17314208271890746, "learning_rate": 0.000407592951639632, "loss": 2.9402618408203125, "step": 10876, "token_acc": 0.3062215143540105 }, { "epoch": 6.375842861330988, "grad_norm": 0.1467150302752035, "learning_rate": 0.0004075741411449986, "loss": 2.9789655208587646, "step": 10877, "token_acc": 0.2999671293838024 }, { "epoch": 6.3764291996481965, "grad_norm": 0.1695742513699787, "learning_rate": 0.0004075553291701685, "loss": 2.918882131576538, "step": 10878, "token_acc": 0.31006785437969175 }, { "epoch": 6.377015537965406, "grad_norm": 0.15877419773961907, "learning_rate": 0.00040753651571531844, "loss": 2.98256254196167, "step": 10879, "token_acc": 0.30085511849499147 }, { "epoch": 6.377601876282615, "grad_norm": 0.15984131095840176, "learning_rate": 0.00040751770078062513, "loss": 2.9471163749694824, "step": 10880, "token_acc": 0.304554722982506 }, { "epoch": 6.378188214599824, "grad_norm": 0.16875222079120633, "learning_rate": 0.00040749888436626523, "loss": 2.935910224914551, "step": 10881, "token_acc": 0.30772079292530397 }, { "epoch": 6.378774552917033, "grad_norm": 0.16437580296060417, "learning_rate": 0.0004074800664724155, "loss": 2.967566728591919, "step": 10882, "token_acc": 0.3023087170028455 }, { "epoch": 6.379360891234242, "grad_norm": 0.1684293100389546, "learning_rate": 0.00040746124709925286, "loss": 2.9030261039733887, "step": 10883, "token_acc": 0.31215960790906916 }, { "epoch": 6.379947229551451, "grad_norm": 0.16529405148219053, "learning_rate": 0.00040744242624695396, "loss": 2.946683883666992, "step": 10884, "token_acc": 0.3055111758736437 }, { "epoch": 6.38053356786866, "grad_norm": 0.1709058387925002, "learning_rate": 0.0004074236039156956, "loss": 2.970705509185791, "step": 10885, "token_acc": 0.3017043384600763 }, { "epoch": 6.381119906185869, "grad_norm": 0.17452504406676456, "learning_rate": 0.00040740478010565465, "loss": 2.9911231994628906, "step": 10886, "token_acc": 0.2963657300666984 }, { "epoch": 6.3817062445030786, "grad_norm": 0.2104857927652964, "learning_rate": 0.0004073859548170078, "loss": 2.989306926727295, "step": 10887, "token_acc": 0.30027754469760537 }, { "epoch": 6.382292582820288, "grad_norm": 0.1814747431436101, "learning_rate": 0.00040736712804993204, "loss": 2.985593795776367, "step": 10888, "token_acc": 0.299658376153076 }, { "epoch": 6.382878921137497, "grad_norm": 0.14911102478475433, "learning_rate": 0.00040734829980460413, "loss": 2.991454601287842, "step": 10889, "token_acc": 0.2997452912567272 }, { "epoch": 6.383465259454705, "grad_norm": 0.20297154006380874, "learning_rate": 0.000407329470081201, "loss": 2.9836232662200928, "step": 10890, "token_acc": 0.29891252453450745 }, { "epoch": 6.384051597771914, "grad_norm": 0.21397312152753128, "learning_rate": 0.00040731063887989953, "loss": 2.9542083740234375, "step": 10891, "token_acc": 0.30463627606351573 }, { "epoch": 6.384637936089123, "grad_norm": 0.16555912180259255, "learning_rate": 0.00040729180620087657, "loss": 2.9222214221954346, "step": 10892, "token_acc": 0.3094107805857626 }, { "epoch": 6.385224274406332, "grad_norm": 0.16306872109829346, "learning_rate": 0.000407272972044309, "loss": 2.9782533645629883, "step": 10893, "token_acc": 0.3021652426164514 }, { "epoch": 6.3858106127235414, "grad_norm": 0.15796602484119113, "learning_rate": 0.0004072541364103739, "loss": 2.9529407024383545, "step": 10894, "token_acc": 0.30396469929966086 }, { "epoch": 6.386396951040751, "grad_norm": 0.154041998245844, "learning_rate": 0.000407235299299248, "loss": 2.9794280529022217, "step": 10895, "token_acc": 0.3015317582744716 }, { "epoch": 6.38698328935796, "grad_norm": 0.17211627880276303, "learning_rate": 0.0004072164607111084, "loss": 2.926893711090088, "step": 10896, "token_acc": 0.30857590855061834 }, { "epoch": 6.387569627675169, "grad_norm": 0.22738903255385115, "learning_rate": 0.0004071976206461319, "loss": 2.9317312240600586, "step": 10897, "token_acc": 0.307181005018549 }, { "epoch": 6.388155965992378, "grad_norm": 0.23456960120684114, "learning_rate": 0.0004071787791044956, "loss": 2.9224226474761963, "step": 10898, "token_acc": 0.30805666888141114 }, { "epoch": 6.388742304309587, "grad_norm": 0.17534350646128327, "learning_rate": 0.0004071599360863766, "loss": 3.009730339050293, "step": 10899, "token_acc": 0.29538303588758696 }, { "epoch": 6.389328642626795, "grad_norm": 0.18890614746666187, "learning_rate": 0.00040714109159195166, "loss": 2.9382848739624023, "step": 10900, "token_acc": 0.30688043808013626 }, { "epoch": 6.389914980944004, "grad_norm": 0.19766894672509425, "learning_rate": 0.000407122245621398, "loss": 2.97365665435791, "step": 10901, "token_acc": 0.30228476812770083 }, { "epoch": 6.3905013192612135, "grad_norm": 0.16379061696352967, "learning_rate": 0.0004071033981748925, "loss": 2.9871082305908203, "step": 10902, "token_acc": 0.30101765436433287 }, { "epoch": 6.391087657578423, "grad_norm": 0.22988086638369795, "learning_rate": 0.0004070845492526123, "loss": 2.967031717300415, "step": 10903, "token_acc": 0.30156521649009077 }, { "epoch": 6.391673995895632, "grad_norm": 0.17542942578296394, "learning_rate": 0.0004070656988547344, "loss": 2.9390015602111816, "step": 10904, "token_acc": 0.30510544447077265 }, { "epoch": 6.392260334212841, "grad_norm": 0.20382663067529233, "learning_rate": 0.0004070468469814359, "loss": 2.949906826019287, "step": 10905, "token_acc": 0.30340397330257945 }, { "epoch": 6.39284667253005, "grad_norm": 0.2383607438058896, "learning_rate": 0.000407027993632894, "loss": 2.9648690223693848, "step": 10906, "token_acc": 0.30386580226367604 }, { "epoch": 6.393433010847259, "grad_norm": 0.16733988996217825, "learning_rate": 0.0004070091388092856, "loss": 2.972419023513794, "step": 10907, "token_acc": 0.30139712408051705 }, { "epoch": 6.394019349164468, "grad_norm": 0.18227419031116351, "learning_rate": 0.0004069902825107879, "loss": 2.986868381500244, "step": 10908, "token_acc": 0.29885926462111767 }, { "epoch": 6.394605687481677, "grad_norm": 0.15174184916484817, "learning_rate": 0.00040697142473757807, "loss": 2.949087619781494, "step": 10909, "token_acc": 0.3040002074661895 }, { "epoch": 6.395192025798886, "grad_norm": 0.1778677908854828, "learning_rate": 0.00040695256548983327, "loss": 2.983928918838501, "step": 10910, "token_acc": 0.3002261029559676 }, { "epoch": 6.395778364116095, "grad_norm": 0.1623496587079988, "learning_rate": 0.0004069337047677306, "loss": 2.9465432167053223, "step": 10911, "token_acc": 0.3056307152530186 }, { "epoch": 6.396364702433304, "grad_norm": 0.17169051533787727, "learning_rate": 0.00040691484257144716, "loss": 2.9196062088012695, "step": 10912, "token_acc": 0.30980072039160855 }, { "epoch": 6.396951040750513, "grad_norm": 0.1568976140199779, "learning_rate": 0.0004068959789011603, "loss": 2.939314603805542, "step": 10913, "token_acc": 0.3070295960481577 }, { "epoch": 6.397537379067722, "grad_norm": 0.1821329309509476, "learning_rate": 0.00040687711375704717, "loss": 2.9934403896331787, "step": 10914, "token_acc": 0.2984685861494953 }, { "epoch": 6.398123717384931, "grad_norm": 0.14839242106965686, "learning_rate": 0.00040685824713928485, "loss": 2.9265429973602295, "step": 10915, "token_acc": 0.30866645958280564 }, { "epoch": 6.39871005570214, "grad_norm": 0.229685389060179, "learning_rate": 0.00040683937904805074, "loss": 2.941117763519287, "step": 10916, "token_acc": 0.30591567124551455 }, { "epoch": 6.399296394019349, "grad_norm": 0.16653251154956047, "learning_rate": 0.000406820509483522, "loss": 3.009489059448242, "step": 10917, "token_acc": 0.2982285655912683 }, { "epoch": 6.399882732336558, "grad_norm": 0.19949653974624404, "learning_rate": 0.00040680163844587585, "loss": 2.9745571613311768, "step": 10918, "token_acc": 0.3027926449373837 }, { "epoch": 6.4004690706537675, "grad_norm": 0.16132393619535174, "learning_rate": 0.00040678276593528965, "loss": 2.9241137504577637, "step": 10919, "token_acc": 0.3064816244494401 }, { "epoch": 6.401055408970977, "grad_norm": 0.1741910853088498, "learning_rate": 0.0004067638919519406, "loss": 3.0008418560028076, "step": 10920, "token_acc": 0.2981756309670101 }, { "epoch": 6.401641747288186, "grad_norm": 0.18905464535413485, "learning_rate": 0.00040674501649600603, "loss": 2.9460060596466064, "step": 10921, "token_acc": 0.3045412395679306 }, { "epoch": 6.402228085605394, "grad_norm": 0.1454600385789069, "learning_rate": 0.00040672613956766325, "loss": 2.9477379322052, "step": 10922, "token_acc": 0.308213197617134 }, { "epoch": 6.402814423922603, "grad_norm": 0.19005003823953123, "learning_rate": 0.00040670726116708955, "loss": 3.013434410095215, "step": 10923, "token_acc": 0.296986624989039 }, { "epoch": 6.403400762239812, "grad_norm": 0.15423088570831267, "learning_rate": 0.0004066883812944624, "loss": 2.9453978538513184, "step": 10924, "token_acc": 0.30603970572325 }, { "epoch": 6.403987100557021, "grad_norm": 0.21662421102239449, "learning_rate": 0.00040666949994995895, "loss": 2.9981839656829834, "step": 10925, "token_acc": 0.29735401889090424 }, { "epoch": 6.40457343887423, "grad_norm": 0.16439498280380213, "learning_rate": 0.00040665061713375674, "loss": 2.9333629608154297, "step": 10926, "token_acc": 0.3065291311408851 }, { "epoch": 6.4051597771914395, "grad_norm": 0.21215230691443124, "learning_rate": 0.00040663173284603295, "loss": 2.915843963623047, "step": 10927, "token_acc": 0.30945634850363024 }, { "epoch": 6.405746115508649, "grad_norm": 0.21730398422606226, "learning_rate": 0.00040661284708696523, "loss": 2.9760735034942627, "step": 10928, "token_acc": 0.30106388609537926 }, { "epoch": 6.406332453825858, "grad_norm": 0.15927186760527645, "learning_rate": 0.00040659395985673073, "loss": 2.921783208847046, "step": 10929, "token_acc": 0.30797585846372505 }, { "epoch": 6.406918792143067, "grad_norm": 0.1981744615992735, "learning_rate": 0.00040657507115550705, "loss": 2.961003541946411, "step": 10930, "token_acc": 0.30425831871851733 }, { "epoch": 6.407505130460276, "grad_norm": 0.16960941166030244, "learning_rate": 0.00040655618098347157, "loss": 3.0157952308654785, "step": 10931, "token_acc": 0.2977501045637128 }, { "epoch": 6.408091468777485, "grad_norm": 0.18251406087814726, "learning_rate": 0.0004065372893408017, "loss": 2.9727673530578613, "step": 10932, "token_acc": 0.30135214700581325 }, { "epoch": 6.408677807094693, "grad_norm": 0.16345760781767105, "learning_rate": 0.00040651839622767494, "loss": 2.9810328483581543, "step": 10933, "token_acc": 0.3022723214632291 }, { "epoch": 6.409264145411902, "grad_norm": 0.17920946174833166, "learning_rate": 0.0004064995016442689, "loss": 2.9735050201416016, "step": 10934, "token_acc": 0.30085968424959836 }, { "epoch": 6.4098504837291115, "grad_norm": 0.16417655011168159, "learning_rate": 0.0004064806055907607, "loss": 2.9547266960144043, "step": 10935, "token_acc": 0.30357299180034325 }, { "epoch": 6.410436822046321, "grad_norm": 0.17369072768422933, "learning_rate": 0.0004064617080673282, "loss": 2.9730939865112305, "step": 10936, "token_acc": 0.30244193357746374 }, { "epoch": 6.41102316036353, "grad_norm": 0.18921432437639116, "learning_rate": 0.0004064428090741488, "loss": 2.968780040740967, "step": 10937, "token_acc": 0.3027877837897582 }, { "epoch": 6.411609498680739, "grad_norm": 0.15011849277658057, "learning_rate": 0.00040642390861139987, "loss": 2.980011463165283, "step": 10938, "token_acc": 0.30059114654935387 }, { "epoch": 6.412195836997948, "grad_norm": 0.2621792390565829, "learning_rate": 0.0004064050066792593, "loss": 2.9537606239318848, "step": 10939, "token_acc": 0.3025850034580706 }, { "epoch": 6.412782175315157, "grad_norm": 0.2647163741710445, "learning_rate": 0.00040638610327790436, "loss": 2.95698881149292, "step": 10940, "token_acc": 0.3047461680387908 }, { "epoch": 6.413368513632366, "grad_norm": 0.1707620018382443, "learning_rate": 0.0004063671984075127, "loss": 2.993807077407837, "step": 10941, "token_acc": 0.2982854820728209 }, { "epoch": 6.413954851949575, "grad_norm": 0.27226692079271986, "learning_rate": 0.0004063482920682619, "loss": 2.974234104156494, "step": 10942, "token_acc": 0.3001864763043879 }, { "epoch": 6.4145411902667835, "grad_norm": 0.1744053772779599, "learning_rate": 0.0004063293842603296, "loss": 2.9970741271972656, "step": 10943, "token_acc": 0.29814119610689666 }, { "epoch": 6.415127528583993, "grad_norm": 0.21833086762629847, "learning_rate": 0.0004063104749838935, "loss": 2.954303741455078, "step": 10944, "token_acc": 0.30532259613683355 }, { "epoch": 6.415713866901202, "grad_norm": 0.17125161360615437, "learning_rate": 0.0004062915642391309, "loss": 2.9773969650268555, "step": 10945, "token_acc": 0.29978872935799594 }, { "epoch": 6.416300205218411, "grad_norm": 0.27661298536844026, "learning_rate": 0.0004062726520262198, "loss": 2.935421943664551, "step": 10946, "token_acc": 0.30808216791523463 }, { "epoch": 6.41688654353562, "grad_norm": 0.2658161484675333, "learning_rate": 0.00040625373834533775, "loss": 2.986954927444458, "step": 10947, "token_acc": 0.2984972081949322 }, { "epoch": 6.417472881852829, "grad_norm": 0.22428112038665238, "learning_rate": 0.00040623482319666226, "loss": 2.9350357055664062, "step": 10948, "token_acc": 0.30672836561873346 }, { "epoch": 6.418059220170038, "grad_norm": 0.1966022432563209, "learning_rate": 0.00040621590658037124, "loss": 2.8928277492523193, "step": 10949, "token_acc": 0.31273036204417726 }, { "epoch": 6.418645558487247, "grad_norm": 0.1971832014305048, "learning_rate": 0.0004061969884966423, "loss": 2.9627113342285156, "step": 10950, "token_acc": 0.30377342739074176 }, { "epoch": 6.419231896804456, "grad_norm": 0.16534826267074784, "learning_rate": 0.000406178068945653, "loss": 2.9686803817749023, "step": 10951, "token_acc": 0.3034993518742952 }, { "epoch": 6.4198182351216655, "grad_norm": 0.20937830104149904, "learning_rate": 0.0004061591479275813, "loss": 3.0157692432403564, "step": 10952, "token_acc": 0.29358285395787104 }, { "epoch": 6.420404573438875, "grad_norm": 0.14558590870342775, "learning_rate": 0.0004061402254426048, "loss": 2.9498801231384277, "step": 10953, "token_acc": 0.3057905756159119 }, { "epoch": 6.420990911756084, "grad_norm": 0.1906831222027915, "learning_rate": 0.0004061213014909013, "loss": 2.956395149230957, "step": 10954, "token_acc": 0.3042340152376142 }, { "epoch": 6.421577250073292, "grad_norm": 0.1587880618742563, "learning_rate": 0.00040610237607264854, "loss": 2.9648256301879883, "step": 10955, "token_acc": 0.3036884102740565 }, { "epoch": 6.422163588390501, "grad_norm": 0.17660925807762717, "learning_rate": 0.00040608344918802424, "loss": 2.917018175125122, "step": 10956, "token_acc": 0.3128428321729955 }, { "epoch": 6.42274992670771, "grad_norm": 0.1810069180786094, "learning_rate": 0.00040606452083720635, "loss": 2.98777437210083, "step": 10957, "token_acc": 0.2980536396859169 }, { "epoch": 6.423336265024919, "grad_norm": 0.20177902944406664, "learning_rate": 0.0004060455910203725, "loss": 2.946408271789551, "step": 10958, "token_acc": 0.30679166245820333 }, { "epoch": 6.423922603342128, "grad_norm": 0.16319900841920762, "learning_rate": 0.0004060266597377007, "loss": 2.963646411895752, "step": 10959, "token_acc": 0.30293928564921696 }, { "epoch": 6.4245089416593375, "grad_norm": 0.1788792863041922, "learning_rate": 0.00040600772698936867, "loss": 2.8948006629943848, "step": 10960, "token_acc": 0.31280953126772126 }, { "epoch": 6.425095279976547, "grad_norm": 0.1682483790250407, "learning_rate": 0.0004059887927755542, "loss": 2.96028995513916, "step": 10961, "token_acc": 0.30412131313492813 }, { "epoch": 6.425681618293756, "grad_norm": 0.19236229201662874, "learning_rate": 0.0004059698570964352, "loss": 2.9720747470855713, "step": 10962, "token_acc": 0.30084902747761655 }, { "epoch": 6.426267956610965, "grad_norm": 0.16062061909247952, "learning_rate": 0.0004059509199521897, "loss": 2.956996202468872, "step": 10963, "token_acc": 0.30439489817414794 }, { "epoch": 6.426854294928174, "grad_norm": 0.21864820679158495, "learning_rate": 0.00040593198134299536, "loss": 2.997164487838745, "step": 10964, "token_acc": 0.2979842758561754 }, { "epoch": 6.427440633245382, "grad_norm": 0.1560265522791331, "learning_rate": 0.0004059130412690302, "loss": 2.9648303985595703, "step": 10965, "token_acc": 0.30293301821927443 }, { "epoch": 6.428026971562591, "grad_norm": 0.21607294998910473, "learning_rate": 0.0004058940997304721, "loss": 3.006887435913086, "step": 10966, "token_acc": 0.2975354331899703 }, { "epoch": 6.4286133098798, "grad_norm": 0.17064828886511826, "learning_rate": 0.00040587515672749897, "loss": 2.965193271636963, "step": 10967, "token_acc": 0.3021501448216476 }, { "epoch": 6.4291996481970095, "grad_norm": 0.18106276263927537, "learning_rate": 0.0004058562122602888, "loss": 2.9416446685791016, "step": 10968, "token_acc": 0.3056566878079531 }, { "epoch": 6.429785986514219, "grad_norm": 0.16088293066732756, "learning_rate": 0.00040583726632901964, "loss": 2.978781223297119, "step": 10969, "token_acc": 0.30182337932969105 }, { "epoch": 6.430372324831428, "grad_norm": 0.2023570348559319, "learning_rate": 0.00040581831893386923, "loss": 2.9697704315185547, "step": 10970, "token_acc": 0.30196639181165114 }, { "epoch": 6.430958663148637, "grad_norm": 0.15160292167210002, "learning_rate": 0.00040579937007501564, "loss": 2.932023048400879, "step": 10971, "token_acc": 0.307907106622609 }, { "epoch": 6.431545001465846, "grad_norm": 0.18202856830457265, "learning_rate": 0.000405780419752637, "loss": 2.970111846923828, "step": 10972, "token_acc": 0.30331300304295705 }, { "epoch": 6.432131339783055, "grad_norm": 0.18304136976814175, "learning_rate": 0.0004057614679669113, "loss": 2.9986703395843506, "step": 10973, "token_acc": 0.29703468887775425 }, { "epoch": 6.432717678100264, "grad_norm": 0.16373862883180998, "learning_rate": 0.00040574251471801637, "loss": 2.960819721221924, "step": 10974, "token_acc": 0.3024405980718369 }, { "epoch": 6.433304016417473, "grad_norm": 0.17067745589540356, "learning_rate": 0.0004057235600061305, "loss": 2.9610447883605957, "step": 10975, "token_acc": 0.3004389404743231 }, { "epoch": 6.4338903547346815, "grad_norm": 0.16462512542468266, "learning_rate": 0.0004057046038314315, "loss": 2.9647059440612793, "step": 10976, "token_acc": 0.30300859789592527 }, { "epoch": 6.434476693051891, "grad_norm": 0.18076375674709722, "learning_rate": 0.00040568564619409766, "loss": 2.960850477218628, "step": 10977, "token_acc": 0.3028802663483618 }, { "epoch": 6.4350630313691, "grad_norm": 0.18544645809887664, "learning_rate": 0.00040566668709430685, "loss": 2.9548404216766357, "step": 10978, "token_acc": 0.3043620806844988 }, { "epoch": 6.435649369686309, "grad_norm": 0.155322214427238, "learning_rate": 0.0004056477265322374, "loss": 2.966245174407959, "step": 10979, "token_acc": 0.30023630675648477 }, { "epoch": 6.436235708003518, "grad_norm": 0.15957014881905904, "learning_rate": 0.0004056287645080673, "loss": 2.976929187774658, "step": 10980, "token_acc": 0.3001660496373539 }, { "epoch": 6.436822046320727, "grad_norm": 0.16763193404178797, "learning_rate": 0.0004056098010219745, "loss": 2.9870004653930664, "step": 10981, "token_acc": 0.29938123801097916 }, { "epoch": 6.437408384637936, "grad_norm": 0.14546282127208945, "learning_rate": 0.0004055908360741375, "loss": 2.9692420959472656, "step": 10982, "token_acc": 0.3008335111084022 }, { "epoch": 6.437994722955145, "grad_norm": 0.16861749167645715, "learning_rate": 0.0004055718696647342, "loss": 2.990567684173584, "step": 10983, "token_acc": 0.2990827229899327 }, { "epoch": 6.438581061272354, "grad_norm": 0.16006648482588084, "learning_rate": 0.0004055529017939428, "loss": 2.9518535137176514, "step": 10984, "token_acc": 0.3055904189385372 }, { "epoch": 6.4391673995895635, "grad_norm": 0.1805971278296849, "learning_rate": 0.0004055339324619415, "loss": 2.9713170528411865, "step": 10985, "token_acc": 0.30313592489027785 }, { "epoch": 6.439753737906772, "grad_norm": 0.16145350516167511, "learning_rate": 0.00040551496166890845, "loss": 2.9153270721435547, "step": 10986, "token_acc": 0.3091028676664036 }, { "epoch": 6.440340076223981, "grad_norm": 0.1779904315534651, "learning_rate": 0.00040549598941502194, "loss": 2.968998432159424, "step": 10987, "token_acc": 0.30381659171495284 }, { "epoch": 6.44092641454119, "grad_norm": 0.17565821801267537, "learning_rate": 0.00040547701570046015, "loss": 2.9445443153381348, "step": 10988, "token_acc": 0.30463646102083614 }, { "epoch": 6.441512752858399, "grad_norm": 0.16365530604727446, "learning_rate": 0.00040545804052540127, "loss": 2.9817442893981934, "step": 10989, "token_acc": 0.30108656919906834 }, { "epoch": 6.442099091175608, "grad_norm": 0.16998328359580273, "learning_rate": 0.0004054390638900236, "loss": 2.9653778076171875, "step": 10990, "token_acc": 0.3028326524448194 }, { "epoch": 6.442685429492817, "grad_norm": 0.2044879965170746, "learning_rate": 0.0004054200857945054, "loss": 2.9568662643432617, "step": 10991, "token_acc": 0.3029704795630699 }, { "epoch": 6.443271767810026, "grad_norm": 0.19857680427712052, "learning_rate": 0.00040540110623902485, "loss": 2.945683002471924, "step": 10992, "token_acc": 0.30467796823630966 }, { "epoch": 6.4438581061272355, "grad_norm": 0.15382924554057, "learning_rate": 0.0004053821252237604, "loss": 2.9848220348358154, "step": 10993, "token_acc": 0.3026869235380345 }, { "epoch": 6.444444444444445, "grad_norm": 0.17035612422243762, "learning_rate": 0.0004053631427488903, "loss": 2.9737548828125, "step": 10994, "token_acc": 0.3026927213865237 }, { "epoch": 6.445030782761654, "grad_norm": 0.20585445726308768, "learning_rate": 0.0004053441588145927, "loss": 2.9946248531341553, "step": 10995, "token_acc": 0.2990310370529521 }, { "epoch": 6.445617121078863, "grad_norm": 0.18421816371910196, "learning_rate": 0.00040532517342104613, "loss": 2.9479610919952393, "step": 10996, "token_acc": 0.30516617053903944 }, { "epoch": 6.446203459396072, "grad_norm": 0.15079240314429534, "learning_rate": 0.00040530618656842886, "loss": 2.939673662185669, "step": 10997, "token_acc": 0.30784405015326405 }, { "epoch": 6.44678979771328, "grad_norm": 0.1569719461174747, "learning_rate": 0.00040528719825691923, "loss": 2.968632698059082, "step": 10998, "token_acc": 0.3027323560126373 }, { "epoch": 6.447376136030489, "grad_norm": 0.14987361625202347, "learning_rate": 0.00040526820848669565, "loss": 2.9488604068756104, "step": 10999, "token_acc": 0.30570862586939984 }, { "epoch": 6.447962474347698, "grad_norm": 0.16078618993224303, "learning_rate": 0.0004052492172579364, "loss": 3.022296190261841, "step": 11000, "token_acc": 0.2928704720814198 }, { "epoch": 6.4485488126649075, "grad_norm": 0.18774993923038064, "learning_rate": 0.00040523022457082, "loss": 2.967209577560425, "step": 11001, "token_acc": 0.30199318393060004 }, { "epoch": 6.449135150982117, "grad_norm": 0.1611828550020077, "learning_rate": 0.0004052112304255249, "loss": 2.9375970363616943, "step": 11002, "token_acc": 0.30710354544063406 }, { "epoch": 6.449721489299326, "grad_norm": 0.15888655781049046, "learning_rate": 0.00040519223482222934, "loss": 2.936610221862793, "step": 11003, "token_acc": 0.307128273744476 }, { "epoch": 6.450307827616535, "grad_norm": 0.15563442168753397, "learning_rate": 0.0004051732377611119, "loss": 2.955447196960449, "step": 11004, "token_acc": 0.30553366855654346 }, { "epoch": 6.450894165933744, "grad_norm": 0.16638699384737135, "learning_rate": 0.00040515423924235094, "loss": 2.969627857208252, "step": 11005, "token_acc": 0.3021407590302463 }, { "epoch": 6.451480504250953, "grad_norm": 0.16536871974523756, "learning_rate": 0.000405135239266125, "loss": 2.985635280609131, "step": 11006, "token_acc": 0.3007485271619776 }, { "epoch": 6.452066842568162, "grad_norm": 0.18204464321068242, "learning_rate": 0.0004051162378326125, "loss": 2.9771459102630615, "step": 11007, "token_acc": 0.29979941606065535 }, { "epoch": 6.45265318088537, "grad_norm": 0.18119099182986687, "learning_rate": 0.00040509723494199206, "loss": 2.979257345199585, "step": 11008, "token_acc": 0.3011885495160243 }, { "epoch": 6.4532395192025795, "grad_norm": 0.17647198192377894, "learning_rate": 0.00040507823059444205, "loss": 2.97053861618042, "step": 11009, "token_acc": 0.3006866181265058 }, { "epoch": 6.453825857519789, "grad_norm": 0.17572981843950644, "learning_rate": 0.000405059224790141, "loss": 2.945981502532959, "step": 11010, "token_acc": 0.30668648457886505 }, { "epoch": 6.454412195836998, "grad_norm": 0.1810737399166512, "learning_rate": 0.00040504021752926756, "loss": 2.9665284156799316, "step": 11011, "token_acc": 0.30283879814821707 }, { "epoch": 6.454998534154207, "grad_norm": 0.23303855946178137, "learning_rate": 0.0004050212088120001, "loss": 2.979137659072876, "step": 11012, "token_acc": 0.30002830382205464 }, { "epoch": 6.455584872471416, "grad_norm": 0.18550357882650165, "learning_rate": 0.0004050021986385173, "loss": 2.982293128967285, "step": 11013, "token_acc": 0.30091033667683814 }, { "epoch": 6.456171210788625, "grad_norm": 0.16276016417952907, "learning_rate": 0.0004049831870089978, "loss": 2.917393922805786, "step": 11014, "token_acc": 0.310715242010427 }, { "epoch": 6.456757549105834, "grad_norm": 0.18105753600890304, "learning_rate": 0.00040496417392362, "loss": 2.9471259117126465, "step": 11015, "token_acc": 0.3056361316154255 }, { "epoch": 6.457343887423043, "grad_norm": 0.19732029779609106, "learning_rate": 0.0004049451593825626, "loss": 2.9281907081604004, "step": 11016, "token_acc": 0.3084952582347039 }, { "epoch": 6.457930225740252, "grad_norm": 0.16987720435874348, "learning_rate": 0.0004049261433860043, "loss": 2.9240832328796387, "step": 11017, "token_acc": 0.30802106985396355 }, { "epoch": 6.4585165640574616, "grad_norm": 0.16232685604741973, "learning_rate": 0.0004049071259341236, "loss": 2.978379964828491, "step": 11018, "token_acc": 0.30260693100218544 }, { "epoch": 6.45910290237467, "grad_norm": 0.15835180754354355, "learning_rate": 0.0004048881070270992, "loss": 2.9198546409606934, "step": 11019, "token_acc": 0.3112500995936579 }, { "epoch": 6.459689240691879, "grad_norm": 0.16073894935547114, "learning_rate": 0.0004048690866651097, "loss": 2.972555160522461, "step": 11020, "token_acc": 0.30151994527341913 }, { "epoch": 6.460275579009088, "grad_norm": 0.1549009792349556, "learning_rate": 0.00040485006484833384, "loss": 2.9436280727386475, "step": 11021, "token_acc": 0.30581908336562913 }, { "epoch": 6.460861917326297, "grad_norm": 0.16447859903800646, "learning_rate": 0.00040483104157695035, "loss": 2.9682822227478027, "step": 11022, "token_acc": 0.30397973737010187 }, { "epoch": 6.461448255643506, "grad_norm": 0.20129253412972184, "learning_rate": 0.00040481201685113783, "loss": 2.9514973163604736, "step": 11023, "token_acc": 0.30446153317817864 }, { "epoch": 6.462034593960715, "grad_norm": 0.3312656766050174, "learning_rate": 0.000404792990671075, "loss": 2.9329237937927246, "step": 11024, "token_acc": 0.30678949454536747 }, { "epoch": 6.4626209322779244, "grad_norm": 0.359687891140474, "learning_rate": 0.0004047739630369406, "loss": 2.923241138458252, "step": 11025, "token_acc": 0.3091828273371673 }, { "epoch": 6.463207270595134, "grad_norm": 0.18999998095476667, "learning_rate": 0.0004047549339489134, "loss": 2.974426507949829, "step": 11026, "token_acc": 0.3018828333696503 }, { "epoch": 6.463793608912343, "grad_norm": 0.23845697061423365, "learning_rate": 0.00040473590340717213, "loss": 2.965245246887207, "step": 11027, "token_acc": 0.30380509641873277 }, { "epoch": 6.464379947229552, "grad_norm": 0.24183730893845434, "learning_rate": 0.0004047168714118956, "loss": 2.9721157550811768, "step": 11028, "token_acc": 0.3009478489531856 }, { "epoch": 6.464966285546761, "grad_norm": 0.20764023463261477, "learning_rate": 0.00040469783796326245, "loss": 2.911668300628662, "step": 11029, "token_acc": 0.3096965405093503 }, { "epoch": 6.465552623863969, "grad_norm": 0.24801309810389363, "learning_rate": 0.00040467880306145165, "loss": 3.0214481353759766, "step": 11030, "token_acc": 0.29407991858132193 }, { "epoch": 6.466138962181178, "grad_norm": 0.16091745918354255, "learning_rate": 0.0004046597667066419, "loss": 2.964920997619629, "step": 11031, "token_acc": 0.30342505296408107 }, { "epoch": 6.466725300498387, "grad_norm": 0.19904757272612675, "learning_rate": 0.00040464072889901206, "loss": 2.9421639442443848, "step": 11032, "token_acc": 0.3075618889392601 }, { "epoch": 6.4673116388155965, "grad_norm": 0.18344316535051605, "learning_rate": 0.0004046216896387409, "loss": 2.983275890350342, "step": 11033, "token_acc": 0.3008353803122763 }, { "epoch": 6.467897977132806, "grad_norm": 0.19049978161785916, "learning_rate": 0.0004046026489260074, "loss": 3.003890037536621, "step": 11034, "token_acc": 0.29769240318103424 }, { "epoch": 6.468484315450015, "grad_norm": 0.18661858050436544, "learning_rate": 0.0004045836067609903, "loss": 2.9422879219055176, "step": 11035, "token_acc": 0.3057319286897656 }, { "epoch": 6.469070653767224, "grad_norm": 0.17148065335789506, "learning_rate": 0.00040456456314386845, "loss": 2.9301772117614746, "step": 11036, "token_acc": 0.30922511458488083 }, { "epoch": 6.469656992084433, "grad_norm": 0.21583330851988694, "learning_rate": 0.00040454551807482097, "loss": 2.9676766395568848, "step": 11037, "token_acc": 0.30144693486839536 }, { "epoch": 6.470243330401642, "grad_norm": 0.1652773508973196, "learning_rate": 0.0004045264715540265, "loss": 2.946106433868408, "step": 11038, "token_acc": 0.3063622298532083 }, { "epoch": 6.470829668718851, "grad_norm": 0.2009158894428785, "learning_rate": 0.000404507423581664, "loss": 2.9670166969299316, "step": 11039, "token_acc": 0.3017612692517222 }, { "epoch": 6.47141600703606, "grad_norm": 0.14933398826241234, "learning_rate": 0.00040448837415791255, "loss": 2.9930896759033203, "step": 11040, "token_acc": 0.30068521999979453 }, { "epoch": 6.4720023453532685, "grad_norm": 0.18025734834700444, "learning_rate": 0.000404469323282951, "loss": 2.9450619220733643, "step": 11041, "token_acc": 0.30822418927691847 }, { "epoch": 6.472588683670478, "grad_norm": 0.1433719502956986, "learning_rate": 0.0004044502709569583, "loss": 2.9645299911499023, "step": 11042, "token_acc": 0.3029112742501072 }, { "epoch": 6.473175021987687, "grad_norm": 0.17519320857277454, "learning_rate": 0.00040443121718011343, "loss": 2.9609498977661133, "step": 11043, "token_acc": 0.30290424548174016 }, { "epoch": 6.473761360304896, "grad_norm": 0.15932307920817604, "learning_rate": 0.0004044121619525953, "loss": 2.983809232711792, "step": 11044, "token_acc": 0.3008128020119765 }, { "epoch": 6.474347698622105, "grad_norm": 0.17436110107963299, "learning_rate": 0.00040439310527458307, "loss": 2.964989423751831, "step": 11045, "token_acc": 0.30319696479515307 }, { "epoch": 6.474934036939314, "grad_norm": 0.1666664248681814, "learning_rate": 0.0004043740471462556, "loss": 2.9466147422790527, "step": 11046, "token_acc": 0.3050879237146263 }, { "epoch": 6.475520375256523, "grad_norm": 0.19103484175670707, "learning_rate": 0.00040435498756779203, "loss": 2.9719066619873047, "step": 11047, "token_acc": 0.3012914676632004 }, { "epoch": 6.476106713573732, "grad_norm": 0.1568177668594751, "learning_rate": 0.00040433592653937135, "loss": 2.9644248485565186, "step": 11048, "token_acc": 0.3021300948750663 }, { "epoch": 6.476693051890941, "grad_norm": 0.24384840681966846, "learning_rate": 0.00040431686406117254, "loss": 2.947810411453247, "step": 11049, "token_acc": 0.3058180307005013 }, { "epoch": 6.4772793902081505, "grad_norm": 0.14228161784566254, "learning_rate": 0.0004042978001333748, "loss": 2.9597244262695312, "step": 11050, "token_acc": 0.3038345588526708 }, { "epoch": 6.477865728525359, "grad_norm": 0.24128007140971666, "learning_rate": 0.0004042787347561571, "loss": 2.9976391792297363, "step": 11051, "token_acc": 0.2981018850694671 }, { "epoch": 6.478452066842568, "grad_norm": 0.17771078577716284, "learning_rate": 0.00040425966792969866, "loss": 2.946054697036743, "step": 11052, "token_acc": 0.3062355849286874 }, { "epoch": 6.479038405159777, "grad_norm": 0.1972764012027858, "learning_rate": 0.00040424059965417846, "loss": 2.958761692047119, "step": 11053, "token_acc": 0.30292308836999277 }, { "epoch": 6.479624743476986, "grad_norm": 0.180484643260157, "learning_rate": 0.0004042215299297757, "loss": 2.937087059020996, "step": 11054, "token_acc": 0.3086102978721221 }, { "epoch": 6.480211081794195, "grad_norm": 0.25769498179007067, "learning_rate": 0.0004042024587566694, "loss": 2.9915108680725098, "step": 11055, "token_acc": 0.3007145410953142 }, { "epoch": 6.480797420111404, "grad_norm": 0.2316684140044936, "learning_rate": 0.0004041833861350388, "loss": 2.939375400543213, "step": 11056, "token_acc": 0.30582905354863094 }, { "epoch": 6.481383758428613, "grad_norm": 0.2614347270163304, "learning_rate": 0.0004041643120650631, "loss": 2.9615275859832764, "step": 11057, "token_acc": 0.30285711270132776 }, { "epoch": 6.4819700967458225, "grad_norm": 0.27686865347087153, "learning_rate": 0.0004041452365469215, "loss": 2.9852428436279297, "step": 11058, "token_acc": 0.29849912866874606 }, { "epoch": 6.482556435063032, "grad_norm": 0.21266965391364331, "learning_rate": 0.00040412615958079296, "loss": 3.0051534175872803, "step": 11059, "token_acc": 0.2960327157699744 }, { "epoch": 6.483142773380241, "grad_norm": 0.22115245201070177, "learning_rate": 0.0004041070811668569, "loss": 2.9762964248657227, "step": 11060, "token_acc": 0.30322849949778896 }, { "epoch": 6.48372911169745, "grad_norm": 0.19203711554660205, "learning_rate": 0.0004040880013052925, "loss": 2.949918031692505, "step": 11061, "token_acc": 0.30508062110435996 }, { "epoch": 6.484315450014659, "grad_norm": 0.1957515101659472, "learning_rate": 0.00040406891999627897, "loss": 2.9779117107391357, "step": 11062, "token_acc": 0.3024635345776165 }, { "epoch": 6.484901788331867, "grad_norm": 0.17192739121866624, "learning_rate": 0.00040404983723999556, "loss": 2.953482151031494, "step": 11063, "token_acc": 0.3037689831722254 }, { "epoch": 6.485488126649076, "grad_norm": 0.20204318522950107, "learning_rate": 0.0004040307530366214, "loss": 2.9332275390625, "step": 11064, "token_acc": 0.30831269201601613 }, { "epoch": 6.486074464966285, "grad_norm": 0.21347918986004039, "learning_rate": 0.000404011667386336, "loss": 2.9197020530700684, "step": 11065, "token_acc": 0.3088927434737735 }, { "epoch": 6.4866608032834945, "grad_norm": 0.18967424204918165, "learning_rate": 0.00040399258028931843, "loss": 2.990913152694702, "step": 11066, "token_acc": 0.29995835770729434 }, { "epoch": 6.487247141600704, "grad_norm": 0.20324701700526862, "learning_rate": 0.00040397349174574814, "loss": 2.9768433570861816, "step": 11067, "token_acc": 0.3015643864471217 }, { "epoch": 6.487833479917913, "grad_norm": 0.18338190804273638, "learning_rate": 0.0004039544017558044, "loss": 2.980958938598633, "step": 11068, "token_acc": 0.30056974143701454 }, { "epoch": 6.488419818235122, "grad_norm": 0.18919699248370125, "learning_rate": 0.00040393531031966646, "loss": 3.013619899749756, "step": 11069, "token_acc": 0.2962669839211221 }, { "epoch": 6.489006156552331, "grad_norm": 0.20775545955321417, "learning_rate": 0.00040391621743751373, "loss": 2.978884220123291, "step": 11070, "token_acc": 0.3002861055602186 }, { "epoch": 6.48959249486954, "grad_norm": 0.1791088632399652, "learning_rate": 0.00040389712310952546, "loss": 2.9460225105285645, "step": 11071, "token_acc": 0.3041709135133553 }, { "epoch": 6.490178833186749, "grad_norm": 0.18329203057404797, "learning_rate": 0.0004038780273358812, "loss": 2.9357171058654785, "step": 11072, "token_acc": 0.3067763306791762 }, { "epoch": 6.490765171503957, "grad_norm": 0.18800826651635513, "learning_rate": 0.0004038589301167602, "loss": 2.9659342765808105, "step": 11073, "token_acc": 0.3034585446240747 }, { "epoch": 6.4913515098211665, "grad_norm": 0.1902953162359805, "learning_rate": 0.0004038398314523419, "loss": 2.9764933586120605, "step": 11074, "token_acc": 0.30184485838224034 }, { "epoch": 6.491937848138376, "grad_norm": 0.16447794515883632, "learning_rate": 0.0004038207313428056, "loss": 2.9616081714630127, "step": 11075, "token_acc": 0.3050008027625918 }, { "epoch": 6.492524186455585, "grad_norm": 0.20159833388841156, "learning_rate": 0.0004038016297883309, "loss": 2.9155707359313965, "step": 11076, "token_acc": 0.3102328841481731 }, { "epoch": 6.493110524772794, "grad_norm": 0.1464278051951114, "learning_rate": 0.0004037825267890971, "loss": 2.96445894241333, "step": 11077, "token_acc": 0.30274721490215484 }, { "epoch": 6.493696863090003, "grad_norm": 0.18730342961676183, "learning_rate": 0.0004037634223452836, "loss": 2.9765865802764893, "step": 11078, "token_acc": 0.30059302304022967 }, { "epoch": 6.494283201407212, "grad_norm": 0.1547025149518504, "learning_rate": 0.0004037443164570701, "loss": 2.893035888671875, "step": 11079, "token_acc": 0.3129223637155889 }, { "epoch": 6.494869539724421, "grad_norm": 0.21560707900866685, "learning_rate": 0.00040372520912463586, "loss": 2.949376106262207, "step": 11080, "token_acc": 0.3053082738649569 }, { "epoch": 6.49545587804163, "grad_norm": 0.17054647591230307, "learning_rate": 0.00040370610034816043, "loss": 2.960934638977051, "step": 11081, "token_acc": 0.30258094391543405 }, { "epoch": 6.496042216358839, "grad_norm": 0.18547531815637075, "learning_rate": 0.00040368699012782326, "loss": 3.0074872970581055, "step": 11082, "token_acc": 0.29707040604544727 }, { "epoch": 6.4966285546760485, "grad_norm": 0.18844877787442277, "learning_rate": 0.00040366787846380395, "loss": 2.990184783935547, "step": 11083, "token_acc": 0.30053078944481304 }, { "epoch": 6.497214892993257, "grad_norm": 0.2385125878390623, "learning_rate": 0.00040364876535628204, "loss": 2.9769949913024902, "step": 11084, "token_acc": 0.29980267507763075 }, { "epoch": 6.497801231310466, "grad_norm": 0.20130320205267963, "learning_rate": 0.00040362965080543696, "loss": 2.95400071144104, "step": 11085, "token_acc": 0.3049591405514116 }, { "epoch": 6.498387569627675, "grad_norm": 0.21267316651769153, "learning_rate": 0.0004036105348114484, "loss": 3.0000522136688232, "step": 11086, "token_acc": 0.29770213115216615 }, { "epoch": 6.498973907944884, "grad_norm": 0.29276819219364786, "learning_rate": 0.00040359141737449577, "loss": 2.972066879272461, "step": 11087, "token_acc": 0.3011774202311376 }, { "epoch": 6.499560246262093, "grad_norm": 0.1594625919880992, "learning_rate": 0.00040357229849475874, "loss": 2.9509942531585693, "step": 11088, "token_acc": 0.305596837944664 }, { "epoch": 6.500146584579302, "grad_norm": 0.2549587862401406, "learning_rate": 0.000403553178172417, "loss": 3.016068458557129, "step": 11089, "token_acc": 0.2955493811568578 }, { "epoch": 6.500732922896511, "grad_norm": 0.1848205064782445, "learning_rate": 0.00040353405640764997, "loss": 2.9823317527770996, "step": 11090, "token_acc": 0.3005174552635927 }, { "epoch": 6.5013192612137205, "grad_norm": 0.2287311698612774, "learning_rate": 0.0004035149332006374, "loss": 2.937300443649292, "step": 11091, "token_acc": 0.3081988716953364 }, { "epoch": 6.50190559953093, "grad_norm": 0.16495878775379488, "learning_rate": 0.000403495808551559, "loss": 2.98425030708313, "step": 11092, "token_acc": 0.3021631571039131 }, { "epoch": 6.502491937848139, "grad_norm": 0.23513495163756104, "learning_rate": 0.00040347668246059416, "loss": 3.0295958518981934, "step": 11093, "token_acc": 0.2943973720572516 }, { "epoch": 6.503078276165347, "grad_norm": 0.17645608386483533, "learning_rate": 0.00040345755492792276, "loss": 2.9699559211730957, "step": 11094, "token_acc": 0.30352967667103914 }, { "epoch": 6.503664614482556, "grad_norm": 0.19432317053656015, "learning_rate": 0.0004034384259537244, "loss": 2.969893455505371, "step": 11095, "token_acc": 0.30074205672045734 }, { "epoch": 6.504250952799765, "grad_norm": 0.15641497195410242, "learning_rate": 0.0004034192955381788, "loss": 2.962783098220825, "step": 11096, "token_acc": 0.3045185988642889 }, { "epoch": 6.504837291116974, "grad_norm": 0.1784147853953123, "learning_rate": 0.00040340016368146573, "loss": 2.965872049331665, "step": 11097, "token_acc": 0.30374863200643454 }, { "epoch": 6.505423629434183, "grad_norm": 0.14982213055844684, "learning_rate": 0.00040338103038376475, "loss": 2.988189458847046, "step": 11098, "token_acc": 0.30027467289189846 }, { "epoch": 6.5060099677513925, "grad_norm": 0.15696720245700319, "learning_rate": 0.00040336189564525564, "loss": 2.945434331893921, "step": 11099, "token_acc": 0.3055884467945911 }, { "epoch": 6.506596306068602, "grad_norm": 0.15062387034080923, "learning_rate": 0.00040334275946611825, "loss": 2.991771697998047, "step": 11100, "token_acc": 0.2976693658323072 }, { "epoch": 6.507182644385811, "grad_norm": 0.15413636192794616, "learning_rate": 0.00040332362184653225, "loss": 2.9476187229156494, "step": 11101, "token_acc": 0.3051726013152345 }, { "epoch": 6.50776898270302, "grad_norm": 0.15590603656144794, "learning_rate": 0.00040330448278667744, "loss": 2.9957499504089355, "step": 11102, "token_acc": 0.2963924994616266 }, { "epoch": 6.508355321020229, "grad_norm": 0.18207075011384252, "learning_rate": 0.00040328534228673354, "loss": 3.0018863677978516, "step": 11103, "token_acc": 0.2972667028276301 }, { "epoch": 6.508941659337438, "grad_norm": 0.15906994016353035, "learning_rate": 0.0004032662003468804, "loss": 3.008528232574463, "step": 11104, "token_acc": 0.2964832091278475 }, { "epoch": 6.509527997654647, "grad_norm": 0.17118606757593374, "learning_rate": 0.00040324705696729793, "loss": 2.9678163528442383, "step": 11105, "token_acc": 0.30353108369466497 }, { "epoch": 6.510114335971855, "grad_norm": 0.15688641913000426, "learning_rate": 0.00040322791214816584, "loss": 2.979048252105713, "step": 11106, "token_acc": 0.30115884828954415 }, { "epoch": 6.5107006742890645, "grad_norm": 0.1617357399531289, "learning_rate": 0.0004032087658896639, "loss": 2.9192066192626953, "step": 11107, "token_acc": 0.3098767839450218 }, { "epoch": 6.511287012606274, "grad_norm": 0.17487596786405155, "learning_rate": 0.0004031896181919722, "loss": 2.9957218170166016, "step": 11108, "token_acc": 0.29726094272596965 }, { "epoch": 6.511873350923483, "grad_norm": 0.14805540790508762, "learning_rate": 0.0004031704690552703, "loss": 2.9292633533477783, "step": 11109, "token_acc": 0.30912352694733 }, { "epoch": 6.512459689240692, "grad_norm": 0.15503514520808528, "learning_rate": 0.0004031513184797383, "loss": 2.986847400665283, "step": 11110, "token_acc": 0.29821361980802924 }, { "epoch": 6.513046027557901, "grad_norm": 0.16031802276915977, "learning_rate": 0.0004031321664655562, "loss": 2.9577951431274414, "step": 11111, "token_acc": 0.30411198005805945 }, { "epoch": 6.51363236587511, "grad_norm": 0.16964532366968735, "learning_rate": 0.00040311301301290355, "loss": 2.956791877746582, "step": 11112, "token_acc": 0.30412319816397426 }, { "epoch": 6.514218704192319, "grad_norm": 0.15050471024297965, "learning_rate": 0.0004030938581219605, "loss": 2.98944354057312, "step": 11113, "token_acc": 0.29986565432450585 }, { "epoch": 6.514805042509528, "grad_norm": 0.16780579458213124, "learning_rate": 0.00040307470179290695, "loss": 2.968371868133545, "step": 11114, "token_acc": 0.30215735144507766 }, { "epoch": 6.515391380826737, "grad_norm": 0.1806889114251306, "learning_rate": 0.0004030555440259229, "loss": 2.9723801612854004, "step": 11115, "token_acc": 0.302467138890111 }, { "epoch": 6.515977719143946, "grad_norm": 0.1698671949536607, "learning_rate": 0.0004030363848211882, "loss": 2.960174322128296, "step": 11116, "token_acc": 0.30352090862157977 }, { "epoch": 6.516564057461155, "grad_norm": 0.1709178425125163, "learning_rate": 0.00040301722417888285, "loss": 2.957637071609497, "step": 11117, "token_acc": 0.3043306915408282 }, { "epoch": 6.517150395778364, "grad_norm": 0.15693031581161077, "learning_rate": 0.00040299806209918696, "loss": 2.961458683013916, "step": 11118, "token_acc": 0.30221822201680626 }, { "epoch": 6.517736734095573, "grad_norm": 0.18467919253195114, "learning_rate": 0.00040297889858228043, "loss": 2.9668545722961426, "step": 11119, "token_acc": 0.3034947118464014 }, { "epoch": 6.518323072412782, "grad_norm": 0.17617648984024362, "learning_rate": 0.00040295973362834334, "loss": 2.9362878799438477, "step": 11120, "token_acc": 0.3076757131062402 }, { "epoch": 6.518909410729991, "grad_norm": 0.167724274651725, "learning_rate": 0.00040294056723755555, "loss": 2.9585776329040527, "step": 11121, "token_acc": 0.30354692325525 }, { "epoch": 6.5194957490472, "grad_norm": 0.16246740202872795, "learning_rate": 0.0004029213994100973, "loss": 2.979480266571045, "step": 11122, "token_acc": 0.30170337723409657 }, { "epoch": 6.520082087364409, "grad_norm": 0.1698085538343002, "learning_rate": 0.00040290223014614857, "loss": 2.945061445236206, "step": 11123, "token_acc": 0.30597005267312893 }, { "epoch": 6.5206684256816185, "grad_norm": 0.15524081041669321, "learning_rate": 0.0004028830594458894, "loss": 2.9161622524261475, "step": 11124, "token_acc": 0.31004983929105884 }, { "epoch": 6.521254763998828, "grad_norm": 0.18531615583148847, "learning_rate": 0.00040286388730949985, "loss": 2.995180130004883, "step": 11125, "token_acc": 0.29803020980029193 }, { "epoch": 6.521841102316037, "grad_norm": 0.22921665883701475, "learning_rate": 0.00040284471373716016, "loss": 3.0060696601867676, "step": 11126, "token_acc": 0.2971086414107277 }, { "epoch": 6.522427440633246, "grad_norm": 0.21149266143443865, "learning_rate": 0.00040282553872905024, "loss": 2.975681781768799, "step": 11127, "token_acc": 0.30133797088440717 }, { "epoch": 6.523013778950454, "grad_norm": 0.1631919708140131, "learning_rate": 0.0004028063622853504, "loss": 2.976017713546753, "step": 11128, "token_acc": 0.3011949106153456 }, { "epoch": 6.523600117267663, "grad_norm": 0.16375836806997898, "learning_rate": 0.0004027871844062407, "loss": 2.9656481742858887, "step": 11129, "token_acc": 0.30218003005053956 }, { "epoch": 6.524186455584872, "grad_norm": 0.18274123697187822, "learning_rate": 0.00040276800509190126, "loss": 2.9700756072998047, "step": 11130, "token_acc": 0.300978792822186 }, { "epoch": 6.524772793902081, "grad_norm": 0.18383092967811648, "learning_rate": 0.00040274882434251225, "loss": 2.9753477573394775, "step": 11131, "token_acc": 0.3030169560366814 }, { "epoch": 6.5253591322192905, "grad_norm": 0.1435593970516185, "learning_rate": 0.00040272964215825387, "loss": 2.9538607597351074, "step": 11132, "token_acc": 0.30444851800072686 }, { "epoch": 6.5259454705365, "grad_norm": 0.15660165711008048, "learning_rate": 0.0004027104585393063, "loss": 2.939438819885254, "step": 11133, "token_acc": 0.3070572056632669 }, { "epoch": 6.526531808853709, "grad_norm": 0.16240888988111302, "learning_rate": 0.0004026912734858498, "loss": 2.9781806468963623, "step": 11134, "token_acc": 0.3022858218397415 }, { "epoch": 6.527118147170918, "grad_norm": 0.16107338658447198, "learning_rate": 0.00040267208699806454, "loss": 3.0356955528259277, "step": 11135, "token_acc": 0.2936122791940695 }, { "epoch": 6.527704485488127, "grad_norm": 0.15150660891741344, "learning_rate": 0.00040265289907613074, "loss": 2.932180404663086, "step": 11136, "token_acc": 0.3081678392650425 }, { "epoch": 6.528290823805335, "grad_norm": 0.16098823009396673, "learning_rate": 0.0004026337097202286, "loss": 2.9713478088378906, "step": 11137, "token_acc": 0.301908216049899 }, { "epoch": 6.528877162122544, "grad_norm": 0.1676462419563933, "learning_rate": 0.0004026145189305385, "loss": 2.9314002990722656, "step": 11138, "token_acc": 0.30772203380669916 }, { "epoch": 6.529463500439753, "grad_norm": 0.1649185496930345, "learning_rate": 0.0004025953267072406, "loss": 3.0186400413513184, "step": 11139, "token_acc": 0.2955524776854417 }, { "epoch": 6.5300498387569625, "grad_norm": 0.16457025830336075, "learning_rate": 0.0004025761330505152, "loss": 2.987926483154297, "step": 11140, "token_acc": 0.2992171105950669 }, { "epoch": 6.530636177074172, "grad_norm": 0.17330383111609074, "learning_rate": 0.0004025569379605427, "loss": 2.951719284057617, "step": 11141, "token_acc": 0.3037747881022273 }, { "epoch": 6.531222515391381, "grad_norm": 0.26333654755591723, "learning_rate": 0.0004025377414375033, "loss": 2.9190478324890137, "step": 11142, "token_acc": 0.30985625850268395 }, { "epoch": 6.53180885370859, "grad_norm": 0.37302851216171645, "learning_rate": 0.00040251854348157743, "loss": 2.9722611904144287, "step": 11143, "token_acc": 0.30288942198508434 }, { "epoch": 6.532395192025799, "grad_norm": 0.26622405130076077, "learning_rate": 0.0004024993440929453, "loss": 2.9543251991271973, "step": 11144, "token_acc": 0.30283647927780055 }, { "epoch": 6.532981530343008, "grad_norm": 0.21202817297220808, "learning_rate": 0.0004024801432717874, "loss": 2.938084125518799, "step": 11145, "token_acc": 0.308535713339866 }, { "epoch": 6.533567868660217, "grad_norm": 0.2555084660870878, "learning_rate": 0.00040246094101828396, "loss": 3.028245210647583, "step": 11146, "token_acc": 0.29508889335145017 }, { "epoch": 6.534154206977426, "grad_norm": 0.1936756239757135, "learning_rate": 0.00040244173733261534, "loss": 2.9544386863708496, "step": 11147, "token_acc": 0.30477834969326706 }, { "epoch": 6.534740545294635, "grad_norm": 0.2127091818913759, "learning_rate": 0.00040242253221496214, "loss": 2.992856740951538, "step": 11148, "token_acc": 0.2992971631370184 }, { "epoch": 6.535326883611844, "grad_norm": 0.19251183226628163, "learning_rate": 0.0004024033256655046, "loss": 2.9649558067321777, "step": 11149, "token_acc": 0.3030177644675327 }, { "epoch": 6.535913221929053, "grad_norm": 0.20750561467201029, "learning_rate": 0.0004023841176844233, "loss": 2.9915642738342285, "step": 11150, "token_acc": 0.2974764231040518 }, { "epoch": 6.536499560246262, "grad_norm": 0.16179455324145559, "learning_rate": 0.00040236490827189845, "loss": 2.9879589080810547, "step": 11151, "token_acc": 0.30011260674180734 }, { "epoch": 6.537085898563471, "grad_norm": 0.1743826101093737, "learning_rate": 0.00040234569742811057, "loss": 2.9683661460876465, "step": 11152, "token_acc": 0.30300225541512404 }, { "epoch": 6.53767223688068, "grad_norm": 0.16419115269212634, "learning_rate": 0.00040232648515324017, "loss": 2.975533962249756, "step": 11153, "token_acc": 0.3014663367625993 }, { "epoch": 6.538258575197889, "grad_norm": 0.18478801943418596, "learning_rate": 0.0004023072714474677, "loss": 2.964743137359619, "step": 11154, "token_acc": 0.3022246587695208 }, { "epoch": 6.538844913515098, "grad_norm": 0.15050397061360785, "learning_rate": 0.0004022880563109737, "loss": 2.949847936630249, "step": 11155, "token_acc": 0.30535603452050136 }, { "epoch": 6.5394312518323074, "grad_norm": 0.15521764653476097, "learning_rate": 0.00040226883974393856, "loss": 2.9790592193603516, "step": 11156, "token_acc": 0.2991982927673319 }, { "epoch": 6.540017590149517, "grad_norm": 0.14434526780177975, "learning_rate": 0.0004022496217465429, "loss": 2.945652484893799, "step": 11157, "token_acc": 0.30517695371290454 }, { "epoch": 6.540603928466726, "grad_norm": 0.17019691836625972, "learning_rate": 0.00040223040231896715, "loss": 2.984821319580078, "step": 11158, "token_acc": 0.3020581161225625 }, { "epoch": 6.541190266783934, "grad_norm": 0.15991862652942382, "learning_rate": 0.00040221118146139195, "loss": 2.947481870651245, "step": 11159, "token_acc": 0.30513531752990375 }, { "epoch": 6.541776605101143, "grad_norm": 0.1406013242174145, "learning_rate": 0.0004021919591739978, "loss": 2.970341682434082, "step": 11160, "token_acc": 0.3007312566549053 }, { "epoch": 6.542362943418352, "grad_norm": 0.15241765134568322, "learning_rate": 0.00040217273545696525, "loss": 2.9497389793395996, "step": 11161, "token_acc": 0.3057951863080997 }, { "epoch": 6.542949281735561, "grad_norm": 0.1651751665964146, "learning_rate": 0.00040215351031047496, "loss": 2.9672632217407227, "step": 11162, "token_acc": 0.3012107793589929 }, { "epoch": 6.54353562005277, "grad_norm": 0.16702809655806958, "learning_rate": 0.0004021342837347074, "loss": 2.959089756011963, "step": 11163, "token_acc": 0.3047524793091778 }, { "epoch": 6.5441219583699795, "grad_norm": 0.16651638106318, "learning_rate": 0.0004021150557298433, "loss": 2.9721546173095703, "step": 11164, "token_acc": 0.30175225768158426 }, { "epoch": 6.544708296687189, "grad_norm": 0.16771850223291276, "learning_rate": 0.00040209582629606325, "loss": 2.964848518371582, "step": 11165, "token_acc": 0.3009714428831018 }, { "epoch": 6.545294635004398, "grad_norm": 0.16463912302476785, "learning_rate": 0.0004020765954335478, "loss": 2.9655258655548096, "step": 11166, "token_acc": 0.3031997450893729 }, { "epoch": 6.545880973321607, "grad_norm": 0.15505401813929393, "learning_rate": 0.00040205736314247767, "loss": 2.9873099327087402, "step": 11167, "token_acc": 0.29897726219027787 }, { "epoch": 6.546467311638816, "grad_norm": 0.18476019278044695, "learning_rate": 0.0004020381294230335, "loss": 2.9754834175109863, "step": 11168, "token_acc": 0.30082285869134723 }, { "epoch": 6.547053649956025, "grad_norm": 0.20358891319795588, "learning_rate": 0.00040201889427539606, "loss": 2.9633750915527344, "step": 11169, "token_acc": 0.3029113560728886 }, { "epoch": 6.547639988273234, "grad_norm": 0.1476443707931811, "learning_rate": 0.0004019996576997459, "loss": 2.9446303844451904, "step": 11170, "token_acc": 0.30619191265690243 }, { "epoch": 6.548226326590442, "grad_norm": 0.22145729808791412, "learning_rate": 0.00040198041969626377, "loss": 2.9365415573120117, "step": 11171, "token_acc": 0.30588020170698915 }, { "epoch": 6.5488126649076515, "grad_norm": 0.2661125124865491, "learning_rate": 0.0004019611802651304, "loss": 2.987351655960083, "step": 11172, "token_acc": 0.3000746718821832 }, { "epoch": 6.549399003224861, "grad_norm": 0.18983413726452605, "learning_rate": 0.0004019419394065266, "loss": 2.979475498199463, "step": 11173, "token_acc": 0.30192675241489747 }, { "epoch": 6.54998534154207, "grad_norm": 0.1787259396518735, "learning_rate": 0.0004019226971206329, "loss": 2.989509105682373, "step": 11174, "token_acc": 0.2994121969140338 }, { "epoch": 6.550571679859279, "grad_norm": 0.1899232531587389, "learning_rate": 0.00040190345340763024, "loss": 2.9316983222961426, "step": 11175, "token_acc": 0.30696294881386327 }, { "epoch": 6.551158018176488, "grad_norm": 0.15998517076276333, "learning_rate": 0.00040188420826769923, "loss": 3.002680778503418, "step": 11176, "token_acc": 0.2970772610442392 }, { "epoch": 6.551744356493697, "grad_norm": 0.31964437712731397, "learning_rate": 0.0004018649617010209, "loss": 2.9744925498962402, "step": 11177, "token_acc": 0.29989171089111955 }, { "epoch": 6.552330694810906, "grad_norm": 0.27486265657509795, "learning_rate": 0.0004018457137077758, "loss": 2.9604973793029785, "step": 11178, "token_acc": 0.30386945792525655 }, { "epoch": 6.552917033128115, "grad_norm": 0.167832092391946, "learning_rate": 0.00040182646428814486, "loss": 2.976696491241455, "step": 11179, "token_acc": 0.3009972299168975 }, { "epoch": 6.5535033714453235, "grad_norm": 0.26145871289075223, "learning_rate": 0.0004018072134423089, "loss": 2.937617301940918, "step": 11180, "token_acc": 0.3069281785787036 }, { "epoch": 6.554089709762533, "grad_norm": 0.15296203546293735, "learning_rate": 0.0004017879611704487, "loss": 3.014613151550293, "step": 11181, "token_acc": 0.2958672185915268 }, { "epoch": 6.554676048079742, "grad_norm": 0.21084253731101335, "learning_rate": 0.0004017687074727452, "loss": 2.965714931488037, "step": 11182, "token_acc": 0.30383736771493247 }, { "epoch": 6.555262386396951, "grad_norm": 0.16989467738679165, "learning_rate": 0.0004017494523493791, "loss": 2.9914798736572266, "step": 11183, "token_acc": 0.2998882461895055 }, { "epoch": 6.55584872471416, "grad_norm": 0.22025874710734558, "learning_rate": 0.00040173019580053143, "loss": 2.9733872413635254, "step": 11184, "token_acc": 0.3017755655824702 }, { "epoch": 6.556435063031369, "grad_norm": 0.14963016294788, "learning_rate": 0.00040171093782638307, "loss": 2.986146926879883, "step": 11185, "token_acc": 0.29900933961224074 }, { "epoch": 6.557021401348578, "grad_norm": 0.18231632845402326, "learning_rate": 0.0004016916784271148, "loss": 2.9921412467956543, "step": 11186, "token_acc": 0.29869956215096616 }, { "epoch": 6.557607739665787, "grad_norm": 0.1561226368181726, "learning_rate": 0.00040167241760290766, "loss": 2.969301462173462, "step": 11187, "token_acc": 0.30307405757349665 }, { "epoch": 6.558194077982996, "grad_norm": 0.17722776522688904, "learning_rate": 0.0004016531553539425, "loss": 2.974203586578369, "step": 11188, "token_acc": 0.3020356508158759 }, { "epoch": 6.5587804163002055, "grad_norm": 0.15868407061233872, "learning_rate": 0.00040163389168040045, "loss": 2.9477462768554688, "step": 11189, "token_acc": 0.3060852729032874 }, { "epoch": 6.559366754617415, "grad_norm": 0.15926496747014127, "learning_rate": 0.0004016146265824622, "loss": 2.9946155548095703, "step": 11190, "token_acc": 0.2993076167425593 }, { "epoch": 6.559953092934624, "grad_norm": 0.1674568520768532, "learning_rate": 0.0004015953600603088, "loss": 2.961996555328369, "step": 11191, "token_acc": 0.30339379906134484 }, { "epoch": 6.560539431251832, "grad_norm": 0.17170312257271134, "learning_rate": 0.00040157609211412135, "loss": 2.9521708488464355, "step": 11192, "token_acc": 0.3043437301282001 }, { "epoch": 6.561125769569041, "grad_norm": 0.1530956888546429, "learning_rate": 0.00040155682274408067, "loss": 2.972043514251709, "step": 11193, "token_acc": 0.30137425095900683 }, { "epoch": 6.56171210788625, "grad_norm": 0.1557623495844131, "learning_rate": 0.000401537551950368, "loss": 2.961657762527466, "step": 11194, "token_acc": 0.30311488334714964 }, { "epoch": 6.562298446203459, "grad_norm": 0.16003021059210945, "learning_rate": 0.0004015182797331641, "loss": 2.968696117401123, "step": 11195, "token_acc": 0.301645058470508 }, { "epoch": 6.562884784520668, "grad_norm": 0.1786294270629262, "learning_rate": 0.00040149900609265024, "loss": 2.9579410552978516, "step": 11196, "token_acc": 0.30328853182864607 }, { "epoch": 6.5634711228378775, "grad_norm": 0.17763957090967267, "learning_rate": 0.00040147973102900724, "loss": 2.9871959686279297, "step": 11197, "token_acc": 0.2991581162175162 }, { "epoch": 6.564057461155087, "grad_norm": 0.18231735563094909, "learning_rate": 0.0004014604545424164, "loss": 2.9686074256896973, "step": 11198, "token_acc": 0.3019304284295718 }, { "epoch": 6.564643799472296, "grad_norm": 0.1737406443680257, "learning_rate": 0.0004014411766330587, "loss": 2.976677179336548, "step": 11199, "token_acc": 0.30153282140735743 }, { "epoch": 6.565230137789505, "grad_norm": 0.16011320529324602, "learning_rate": 0.0004014218973011151, "loss": 2.9515035152435303, "step": 11200, "token_acc": 0.3047002806683775 }, { "epoch": 6.565816476106714, "grad_norm": 0.1619442764634733, "learning_rate": 0.0004014026165467669, "loss": 2.975390911102295, "step": 11201, "token_acc": 0.3001782503076001 }, { "epoch": 6.566402814423922, "grad_norm": 0.16733250096343202, "learning_rate": 0.00040138333437019516, "loss": 2.935001850128174, "step": 11202, "token_acc": 0.3078750233060642 }, { "epoch": 6.566989152741131, "grad_norm": 0.1891730378653655, "learning_rate": 0.00040136405077158087, "loss": 2.9878616333007812, "step": 11203, "token_acc": 0.30076576964151047 }, { "epoch": 6.56757549105834, "grad_norm": 0.1659767218358766, "learning_rate": 0.0004013447657511054, "loss": 2.9527807235717773, "step": 11204, "token_acc": 0.3044030602647454 }, { "epoch": 6.5681618293755495, "grad_norm": 0.1532420207129692, "learning_rate": 0.00040132547930894975, "loss": 2.9237451553344727, "step": 11205, "token_acc": 0.30833082277577817 }, { "epoch": 6.568748167692759, "grad_norm": 0.1825513720403444, "learning_rate": 0.00040130619144529515, "loss": 2.9488725662231445, "step": 11206, "token_acc": 0.3040480561003648 }, { "epoch": 6.569334506009968, "grad_norm": 0.17243852168682142, "learning_rate": 0.00040128690216032284, "loss": 2.9678449630737305, "step": 11207, "token_acc": 0.3041058498130173 }, { "epoch": 6.569920844327177, "grad_norm": 0.17437718427260404, "learning_rate": 0.00040126761145421383, "loss": 2.988457679748535, "step": 11208, "token_acc": 0.3011408582849544 }, { "epoch": 6.570507182644386, "grad_norm": 0.18657848849176717, "learning_rate": 0.00040124831932714946, "loss": 2.971238136291504, "step": 11209, "token_acc": 0.3030426770532311 }, { "epoch": 6.571093520961595, "grad_norm": 0.19553339550398852, "learning_rate": 0.000401229025779311, "loss": 2.934677839279175, "step": 11210, "token_acc": 0.30813865152174025 }, { "epoch": 6.571679859278804, "grad_norm": 0.16242794869941907, "learning_rate": 0.0004012097308108795, "loss": 2.9933226108551025, "step": 11211, "token_acc": 0.29941702913203644 }, { "epoch": 6.572266197596013, "grad_norm": 0.18507054383700744, "learning_rate": 0.0004011904344220365, "loss": 2.94520902633667, "step": 11212, "token_acc": 0.3052172650779111 }, { "epoch": 6.572852535913222, "grad_norm": 0.20344307617890334, "learning_rate": 0.000401171136612963, "loss": 2.994338274002075, "step": 11213, "token_acc": 0.29907069999840874 }, { "epoch": 6.573438874230431, "grad_norm": 0.16494569677580434, "learning_rate": 0.0004011518373838404, "loss": 2.985337257385254, "step": 11214, "token_acc": 0.29951017292380117 }, { "epoch": 6.57402521254764, "grad_norm": 0.15881999450891515, "learning_rate": 0.00040113253673484993, "loss": 2.93871808052063, "step": 11215, "token_acc": 0.30591864333905777 }, { "epoch": 6.574611550864849, "grad_norm": 0.15664374797376182, "learning_rate": 0.000401113234666173, "loss": 2.9544553756713867, "step": 11216, "token_acc": 0.3051158600253619 }, { "epoch": 6.575197889182058, "grad_norm": 0.16009462209079725, "learning_rate": 0.0004010939311779909, "loss": 2.920405149459839, "step": 11217, "token_acc": 0.3092036139208881 }, { "epoch": 6.575784227499267, "grad_norm": 0.16481042129008847, "learning_rate": 0.00040107462627048487, "loss": 2.9739761352539062, "step": 11218, "token_acc": 0.30430224566151676 }, { "epoch": 6.576370565816476, "grad_norm": 0.15946977336207707, "learning_rate": 0.0004010553199438363, "loss": 2.931671380996704, "step": 11219, "token_acc": 0.30687955702915726 }, { "epoch": 6.576956904133685, "grad_norm": 0.16953680646510555, "learning_rate": 0.00040103601219822644, "loss": 3.0140914916992188, "step": 11220, "token_acc": 0.2957710557217473 }, { "epoch": 6.577543242450894, "grad_norm": 0.1701523026092201, "learning_rate": 0.00040101670303383685, "loss": 2.9883460998535156, "step": 11221, "token_acc": 0.2982902474290959 }, { "epoch": 6.5781295807681035, "grad_norm": 0.21875221446136267, "learning_rate": 0.0004009973924508489, "loss": 2.964977741241455, "step": 11222, "token_acc": 0.3023220927285236 }, { "epoch": 6.578715919085313, "grad_norm": 0.21434783057040763, "learning_rate": 0.00040097808044944384, "loss": 2.962202787399292, "step": 11223, "token_acc": 0.3050518474937752 }, { "epoch": 6.579302257402521, "grad_norm": 0.17424674084696992, "learning_rate": 0.00040095876702980315, "loss": 3.005999803543091, "step": 11224, "token_acc": 0.2980569820903087 }, { "epoch": 6.57988859571973, "grad_norm": 0.20087966181329328, "learning_rate": 0.00040093945219210825, "loss": 2.9673056602478027, "step": 11225, "token_acc": 0.303547943071513 }, { "epoch": 6.580474934036939, "grad_norm": 0.20435472173356972, "learning_rate": 0.0004009201359365407, "loss": 2.982630491256714, "step": 11226, "token_acc": 0.3000470207152373 }, { "epoch": 6.581061272354148, "grad_norm": 0.1805361586376264, "learning_rate": 0.0004009008182632817, "loss": 2.9506354331970215, "step": 11227, "token_acc": 0.30605738575983 }, { "epoch": 6.581647610671357, "grad_norm": 0.18641077785242474, "learning_rate": 0.00040088149917251296, "loss": 2.984947681427002, "step": 11228, "token_acc": 0.297964899658247 }, { "epoch": 6.582233948988566, "grad_norm": 0.18877877111446134, "learning_rate": 0.0004008621786644159, "loss": 2.963156223297119, "step": 11229, "token_acc": 0.30306955801812013 }, { "epoch": 6.5828202873057755, "grad_norm": 0.17334043146748826, "learning_rate": 0.0004008428567391718, "loss": 2.99684476852417, "step": 11230, "token_acc": 0.2968765642206427 }, { "epoch": 6.583406625622985, "grad_norm": 0.19024658869410907, "learning_rate": 0.0004008235333969624, "loss": 2.965780735015869, "step": 11231, "token_acc": 0.302976737244852 }, { "epoch": 6.583992963940194, "grad_norm": 0.19598266352144297, "learning_rate": 0.0004008042086379692, "loss": 2.9838173389434814, "step": 11232, "token_acc": 0.2983281730566809 }, { "epoch": 6.584579302257403, "grad_norm": 0.1542214787173342, "learning_rate": 0.0004007848824623736, "loss": 2.982515335083008, "step": 11233, "token_acc": 0.29947915993537966 }, { "epoch": 6.585165640574612, "grad_norm": 0.18870182275863476, "learning_rate": 0.00040076555487035726, "loss": 2.936087131500244, "step": 11234, "token_acc": 0.3077537258212572 }, { "epoch": 6.585751978891821, "grad_norm": 0.21990111832902462, "learning_rate": 0.00040074622586210165, "loss": 2.966492176055908, "step": 11235, "token_acc": 0.3015228320395091 }, { "epoch": 6.586338317209029, "grad_norm": 0.1730571691483246, "learning_rate": 0.0004007268954377884, "loss": 3.032069206237793, "step": 11236, "token_acc": 0.2932425221249204 }, { "epoch": 6.586924655526238, "grad_norm": 0.18292674111527507, "learning_rate": 0.0004007075635975991, "loss": 3.0040929317474365, "step": 11237, "token_acc": 0.2960183689947517 }, { "epoch": 6.5875109938434475, "grad_norm": 0.20896854429305198, "learning_rate": 0.0004006882303417152, "loss": 3.0218987464904785, "step": 11238, "token_acc": 0.29448428303390134 }, { "epoch": 6.588097332160657, "grad_norm": 0.1665642206983382, "learning_rate": 0.0004006688956703186, "loss": 2.9605095386505127, "step": 11239, "token_acc": 0.30383915256306154 }, { "epoch": 6.588683670477866, "grad_norm": 0.1900831292834334, "learning_rate": 0.0004006495595835906, "loss": 2.982273817062378, "step": 11240, "token_acc": 0.30070123355684086 }, { "epoch": 6.589270008795075, "grad_norm": 0.18985697433528803, "learning_rate": 0.00040063022208171306, "loss": 2.949173927307129, "step": 11241, "token_acc": 0.3054469130477095 }, { "epoch": 6.589856347112284, "grad_norm": 0.1748365561690017, "learning_rate": 0.0004006108831648676, "loss": 2.953469753265381, "step": 11242, "token_acc": 0.30341288545852646 }, { "epoch": 6.590442685429493, "grad_norm": 0.24564981606984573, "learning_rate": 0.0004005915428332358, "loss": 2.976010322570801, "step": 11243, "token_acc": 0.3004418755736133 }, { "epoch": 6.591029023746702, "grad_norm": 0.1952095042570715, "learning_rate": 0.00040057220108699935, "loss": 2.9934163093566895, "step": 11244, "token_acc": 0.29856175353019376 }, { "epoch": 6.59161536206391, "grad_norm": 0.1886815152948818, "learning_rate": 0.00040055285792634, "loss": 2.9731338024139404, "step": 11245, "token_acc": 0.3014417716490023 }, { "epoch": 6.5922017003811195, "grad_norm": 0.18511738942371814, "learning_rate": 0.0004005335133514395, "loss": 2.949902057647705, "step": 11246, "token_acc": 0.3046860694461157 }, { "epoch": 6.592788038698329, "grad_norm": 0.16832657512394414, "learning_rate": 0.0004005141673624794, "loss": 2.980968475341797, "step": 11247, "token_acc": 0.29957453908400766 }, { "epoch": 6.593374377015538, "grad_norm": 0.1962482673027451, "learning_rate": 0.0004004948199596415, "loss": 3.009364128112793, "step": 11248, "token_acc": 0.2956463709646841 }, { "epoch": 6.593960715332747, "grad_norm": 0.20120199114745718, "learning_rate": 0.00040047547114310756, "loss": 3.010697364807129, "step": 11249, "token_acc": 0.2961451307263162 }, { "epoch": 6.594547053649956, "grad_norm": 0.18916567753920846, "learning_rate": 0.0004004561209130594, "loss": 2.983368396759033, "step": 11250, "token_acc": 0.2992909686686876 }, { "epoch": 6.595133391967165, "grad_norm": 0.18504645649833162, "learning_rate": 0.0004004367692696788, "loss": 2.9660043716430664, "step": 11251, "token_acc": 0.3024168770846135 }, { "epoch": 6.595719730284374, "grad_norm": 0.1676705082216995, "learning_rate": 0.0004004174162131473, "loss": 2.9570086002349854, "step": 11252, "token_acc": 0.30335053828405356 }, { "epoch": 6.596306068601583, "grad_norm": 0.17593995390247955, "learning_rate": 0.0004003980617436469, "loss": 2.9862563610076904, "step": 11253, "token_acc": 0.29958512511560587 }, { "epoch": 6.596892406918792, "grad_norm": 0.1684482167915221, "learning_rate": 0.0004003787058613594, "loss": 2.964564800262451, "step": 11254, "token_acc": 0.30418020698745807 }, { "epoch": 6.5974787452360015, "grad_norm": 0.17990097223482265, "learning_rate": 0.00040035934856646663, "loss": 2.956902027130127, "step": 11255, "token_acc": 0.3033822786591511 }, { "epoch": 6.598065083553211, "grad_norm": 0.1869272391398067, "learning_rate": 0.00040033998985915037, "loss": 2.9767754077911377, "step": 11256, "token_acc": 0.3023617540521849 }, { "epoch": 6.598651421870419, "grad_norm": 0.16109429483886992, "learning_rate": 0.00040032062973959247, "loss": 2.9803686141967773, "step": 11257, "token_acc": 0.3001485845231431 }, { "epoch": 6.599237760187628, "grad_norm": 0.19020374212928276, "learning_rate": 0.00040030126820797486, "loss": 2.965989589691162, "step": 11258, "token_acc": 0.3026991210702201 }, { "epoch": 6.599824098504837, "grad_norm": 0.1687357345172246, "learning_rate": 0.00040028190526447926, "loss": 2.9698033332824707, "step": 11259, "token_acc": 0.3024019880008409 }, { "epoch": 6.600410436822046, "grad_norm": 0.17998202483812448, "learning_rate": 0.0004002625409092878, "loss": 2.955725908279419, "step": 11260, "token_acc": 0.3041121475263785 }, { "epoch": 6.600996775139255, "grad_norm": 0.15161831970961853, "learning_rate": 0.00040024317514258224, "loss": 3.0178322792053223, "step": 11261, "token_acc": 0.2966456159147805 }, { "epoch": 6.601583113456464, "grad_norm": 0.1652386523746586, "learning_rate": 0.0004002238079645444, "loss": 2.939131259918213, "step": 11262, "token_acc": 0.3057705107946202 }, { "epoch": 6.6021694517736735, "grad_norm": 0.1585342635997751, "learning_rate": 0.0004002044393753564, "loss": 2.970935583114624, "step": 11263, "token_acc": 0.30150134715904425 }, { "epoch": 6.602755790090883, "grad_norm": 0.17874758350722286, "learning_rate": 0.00040018506937520003, "loss": 2.94380521774292, "step": 11264, "token_acc": 0.3054671773689896 }, { "epoch": 6.603342128408092, "grad_norm": 0.21557875427627826, "learning_rate": 0.00040016569796425737, "loss": 2.965689182281494, "step": 11265, "token_acc": 0.3013336039893269 }, { "epoch": 6.603928466725301, "grad_norm": 0.3044733490108263, "learning_rate": 0.0004001463251427103, "loss": 2.961544990539551, "step": 11266, "token_acc": 0.30331916043084933 }, { "epoch": 6.604514805042509, "grad_norm": 0.27772725864653786, "learning_rate": 0.0004001269509107409, "loss": 2.975137710571289, "step": 11267, "token_acc": 0.302118156631256 }, { "epoch": 6.605101143359718, "grad_norm": 0.16136409679450592, "learning_rate": 0.00040010757526853097, "loss": 2.9820151329040527, "step": 11268, "token_acc": 0.2998230937761174 }, { "epoch": 6.605687481676927, "grad_norm": 0.19867519207947842, "learning_rate": 0.0004000881982162627, "loss": 2.951850175857544, "step": 11269, "token_acc": 0.30432672574687064 }, { "epoch": 6.606273819994136, "grad_norm": 0.1682162728476659, "learning_rate": 0.000400068819754118, "loss": 2.9683351516723633, "step": 11270, "token_acc": 0.3041029900793706 }, { "epoch": 6.6068601583113455, "grad_norm": 0.18361017807144261, "learning_rate": 0.00040004943988227907, "loss": 2.9812755584716797, "step": 11271, "token_acc": 0.30288669028134035 }, { "epoch": 6.607446496628555, "grad_norm": 0.1755034064134464, "learning_rate": 0.00040003005860092777, "loss": 2.9711644649505615, "step": 11272, "token_acc": 0.302183261231885 }, { "epoch": 6.608032834945764, "grad_norm": 0.15978620826140932, "learning_rate": 0.0004000106759102463, "loss": 2.944441080093384, "step": 11273, "token_acc": 0.3057343778620787 }, { "epoch": 6.608619173262973, "grad_norm": 0.17103776973949536, "learning_rate": 0.0003999912918104166, "loss": 2.9782581329345703, "step": 11274, "token_acc": 0.3027661953850904 }, { "epoch": 6.609205511580182, "grad_norm": 0.16473655633248444, "learning_rate": 0.0003999719063016209, "loss": 3.0013179779052734, "step": 11275, "token_acc": 0.2978080440398768 }, { "epoch": 6.609791849897391, "grad_norm": 0.16068811863320862, "learning_rate": 0.0003999525193840412, "loss": 2.9495091438293457, "step": 11276, "token_acc": 0.3057050760061086 }, { "epoch": 6.6103781882146, "grad_norm": 0.15847140481231592, "learning_rate": 0.0003999331310578596, "loss": 2.9653549194335938, "step": 11277, "token_acc": 0.3023673991106682 }, { "epoch": 6.610964526531809, "grad_norm": 0.15978898150342716, "learning_rate": 0.0003999137413232583, "loss": 3.0211563110351562, "step": 11278, "token_acc": 0.29484481441643673 }, { "epoch": 6.6115508648490176, "grad_norm": 0.1550866787487519, "learning_rate": 0.0003998943501804194, "loss": 2.962747812271118, "step": 11279, "token_acc": 0.3023202284563765 }, { "epoch": 6.612137203166227, "grad_norm": 0.16061791384692747, "learning_rate": 0.00039987495762952514, "loss": 2.9647843837738037, "step": 11280, "token_acc": 0.30258597922808683 }, { "epoch": 6.612723541483436, "grad_norm": 0.1576971320760154, "learning_rate": 0.00039985556367075754, "loss": 2.97267484664917, "step": 11281, "token_acc": 0.3016032958093889 }, { "epoch": 6.613309879800645, "grad_norm": 0.153657028608998, "learning_rate": 0.00039983616830429887, "loss": 2.990851402282715, "step": 11282, "token_acc": 0.29982219144488326 }, { "epoch": 6.613896218117854, "grad_norm": 0.15423272946294572, "learning_rate": 0.0003998167715303313, "loss": 2.9909074306488037, "step": 11283, "token_acc": 0.2995448837837695 }, { "epoch": 6.614482556435063, "grad_norm": 0.14871675841811052, "learning_rate": 0.00039979737334903704, "loss": 2.9922878742218018, "step": 11284, "token_acc": 0.30002985902275686 }, { "epoch": 6.615068894752272, "grad_norm": 0.15798849544513727, "learning_rate": 0.00039977797376059834, "loss": 2.9922990798950195, "step": 11285, "token_acc": 0.2968589975790332 }, { "epoch": 6.615655233069481, "grad_norm": 0.14606783816881663, "learning_rate": 0.00039975857276519736, "loss": 3.00759220123291, "step": 11286, "token_acc": 0.2978142003875588 }, { "epoch": 6.6162415713866904, "grad_norm": 0.17191826425833984, "learning_rate": 0.0003997391703630164, "loss": 3.011841297149658, "step": 11287, "token_acc": 0.29837340221888314 }, { "epoch": 6.616827909703899, "grad_norm": 0.16073184456246847, "learning_rate": 0.0003997197665542377, "loss": 2.995091438293457, "step": 11288, "token_acc": 0.29715104668869113 }, { "epoch": 6.617414248021108, "grad_norm": 0.1605085025934662, "learning_rate": 0.0003997003613390436, "loss": 2.9804296493530273, "step": 11289, "token_acc": 0.30143200354049443 }, { "epoch": 6.618000586338317, "grad_norm": 0.17013224521671716, "learning_rate": 0.00039968095471761635, "loss": 2.9749085903167725, "step": 11290, "token_acc": 0.3008406498200409 }, { "epoch": 6.618586924655526, "grad_norm": 0.15732755057500827, "learning_rate": 0.0003996615466901381, "loss": 2.9856176376342773, "step": 11291, "token_acc": 0.3006596010795866 }, { "epoch": 6.619173262972735, "grad_norm": 0.17827725954953713, "learning_rate": 0.00039964213725679146, "loss": 3.006697416305542, "step": 11292, "token_acc": 0.29841368442223337 }, { "epoch": 6.619759601289944, "grad_norm": 0.17991959864122428, "learning_rate": 0.00039962272641775844, "loss": 2.9707937240600586, "step": 11293, "token_acc": 0.3033506285655283 }, { "epoch": 6.620345939607153, "grad_norm": 0.26187656411690885, "learning_rate": 0.00039960331417322157, "loss": 2.956144332885742, "step": 11294, "token_acc": 0.3027453274024821 }, { "epoch": 6.6209322779243625, "grad_norm": 0.34432967288291105, "learning_rate": 0.00039958390052336314, "loss": 3.0120749473571777, "step": 11295, "token_acc": 0.2960285941223193 }, { "epoch": 6.621518616241572, "grad_norm": 0.27889487123375595, "learning_rate": 0.0003995644854683655, "loss": 2.992389678955078, "step": 11296, "token_acc": 0.2992779372002196 }, { "epoch": 6.622104954558781, "grad_norm": 0.1681474203128766, "learning_rate": 0.00039954506900841114, "loss": 2.958573818206787, "step": 11297, "token_acc": 0.3053966607790075 }, { "epoch": 6.62269129287599, "grad_norm": 0.21352704113694201, "learning_rate": 0.00039952565114368234, "loss": 2.9816384315490723, "step": 11298, "token_acc": 0.3016189448625607 }, { "epoch": 6.623277631193199, "grad_norm": 0.2529299917132346, "learning_rate": 0.0003995062318743615, "loss": 2.9975266456604004, "step": 11299, "token_acc": 0.30087965963496477 }, { "epoch": 6.623863969510407, "grad_norm": 0.16246424444462493, "learning_rate": 0.0003994868112006312, "loss": 2.994394302368164, "step": 11300, "token_acc": 0.2990252859144457 }, { "epoch": 6.624450307827616, "grad_norm": 0.2585178836100943, "learning_rate": 0.0003994673891226737, "loss": 2.9406607151031494, "step": 11301, "token_acc": 0.30620253450950125 }, { "epoch": 6.625036646144825, "grad_norm": 0.1674858029010497, "learning_rate": 0.0003994479656406714, "loss": 2.989301919937134, "step": 11302, "token_acc": 0.2991464284770487 }, { "epoch": 6.6256229844620345, "grad_norm": 0.23907903713222117, "learning_rate": 0.00039942854075480683, "loss": 2.9460976123809814, "step": 11303, "token_acc": 0.3050778546945721 }, { "epoch": 6.626209322779244, "grad_norm": 0.1733634729341798, "learning_rate": 0.00039940911446526256, "loss": 2.92620587348938, "step": 11304, "token_acc": 0.30871569048196595 }, { "epoch": 6.626795661096453, "grad_norm": 0.22544127734143735, "learning_rate": 0.000399389686772221, "loss": 2.966820240020752, "step": 11305, "token_acc": 0.30217138981989855 }, { "epoch": 6.627381999413662, "grad_norm": 0.1933680911373531, "learning_rate": 0.0003993702576758646, "loss": 2.931497097015381, "step": 11306, "token_acc": 0.30754804309498396 }, { "epoch": 6.627968337730871, "grad_norm": 0.18020681447448758, "learning_rate": 0.00039935082717637593, "loss": 2.980104446411133, "step": 11307, "token_acc": 0.30057539123762084 }, { "epoch": 6.62855467604808, "grad_norm": 0.1731775260199237, "learning_rate": 0.00039933139527393744, "loss": 2.969069719314575, "step": 11308, "token_acc": 0.30227127110111535 }, { "epoch": 6.629141014365289, "grad_norm": 0.22069848635505473, "learning_rate": 0.0003993119619687318, "loss": 2.9962916374206543, "step": 11309, "token_acc": 0.2969698691411143 }, { "epoch": 6.629727352682497, "grad_norm": 0.16130669577377058, "learning_rate": 0.00039929252726094144, "loss": 2.911189079284668, "step": 11310, "token_acc": 0.3115277452140392 }, { "epoch": 6.6303136909997065, "grad_norm": 0.2199124118643532, "learning_rate": 0.00039927309115074896, "loss": 2.9559974670410156, "step": 11311, "token_acc": 0.30275293082557053 }, { "epoch": 6.630900029316916, "grad_norm": 0.15738936091451544, "learning_rate": 0.0003992536536383369, "loss": 2.9630136489868164, "step": 11312, "token_acc": 0.30459864709167883 }, { "epoch": 6.631486367634125, "grad_norm": 0.2545698903081588, "learning_rate": 0.00039923421472388786, "loss": 2.9613256454467773, "step": 11313, "token_acc": 0.30383339391080416 }, { "epoch": 6.632072705951334, "grad_norm": 0.1536925509587512, "learning_rate": 0.00039921477440758456, "loss": 2.9535117149353027, "step": 11314, "token_acc": 0.30526684098035173 }, { "epoch": 6.632659044268543, "grad_norm": 0.2261107277589054, "learning_rate": 0.0003991953326896095, "loss": 2.9762346744537354, "step": 11315, "token_acc": 0.30294161193777297 }, { "epoch": 6.633245382585752, "grad_norm": 0.16426183098494554, "learning_rate": 0.00039917588957014534, "loss": 2.9472107887268066, "step": 11316, "token_acc": 0.3050770758300525 }, { "epoch": 6.633831720902961, "grad_norm": 0.2145499042313142, "learning_rate": 0.0003991564450493747, "loss": 3.020759344100952, "step": 11317, "token_acc": 0.2951953845672771 }, { "epoch": 6.63441805922017, "grad_norm": 0.17814728411850064, "learning_rate": 0.00039913699912748026, "loss": 2.9859585762023926, "step": 11318, "token_acc": 0.29976791794776475 }, { "epoch": 6.635004397537379, "grad_norm": 0.1879720329454861, "learning_rate": 0.0003991175518046446, "loss": 2.9675140380859375, "step": 11319, "token_acc": 0.30041296412374174 }, { "epoch": 6.6355907358545885, "grad_norm": 0.18202018110081905, "learning_rate": 0.0003990981030810506, "loss": 2.9443275928497314, "step": 11320, "token_acc": 0.30576355617603196 }, { "epoch": 6.636177074171798, "grad_norm": 0.19244164916234927, "learning_rate": 0.0003990786529568807, "loss": 2.973818302154541, "step": 11321, "token_acc": 0.3025326994870142 }, { "epoch": 6.636763412489006, "grad_norm": 0.16071303484187896, "learning_rate": 0.0003990592014323179, "loss": 2.925814628601074, "step": 11322, "token_acc": 0.30808422550354236 }, { "epoch": 6.637349750806215, "grad_norm": 0.1599602395163467, "learning_rate": 0.00039903974850754464, "loss": 2.975641965866089, "step": 11323, "token_acc": 0.3022494388342611 }, { "epoch": 6.637936089123424, "grad_norm": 0.18456231069785628, "learning_rate": 0.00039902029418274385, "loss": 2.9288320541381836, "step": 11324, "token_acc": 0.30712462275682284 }, { "epoch": 6.638522427440633, "grad_norm": 0.15320914130243188, "learning_rate": 0.0003990008384580982, "loss": 2.9629385471343994, "step": 11325, "token_acc": 0.3028232192031953 }, { "epoch": 6.639108765757842, "grad_norm": 0.166298481540386, "learning_rate": 0.00039898138133379036, "loss": 2.9879112243652344, "step": 11326, "token_acc": 0.3004662579261225 }, { "epoch": 6.639695104075051, "grad_norm": 0.15404862208584968, "learning_rate": 0.0003989619228100033, "loss": 2.9541945457458496, "step": 11327, "token_acc": 0.3045424972067499 }, { "epoch": 6.6402814423922605, "grad_norm": 0.18990610287037202, "learning_rate": 0.0003989424628869196, "loss": 2.95762038230896, "step": 11328, "token_acc": 0.30331468126305516 }, { "epoch": 6.64086778070947, "grad_norm": 0.17273292187125022, "learning_rate": 0.0003989230015647223, "loss": 2.966157913208008, "step": 11329, "token_acc": 0.30294152878250497 }, { "epoch": 6.641454119026679, "grad_norm": 0.1789580370560568, "learning_rate": 0.00039890353884359397, "loss": 2.9753592014312744, "step": 11330, "token_acc": 0.3034994968026816 }, { "epoch": 6.642040457343887, "grad_norm": 0.18401936389355647, "learning_rate": 0.00039888407472371757, "loss": 2.939253807067871, "step": 11331, "token_acc": 0.3071318396904656 }, { "epoch": 6.642626795661096, "grad_norm": 0.18840871853493116, "learning_rate": 0.0003988646092052759, "loss": 2.992238998413086, "step": 11332, "token_acc": 0.2986090598362731 }, { "epoch": 6.643213133978305, "grad_norm": 0.16661194799982856, "learning_rate": 0.00039884514228845185, "loss": 2.9246954917907715, "step": 11333, "token_acc": 0.3085244200928477 }, { "epoch": 6.643799472295514, "grad_norm": 0.1909322920754988, "learning_rate": 0.0003988256739734283, "loss": 2.978854179382324, "step": 11334, "token_acc": 0.30011719232981576 }, { "epoch": 6.644385810612723, "grad_norm": 0.17486995596637397, "learning_rate": 0.00039880620426038804, "loss": 3.017129898071289, "step": 11335, "token_acc": 0.29652975464280873 }, { "epoch": 6.6449721489299325, "grad_norm": 0.22381391879663706, "learning_rate": 0.00039878673314951396, "loss": 2.954921245574951, "step": 11336, "token_acc": 0.30441744892324685 }, { "epoch": 6.645558487247142, "grad_norm": 0.2687697638986463, "learning_rate": 0.0003987672606409891, "loss": 3.00222110748291, "step": 11337, "token_acc": 0.2976112735893403 }, { "epoch": 6.646144825564351, "grad_norm": 0.18437348364461953, "learning_rate": 0.00039874778673499624, "loss": 2.9416041374206543, "step": 11338, "token_acc": 0.3054880932768599 }, { "epoch": 6.64673116388156, "grad_norm": 0.2450197236217183, "learning_rate": 0.0003987283114317184, "loss": 2.9084627628326416, "step": 11339, "token_acc": 0.3123429881246578 }, { "epoch": 6.647317502198769, "grad_norm": 0.2848056883882818, "learning_rate": 0.00039870883473133847, "loss": 2.9422760009765625, "step": 11340, "token_acc": 0.3083463187110111 }, { "epoch": 6.647903840515978, "grad_norm": 0.15853114272354413, "learning_rate": 0.00039868935663403944, "loss": 2.988128662109375, "step": 11341, "token_acc": 0.2999840155049602 }, { "epoch": 6.648490178833187, "grad_norm": 0.2322339602601575, "learning_rate": 0.0003986698771400042, "loss": 2.994713306427002, "step": 11342, "token_acc": 0.29932864525260017 }, { "epoch": 6.649076517150396, "grad_norm": 0.15229090812477258, "learning_rate": 0.0003986503962494159, "loss": 2.975273847579956, "step": 11343, "token_acc": 0.30006314800129236 }, { "epoch": 6.6496628554676045, "grad_norm": 0.21826083613405795, "learning_rate": 0.00039863091396245744, "loss": 2.985178232192993, "step": 11344, "token_acc": 0.3009278958092726 }, { "epoch": 6.650249193784814, "grad_norm": 0.17413434387675664, "learning_rate": 0.00039861143027931174, "loss": 2.9989888668060303, "step": 11345, "token_acc": 0.2982934276241239 }, { "epoch": 6.650835532102023, "grad_norm": 0.19602224656811673, "learning_rate": 0.00039859194520016196, "loss": 3.0043396949768066, "step": 11346, "token_acc": 0.29739406679702446 }, { "epoch": 6.651421870419232, "grad_norm": 0.16917231433944352, "learning_rate": 0.000398572458725191, "loss": 2.951287269592285, "step": 11347, "token_acc": 0.3036410231111365 }, { "epoch": 6.652008208736441, "grad_norm": 0.22558505873386944, "learning_rate": 0.0003985529708545821, "loss": 2.990481376647949, "step": 11348, "token_acc": 0.299524301536374 }, { "epoch": 6.65259454705365, "grad_norm": 0.17242011803185772, "learning_rate": 0.0003985334815885182, "loss": 2.975827693939209, "step": 11349, "token_acc": 0.30318547107962357 }, { "epoch": 6.653180885370859, "grad_norm": 0.22068679908703276, "learning_rate": 0.00039851399092718235, "loss": 3.0046346187591553, "step": 11350, "token_acc": 0.29883451805955763 }, { "epoch": 6.653767223688068, "grad_norm": 0.18977190717218675, "learning_rate": 0.00039849449887075774, "loss": 2.9638400077819824, "step": 11351, "token_acc": 0.30285654847811105 }, { "epoch": 6.654353562005277, "grad_norm": 0.18960398653368563, "learning_rate": 0.00039847500541942725, "loss": 2.977375030517578, "step": 11352, "token_acc": 0.30059698698381165 }, { "epoch": 6.654939900322486, "grad_norm": 0.17752790292338771, "learning_rate": 0.00039845551057337436, "loss": 3.014984607696533, "step": 11353, "token_acc": 0.29502940448675086 }, { "epoch": 6.655526238639695, "grad_norm": 0.20052522966708472, "learning_rate": 0.0003984360143327819, "loss": 2.9672741889953613, "step": 11354, "token_acc": 0.3029964262331566 }, { "epoch": 6.656112576956904, "grad_norm": 0.16946088297302775, "learning_rate": 0.00039841651669783314, "loss": 3.0039467811584473, "step": 11355, "token_acc": 0.29748688765885106 }, { "epoch": 6.656698915274113, "grad_norm": 0.25410664578718156, "learning_rate": 0.00039839701766871115, "loss": 2.969050168991089, "step": 11356, "token_acc": 0.3025026349720288 }, { "epoch": 6.657285253591322, "grad_norm": 0.1697919890376909, "learning_rate": 0.00039837751724559916, "loss": 2.998368501663208, "step": 11357, "token_acc": 0.2967860742708972 }, { "epoch": 6.657871591908531, "grad_norm": 0.20422285579647964, "learning_rate": 0.00039835801542868033, "loss": 2.9559884071350098, "step": 11358, "token_acc": 0.30283790567876445 }, { "epoch": 6.65845793022574, "grad_norm": 0.20112100628267332, "learning_rate": 0.00039833851221813795, "loss": 2.990658760070801, "step": 11359, "token_acc": 0.3000247181592494 }, { "epoch": 6.659044268542949, "grad_norm": 0.17427786204975684, "learning_rate": 0.0003983190076141551, "loss": 2.9911274909973145, "step": 11360, "token_acc": 0.2997285789762662 }, { "epoch": 6.6596306068601585, "grad_norm": 0.17571834224115132, "learning_rate": 0.00039829950161691496, "loss": 2.9850687980651855, "step": 11361, "token_acc": 0.30093128226413135 }, { "epoch": 6.660216945177368, "grad_norm": 0.16404291267475324, "learning_rate": 0.00039827999422660087, "loss": 2.948847532272339, "step": 11362, "token_acc": 0.30450124049935023 }, { "epoch": 6.660803283494577, "grad_norm": 0.15501037350033997, "learning_rate": 0.00039826048544339604, "loss": 2.962644100189209, "step": 11363, "token_acc": 0.30116452484607986 }, { "epoch": 6.661389621811786, "grad_norm": 0.16210604216854, "learning_rate": 0.00039824097526748375, "loss": 3.0023744106292725, "step": 11364, "token_acc": 0.29782760279563036 }, { "epoch": 6.661975960128994, "grad_norm": 0.16028527562722358, "learning_rate": 0.0003982214636990473, "loss": 2.9896163940429688, "step": 11365, "token_acc": 0.3004510015715946 }, { "epoch": 6.662562298446203, "grad_norm": 0.16284612716906804, "learning_rate": 0.0003982019507382698, "loss": 2.963650703430176, "step": 11366, "token_acc": 0.3016701254575262 }, { "epoch": 6.663148636763412, "grad_norm": 0.1600422740184805, "learning_rate": 0.0003981824363853348, "loss": 2.9712648391723633, "step": 11367, "token_acc": 0.30307501104585516 }, { "epoch": 6.663734975080621, "grad_norm": 0.17231644150423228, "learning_rate": 0.00039816292064042547, "loss": 2.9800655841827393, "step": 11368, "token_acc": 0.3003442501178118 }, { "epoch": 6.6643213133978305, "grad_norm": 0.1558288489373433, "learning_rate": 0.0003981434035037251, "loss": 2.972364902496338, "step": 11369, "token_acc": 0.30436563779385123 }, { "epoch": 6.66490765171504, "grad_norm": 0.1554496406919713, "learning_rate": 0.0003981238849754171, "loss": 2.961440324783325, "step": 11370, "token_acc": 0.30170340214878 }, { "epoch": 6.665493990032249, "grad_norm": 0.1633196544310385, "learning_rate": 0.00039810436505568483, "loss": 2.980769634246826, "step": 11371, "token_acc": 0.30040120489649424 }, { "epoch": 6.666080328349458, "grad_norm": 0.1472713446337543, "learning_rate": 0.0003980848437447116, "loss": 2.9376120567321777, "step": 11372, "token_acc": 0.30590338516151205 }, { "epoch": 6.666666666666667, "grad_norm": 0.15996808334395465, "learning_rate": 0.0003980653210426808, "loss": 2.995889663696289, "step": 11373, "token_acc": 0.30017272119456206 }, { "epoch": 6.667253004983876, "grad_norm": 0.16260672518631145, "learning_rate": 0.0003980457969497759, "loss": 2.931584358215332, "step": 11374, "token_acc": 0.3080101840462079 }, { "epoch": 6.667839343301084, "grad_norm": 0.14881729636078642, "learning_rate": 0.00039802627146618016, "loss": 2.9780993461608887, "step": 11375, "token_acc": 0.30363366045376367 }, { "epoch": 6.668425681618293, "grad_norm": 0.15577388953062649, "learning_rate": 0.0003980067445920771, "loss": 3.023399591445923, "step": 11376, "token_acc": 0.29595491513200944 }, { "epoch": 6.6690120199355025, "grad_norm": 0.17539517943152408, "learning_rate": 0.00039798721632765006, "loss": 2.9995086193084717, "step": 11377, "token_acc": 0.2980259470466337 }, { "epoch": 6.669598358252712, "grad_norm": 0.19779285818013395, "learning_rate": 0.0003979676866730826, "loss": 3.0004284381866455, "step": 11378, "token_acc": 0.29822697133976744 }, { "epoch": 6.670184696569921, "grad_norm": 0.22744218841690456, "learning_rate": 0.0003979481556285581, "loss": 2.9864578247070312, "step": 11379, "token_acc": 0.29997855614889357 }, { "epoch": 6.67077103488713, "grad_norm": 0.21881702236564107, "learning_rate": 0.00039792862319426006, "loss": 2.97074294090271, "step": 11380, "token_acc": 0.3022918260140806 }, { "epoch": 6.671357373204339, "grad_norm": 0.20744695640484034, "learning_rate": 0.00039790908937037184, "loss": 2.9663262367248535, "step": 11381, "token_acc": 0.3029587826313757 }, { "epoch": 6.671943711521548, "grad_norm": 0.16986284935226043, "learning_rate": 0.0003978895541570771, "loss": 2.9617903232574463, "step": 11382, "token_acc": 0.303431661524406 }, { "epoch": 6.672530049838757, "grad_norm": 0.1561473548659989, "learning_rate": 0.0003978700175545593, "loss": 2.980672836303711, "step": 11383, "token_acc": 0.2999207846790727 }, { "epoch": 6.673116388155966, "grad_norm": 0.2211981636040716, "learning_rate": 0.00039785047956300193, "loss": 2.962477207183838, "step": 11384, "token_acc": 0.3033675769972353 }, { "epoch": 6.673702726473175, "grad_norm": 0.17076739641469774, "learning_rate": 0.0003978309401825886, "loss": 2.986905574798584, "step": 11385, "token_acc": 0.2995022589375646 }, { "epoch": 6.6742890647903845, "grad_norm": 0.16765755074504907, "learning_rate": 0.00039781139941350263, "loss": 2.9670209884643555, "step": 11386, "token_acc": 0.30220433478415837 }, { "epoch": 6.674875403107593, "grad_norm": 0.24438881204459117, "learning_rate": 0.00039779185725592785, "loss": 2.9719505310058594, "step": 11387, "token_acc": 0.30192105661966984 }, { "epoch": 6.675461741424802, "grad_norm": 0.1833358863087755, "learning_rate": 0.00039777231371004775, "loss": 2.988298177719116, "step": 11388, "token_acc": 0.2985937907644143 }, { "epoch": 6.676048079742011, "grad_norm": 0.16750504086245546, "learning_rate": 0.00039775276877604583, "loss": 2.994284152984619, "step": 11389, "token_acc": 0.2981171602493125 }, { "epoch": 6.67663441805922, "grad_norm": 0.24000613632202555, "learning_rate": 0.0003977332224541058, "loss": 2.9887194633483887, "step": 11390, "token_acc": 0.3011829783156135 }, { "epoch": 6.677220756376429, "grad_norm": 0.1452197601517279, "learning_rate": 0.0003977136747444112, "loss": 2.9103033542633057, "step": 11391, "token_acc": 0.3108374664775257 }, { "epoch": 6.677807094693638, "grad_norm": 0.2366979988530175, "learning_rate": 0.0003976941256471456, "loss": 2.986128091812134, "step": 11392, "token_acc": 0.2986735725789623 }, { "epoch": 6.678393433010847, "grad_norm": 0.17994698541319323, "learning_rate": 0.00039767457516249276, "loss": 2.9973793029785156, "step": 11393, "token_acc": 0.2990064299287857 }, { "epoch": 6.6789797713280565, "grad_norm": 0.16438750332071767, "learning_rate": 0.00039765502329063636, "loss": 2.9811553955078125, "step": 11394, "token_acc": 0.30108103278834986 }, { "epoch": 6.679566109645266, "grad_norm": 0.1549691251026536, "learning_rate": 0.0003976354700317599, "loss": 2.9553847312927246, "step": 11395, "token_acc": 0.3041669602545271 }, { "epoch": 6.680152447962474, "grad_norm": 0.17816800106733896, "learning_rate": 0.0003976159153860471, "loss": 2.9815449714660645, "step": 11396, "token_acc": 0.30144271720613286 }, { "epoch": 6.680738786279683, "grad_norm": 0.2013883068736934, "learning_rate": 0.00039759635935368175, "loss": 2.9550509452819824, "step": 11397, "token_acc": 0.30500478985034735 }, { "epoch": 6.681325124596892, "grad_norm": 0.1561848370858565, "learning_rate": 0.0003975768019348475, "loss": 3.0009632110595703, "step": 11398, "token_acc": 0.2975030642069204 }, { "epoch": 6.681911462914101, "grad_norm": 0.2138414501531232, "learning_rate": 0.0003975572431297281, "loss": 3.0197954177856445, "step": 11399, "token_acc": 0.29623594893372707 }, { "epoch": 6.68249780123131, "grad_norm": 0.19750749170737644, "learning_rate": 0.0003975376829385071, "loss": 2.9644269943237305, "step": 11400, "token_acc": 0.30316190879720356 }, { "epoch": 6.683084139548519, "grad_norm": 0.1834038092913836, "learning_rate": 0.00039751812136136846, "loss": 2.9684314727783203, "step": 11401, "token_acc": 0.30197478591694943 }, { "epoch": 6.6836704778657285, "grad_norm": 0.225212574135076, "learning_rate": 0.00039749855839849593, "loss": 2.990743637084961, "step": 11402, "token_acc": 0.30024662055665785 }, { "epoch": 6.684256816182938, "grad_norm": 0.15930740786059144, "learning_rate": 0.0003974789940500731, "loss": 2.979135036468506, "step": 11403, "token_acc": 0.302282593547946 }, { "epoch": 6.684843154500147, "grad_norm": 0.2571862811236799, "learning_rate": 0.0003974594283162839, "loss": 2.9956653118133545, "step": 11404, "token_acc": 0.2995337103409694 }, { "epoch": 6.685429492817356, "grad_norm": 0.2809083945566357, "learning_rate": 0.000397439861197312, "loss": 2.9527673721313477, "step": 11405, "token_acc": 0.30408394811007955 }, { "epoch": 6.686015831134565, "grad_norm": 0.1785346315351098, "learning_rate": 0.00039742029269334135, "loss": 3.017627716064453, "step": 11406, "token_acc": 0.29444740785897533 }, { "epoch": 6.686602169451774, "grad_norm": 0.2896679081758272, "learning_rate": 0.00039740072280455564, "loss": 2.9676661491394043, "step": 11407, "token_acc": 0.3033405242180884 }, { "epoch": 6.687188507768982, "grad_norm": 0.15761621473427762, "learning_rate": 0.00039738115153113886, "loss": 2.9629921913146973, "step": 11408, "token_acc": 0.30046340868538274 }, { "epoch": 6.687774846086191, "grad_norm": 0.18653703652886608, "learning_rate": 0.00039736157887327467, "loss": 2.962462902069092, "step": 11409, "token_acc": 0.301782415447265 }, { "epoch": 6.6883611844034006, "grad_norm": 0.15088859435176058, "learning_rate": 0.00039734200483114706, "loss": 2.993987560272217, "step": 11410, "token_acc": 0.29966419296397084 }, { "epoch": 6.68894752272061, "grad_norm": 0.1656135642445275, "learning_rate": 0.00039732242940493986, "loss": 2.950441837310791, "step": 11411, "token_acc": 0.3054687846016366 }, { "epoch": 6.689533861037819, "grad_norm": 0.1504809502385022, "learning_rate": 0.0003973028525948369, "loss": 2.975975751876831, "step": 11412, "token_acc": 0.30017452921661014 }, { "epoch": 6.690120199355028, "grad_norm": 0.15898046842313324, "learning_rate": 0.00039728327440102226, "loss": 2.977590560913086, "step": 11413, "token_acc": 0.29912872859779027 }, { "epoch": 6.690706537672237, "grad_norm": 0.14026560960836, "learning_rate": 0.0003972636948236796, "loss": 2.970689058303833, "step": 11414, "token_acc": 0.30247910090177166 }, { "epoch": 6.691292875989446, "grad_norm": 0.1626919012453347, "learning_rate": 0.00039724411386299303, "loss": 2.990302085876465, "step": 11415, "token_acc": 0.2994936007594644 }, { "epoch": 6.691879214306655, "grad_norm": 0.15417430579077235, "learning_rate": 0.0003972245315191464, "loss": 2.999255418777466, "step": 11416, "token_acc": 0.2988714513846722 }, { "epoch": 6.692465552623864, "grad_norm": 0.15001454997459318, "learning_rate": 0.0003972049477923237, "loss": 2.952442169189453, "step": 11417, "token_acc": 0.305041956430718 }, { "epoch": 6.693051890941073, "grad_norm": 0.1615351487815618, "learning_rate": 0.0003971853626827089, "loss": 3.0144004821777344, "step": 11418, "token_acc": 0.2963099640323229 }, { "epoch": 6.693638229258282, "grad_norm": 0.16516916460729322, "learning_rate": 0.0003971657761904859, "loss": 3.0056591033935547, "step": 11419, "token_acc": 0.29687299241937554 }, { "epoch": 6.694224567575491, "grad_norm": 0.14095426621359747, "learning_rate": 0.0003971461883158387, "loss": 2.9287383556365967, "step": 11420, "token_acc": 0.3088137437154083 }, { "epoch": 6.6948109058927, "grad_norm": 0.17412852894989478, "learning_rate": 0.00039712659905895146, "loss": 2.952803611755371, "step": 11421, "token_acc": 0.3043351546356564 }, { "epoch": 6.695397244209909, "grad_norm": 0.1768829543387441, "learning_rate": 0.000397107008420008, "loss": 3.008906602859497, "step": 11422, "token_acc": 0.2972392989219714 }, { "epoch": 6.695983582527118, "grad_norm": 0.1868379971798176, "learning_rate": 0.00039708741639919243, "loss": 2.9618592262268066, "step": 11423, "token_acc": 0.3016537881880913 }, { "epoch": 6.696569920844327, "grad_norm": 0.20008529600607577, "learning_rate": 0.0003970678229966889, "loss": 2.927588701248169, "step": 11424, "token_acc": 0.30641786307913266 }, { "epoch": 6.697156259161536, "grad_norm": 0.17699882457841284, "learning_rate": 0.00039704822821268117, "loss": 2.9790148735046387, "step": 11425, "token_acc": 0.29897127426445186 }, { "epoch": 6.6977425974787455, "grad_norm": 0.16613557546879118, "learning_rate": 0.0003970286320473536, "loss": 2.9229955673217773, "step": 11426, "token_acc": 0.3096192537109207 }, { "epoch": 6.698328935795955, "grad_norm": 0.20456499536036857, "learning_rate": 0.0003970090345008901, "loss": 3.0020337104797363, "step": 11427, "token_acc": 0.2974807914511389 }, { "epoch": 6.698915274113164, "grad_norm": 0.16381672051835902, "learning_rate": 0.0003969894355734749, "loss": 2.98104190826416, "step": 11428, "token_acc": 0.29981165754315425 }, { "epoch": 6.699501612430373, "grad_norm": 0.17656741880838678, "learning_rate": 0.00039696983526529194, "loss": 2.944035053253174, "step": 11429, "token_acc": 0.3049011902669052 }, { "epoch": 6.700087950747581, "grad_norm": 0.1954456924887954, "learning_rate": 0.0003969502335765254, "loss": 2.976634979248047, "step": 11430, "token_acc": 0.30155167975119035 }, { "epoch": 6.70067428906479, "grad_norm": 0.17754278379781377, "learning_rate": 0.0003969306305073595, "loss": 2.956234931945801, "step": 11431, "token_acc": 0.3039499824391604 }, { "epoch": 6.701260627381999, "grad_norm": 0.16416145261026252, "learning_rate": 0.0003969110260579783, "loss": 2.9606378078460693, "step": 11432, "token_acc": 0.3047588484629682 }, { "epoch": 6.701846965699208, "grad_norm": 0.19104054260369127, "learning_rate": 0.00039689142022856594, "loss": 2.939675807952881, "step": 11433, "token_acc": 0.3061887261140012 }, { "epoch": 6.7024333040164175, "grad_norm": 0.21902285019659587, "learning_rate": 0.0003968718130193067, "loss": 2.9641435146331787, "step": 11434, "token_acc": 0.3026163738934081 }, { "epoch": 6.703019642333627, "grad_norm": 0.1963317144635166, "learning_rate": 0.0003968522044303846, "loss": 3.012782096862793, "step": 11435, "token_acc": 0.2969321231233732 }, { "epoch": 6.703605980650836, "grad_norm": 0.1579879993421375, "learning_rate": 0.000396832594461984, "loss": 2.9762790203094482, "step": 11436, "token_acc": 0.30051967680657565 }, { "epoch": 6.704192318968045, "grad_norm": 0.15545930764846078, "learning_rate": 0.00039681298311428905, "loss": 2.9659769535064697, "step": 11437, "token_acc": 0.30327344823748353 }, { "epoch": 6.704778657285254, "grad_norm": 0.15206612055060373, "learning_rate": 0.00039679337038748386, "loss": 2.955127000808716, "step": 11438, "token_acc": 0.3046055147215023 }, { "epoch": 6.705364995602462, "grad_norm": 0.16162466106772894, "learning_rate": 0.00039677375628175283, "loss": 2.95009446144104, "step": 11439, "token_acc": 0.30425929773530025 }, { "epoch": 6.705951333919671, "grad_norm": 0.167058006112606, "learning_rate": 0.0003967541407972801, "loss": 2.9866719245910645, "step": 11440, "token_acc": 0.2992782761957234 }, { "epoch": 6.70653767223688, "grad_norm": 0.15972261804088006, "learning_rate": 0.00039673452393425003, "loss": 2.982419013977051, "step": 11441, "token_acc": 0.3008372280169612 }, { "epoch": 6.7071240105540895, "grad_norm": 0.20851651944220445, "learning_rate": 0.0003967149056928468, "loss": 2.9721455574035645, "step": 11442, "token_acc": 0.29992864935293234 }, { "epoch": 6.707710348871299, "grad_norm": 0.18897336229577827, "learning_rate": 0.0003966952860732548, "loss": 2.9999399185180664, "step": 11443, "token_acc": 0.29903194231626634 }, { "epoch": 6.708296687188508, "grad_norm": 0.16063842874759024, "learning_rate": 0.00039667566507565815, "loss": 2.998539447784424, "step": 11444, "token_acc": 0.29909668785547006 }, { "epoch": 6.708883025505717, "grad_norm": 0.1648507483099146, "learning_rate": 0.0003966560427002413, "loss": 2.9928231239318848, "step": 11445, "token_acc": 0.2979166453487777 }, { "epoch": 6.709469363822926, "grad_norm": 0.15133694221869654, "learning_rate": 0.0003966364189471886, "loss": 3.037770986557007, "step": 11446, "token_acc": 0.29101974974478334 }, { "epoch": 6.710055702140135, "grad_norm": 0.1604097305553905, "learning_rate": 0.00039661679381668423, "loss": 2.9558980464935303, "step": 11447, "token_acc": 0.3039214432752426 }, { "epoch": 6.710642040457344, "grad_norm": 0.15208083009011727, "learning_rate": 0.0003965971673089128, "loss": 2.977978229522705, "step": 11448, "token_acc": 0.3009759593458857 }, { "epoch": 6.711228378774553, "grad_norm": 0.16365484244495107, "learning_rate": 0.0003965775394240585, "loss": 2.959310531616211, "step": 11449, "token_acc": 0.3042475737738576 }, { "epoch": 6.711814717091762, "grad_norm": 0.16650873266645058, "learning_rate": 0.0003965579101623056, "loss": 2.9658875465393066, "step": 11450, "token_acc": 0.30290446477833366 }, { "epoch": 6.7124010554089715, "grad_norm": 0.16173322458223713, "learning_rate": 0.00039653827952383874, "loss": 2.989811897277832, "step": 11451, "token_acc": 0.30018939636635766 }, { "epoch": 6.71298739372618, "grad_norm": 0.16087964543478805, "learning_rate": 0.00039651864750884217, "loss": 2.979311466217041, "step": 11452, "token_acc": 0.2996406480977204 }, { "epoch": 6.713573732043389, "grad_norm": 0.2318406177786617, "learning_rate": 0.0003964990141175004, "loss": 2.996365785598755, "step": 11453, "token_acc": 0.29877736079296663 }, { "epoch": 6.714160070360598, "grad_norm": 0.2574949755184213, "learning_rate": 0.00039647937934999774, "loss": 2.964022636413574, "step": 11454, "token_acc": 0.3014292349545485 }, { "epoch": 6.714746408677807, "grad_norm": 0.266753159723662, "learning_rate": 0.0003964597432065187, "loss": 2.981719493865967, "step": 11455, "token_acc": 0.3006012913534669 }, { "epoch": 6.715332746995016, "grad_norm": 0.16797089260512005, "learning_rate": 0.00039644010568724776, "loss": 2.9960997104644775, "step": 11456, "token_acc": 0.2992775921149497 }, { "epoch": 6.715919085312225, "grad_norm": 0.25923051664864993, "learning_rate": 0.00039642046679236933, "loss": 2.977847099304199, "step": 11457, "token_acc": 0.30059645500265775 }, { "epoch": 6.716505423629434, "grad_norm": 0.2558621824881665, "learning_rate": 0.0003964008265220679, "loss": 2.955122470855713, "step": 11458, "token_acc": 0.3035966593830851 }, { "epoch": 6.7170917619466435, "grad_norm": 0.15510102955938807, "learning_rate": 0.000396381184876528, "loss": 2.915142059326172, "step": 11459, "token_acc": 0.309123970478126 }, { "epoch": 6.717678100263853, "grad_norm": 0.19013991032479824, "learning_rate": 0.0003963615418559341, "loss": 2.975236415863037, "step": 11460, "token_acc": 0.3014166253617511 }, { "epoch": 6.718264438581061, "grad_norm": 0.15361337894880858, "learning_rate": 0.00039634189746047077, "loss": 2.9246745109558105, "step": 11461, "token_acc": 0.30814509271631607 }, { "epoch": 6.71885077689827, "grad_norm": 0.1686916331906177, "learning_rate": 0.00039632225169032256, "loss": 2.9644534587860107, "step": 11462, "token_acc": 0.30389152218952553 }, { "epoch": 6.719437115215479, "grad_norm": 0.1792400260994154, "learning_rate": 0.0003963026045456739, "loss": 3.0062899589538574, "step": 11463, "token_acc": 0.29789639279982927 }, { "epoch": 6.720023453532688, "grad_norm": 0.17703563605682096, "learning_rate": 0.0003962829560267094, "loss": 2.9580507278442383, "step": 11464, "token_acc": 0.30488392704429895 }, { "epoch": 6.720609791849897, "grad_norm": 0.1700551582338296, "learning_rate": 0.0003962633061336137, "loss": 2.997164487838745, "step": 11465, "token_acc": 0.2995014188765867 }, { "epoch": 6.721196130167106, "grad_norm": 0.16480213493276874, "learning_rate": 0.0003962436548665713, "loss": 2.9949779510498047, "step": 11466, "token_acc": 0.2983558091418955 }, { "epoch": 6.7217824684843155, "grad_norm": 0.18174588841154093, "learning_rate": 0.0003962240022257668, "loss": 2.96295166015625, "step": 11467, "token_acc": 0.3032173910733404 }, { "epoch": 6.722368806801525, "grad_norm": 0.16523648525800896, "learning_rate": 0.0003962043482113849, "loss": 2.9636545181274414, "step": 11468, "token_acc": 0.30239857535340375 }, { "epoch": 6.722955145118734, "grad_norm": 0.1743742593603934, "learning_rate": 0.0003961846928236101, "loss": 2.9745452404022217, "step": 11469, "token_acc": 0.30285229301726 }, { "epoch": 6.723541483435943, "grad_norm": 0.21039738554492693, "learning_rate": 0.00039616503606262714, "loss": 2.9557456970214844, "step": 11470, "token_acc": 0.304314615123251 }, { "epoch": 6.724127821753152, "grad_norm": 0.17541357991042905, "learning_rate": 0.0003961453779286206, "loss": 2.9444963932037354, "step": 11471, "token_acc": 0.30677319648376605 }, { "epoch": 6.724714160070361, "grad_norm": 0.17228360511669832, "learning_rate": 0.00039612571842177524, "loss": 2.9841527938842773, "step": 11472, "token_acc": 0.3007851984619072 }, { "epoch": 6.725300498387569, "grad_norm": 0.158510852369442, "learning_rate": 0.0003961060575422756, "loss": 2.9433116912841797, "step": 11473, "token_acc": 0.30456452566665726 }, { "epoch": 6.725886836704778, "grad_norm": 0.16876547034241407, "learning_rate": 0.00039608639529030643, "loss": 2.928025960922241, "step": 11474, "token_acc": 0.3088698306751006 }, { "epoch": 6.7264731750219875, "grad_norm": 0.15773256534309038, "learning_rate": 0.00039606673166605244, "loss": 2.9357941150665283, "step": 11475, "token_acc": 0.30644267155149557 }, { "epoch": 6.727059513339197, "grad_norm": 0.16713640074407096, "learning_rate": 0.00039604706666969837, "loss": 2.9290642738342285, "step": 11476, "token_acc": 0.3068775673430087 }, { "epoch": 6.727645851656406, "grad_norm": 0.19725760102992, "learning_rate": 0.00039602740030142885, "loss": 3.0053625106811523, "step": 11477, "token_acc": 0.2973092613258336 }, { "epoch": 6.728232189973615, "grad_norm": 0.20321797163338054, "learning_rate": 0.0003960077325614287, "loss": 2.948953628540039, "step": 11478, "token_acc": 0.3040985270797834 }, { "epoch": 6.728818528290824, "grad_norm": 0.17178196383917113, "learning_rate": 0.00039598806344988267, "loss": 2.996030807495117, "step": 11479, "token_acc": 0.2981013322144131 }, { "epoch": 6.729404866608033, "grad_norm": 0.14375899509215215, "learning_rate": 0.00039596839296697543, "loss": 2.9461045265197754, "step": 11480, "token_acc": 0.30591016548463357 }, { "epoch": 6.729991204925242, "grad_norm": 0.15202292356686473, "learning_rate": 0.0003959487211128919, "loss": 2.9922120571136475, "step": 11481, "token_acc": 0.2987676769831282 }, { "epoch": 6.730577543242451, "grad_norm": 0.16693567369714019, "learning_rate": 0.0003959290478878168, "loss": 3.006425142288208, "step": 11482, "token_acc": 0.29705168626201256 }, { "epoch": 6.7311638815596595, "grad_norm": 0.18321554020803385, "learning_rate": 0.0003959093732919349, "loss": 2.9566164016723633, "step": 11483, "token_acc": 0.304208504118283 }, { "epoch": 6.731750219876869, "grad_norm": 0.1661013570498967, "learning_rate": 0.00039588969732543114, "loss": 3.0147783756256104, "step": 11484, "token_acc": 0.29578203471000825 }, { "epoch": 6.732336558194078, "grad_norm": 0.15822077486735442, "learning_rate": 0.00039587001998849013, "loss": 2.9752917289733887, "step": 11485, "token_acc": 0.30153773287844127 }, { "epoch": 6.732922896511287, "grad_norm": 0.1458516837995497, "learning_rate": 0.00039585034128129695, "loss": 2.9652607440948486, "step": 11486, "token_acc": 0.3023669550483917 }, { "epoch": 6.733509234828496, "grad_norm": 0.14388629138859232, "learning_rate": 0.0003958306612040363, "loss": 2.9546666145324707, "step": 11487, "token_acc": 0.3046769260281564 }, { "epoch": 6.734095573145705, "grad_norm": 0.15247590995264726, "learning_rate": 0.00039581097975689315, "loss": 2.983140230178833, "step": 11488, "token_acc": 0.30046715427580645 }, { "epoch": 6.734681911462914, "grad_norm": 0.15429800170821226, "learning_rate": 0.0003957912969400523, "loss": 2.998532772064209, "step": 11489, "token_acc": 0.29811450245706683 }, { "epoch": 6.735268249780123, "grad_norm": 0.16346473061707514, "learning_rate": 0.00039577161275369864, "loss": 3.0059261322021484, "step": 11490, "token_acc": 0.2980360893495025 }, { "epoch": 6.735854588097332, "grad_norm": 0.17205259323937172, "learning_rate": 0.0003957519271980171, "loss": 2.9583284854888916, "step": 11491, "token_acc": 0.3044184664156925 }, { "epoch": 6.7364409264145415, "grad_norm": 0.1858788997640012, "learning_rate": 0.0003957322402731927, "loss": 2.9780406951904297, "step": 11492, "token_acc": 0.303267770019188 }, { "epoch": 6.737027264731751, "grad_norm": 0.17703923960991277, "learning_rate": 0.00039571255197941025, "loss": 3.0128934383392334, "step": 11493, "token_acc": 0.2966084083764054 }, { "epoch": 6.73761360304896, "grad_norm": 0.16588750906991814, "learning_rate": 0.00039569286231685465, "loss": 3.007120370864868, "step": 11494, "token_acc": 0.2973906142017968 }, { "epoch": 6.738199941366168, "grad_norm": 0.16717041694970475, "learning_rate": 0.000395673171285711, "loss": 2.9930107593536377, "step": 11495, "token_acc": 0.2987991391495919 }, { "epoch": 6.738786279683377, "grad_norm": 0.19841039212529712, "learning_rate": 0.00039565347888616416, "loss": 2.955406904220581, "step": 11496, "token_acc": 0.30562690384962377 }, { "epoch": 6.739372618000586, "grad_norm": 0.2350716724381974, "learning_rate": 0.0003956337851183992, "loss": 2.97166109085083, "step": 11497, "token_acc": 0.30379756885723586 }, { "epoch": 6.739958956317795, "grad_norm": 0.2070905162978886, "learning_rate": 0.0003956140899826011, "loss": 2.985978841781616, "step": 11498, "token_acc": 0.3004621817154381 }, { "epoch": 6.740545294635004, "grad_norm": 0.18754222510520704, "learning_rate": 0.0003955943934789547, "loss": 3.021526575088501, "step": 11499, "token_acc": 0.2949423911811455 }, { "epoch": 6.7411316329522135, "grad_norm": 0.23048321471820848, "learning_rate": 0.00039557469560764526, "loss": 2.950100898742676, "step": 11500, "token_acc": 0.30496787057342795 }, { "epoch": 6.741717971269423, "grad_norm": 0.2648172322573145, "learning_rate": 0.0003955549963688577, "loss": 2.952327013015747, "step": 11501, "token_acc": 0.30422020640447156 }, { "epoch": 6.742304309586632, "grad_norm": 0.18847791818133786, "learning_rate": 0.00039553529576277714, "loss": 2.961784839630127, "step": 11502, "token_acc": 0.3025832109771942 }, { "epoch": 6.742890647903841, "grad_norm": 0.21313476982150964, "learning_rate": 0.00039551559378958855, "loss": 2.955805778503418, "step": 11503, "token_acc": 0.3048662421737442 }, { "epoch": 6.743476986221049, "grad_norm": 0.20869158747076305, "learning_rate": 0.000395495890449477, "loss": 2.98655366897583, "step": 11504, "token_acc": 0.30046692646803114 }, { "epoch": 6.744063324538258, "grad_norm": 0.17893776640727163, "learning_rate": 0.0003954761857426277, "loss": 2.99147891998291, "step": 11505, "token_acc": 0.3008376215218413 }, { "epoch": 6.744649662855467, "grad_norm": 0.20607404808495444, "learning_rate": 0.0003954564796692256, "loss": 3.026346206665039, "step": 11506, "token_acc": 0.29349765680152756 }, { "epoch": 6.745236001172676, "grad_norm": 0.18787202441424802, "learning_rate": 0.0003954367722294559, "loss": 2.984320640563965, "step": 11507, "token_acc": 0.30087749726986573 }, { "epoch": 6.7458223394898855, "grad_norm": 0.23308987392576105, "learning_rate": 0.0003954170634235037, "loss": 3.021008014678955, "step": 11508, "token_acc": 0.29556874419310186 }, { "epoch": 6.746408677807095, "grad_norm": 0.1732800854431111, "learning_rate": 0.0003953973532515541, "loss": 3.00510311126709, "step": 11509, "token_acc": 0.29536773313301146 }, { "epoch": 6.746995016124304, "grad_norm": 0.19131957897508126, "learning_rate": 0.0003953776417137924, "loss": 2.994755268096924, "step": 11510, "token_acc": 0.29816661061904387 }, { "epoch": 6.747581354441513, "grad_norm": 0.15674542219896306, "learning_rate": 0.0003953579288104036, "loss": 2.981248378753662, "step": 11511, "token_acc": 0.30045408015958425 }, { "epoch": 6.748167692758722, "grad_norm": 0.16626129748061105, "learning_rate": 0.00039533821454157294, "loss": 2.973879337310791, "step": 11512, "token_acc": 0.30147481627155315 }, { "epoch": 6.748754031075931, "grad_norm": 0.148490137085822, "learning_rate": 0.0003953184989074855, "loss": 2.9606704711914062, "step": 11513, "token_acc": 0.30443997394739053 }, { "epoch": 6.74934036939314, "grad_norm": 0.15796892771559037, "learning_rate": 0.0003952987819083267, "loss": 2.9425177574157715, "step": 11514, "token_acc": 0.3068812953026452 }, { "epoch": 6.749926707710349, "grad_norm": 0.16356087009431436, "learning_rate": 0.00039527906354428155, "loss": 3.005080223083496, "step": 11515, "token_acc": 0.2985207526970271 }, { "epoch": 6.7505130460275575, "grad_norm": 0.1810867579875682, "learning_rate": 0.00039525934381553547, "loss": 2.9707722663879395, "step": 11516, "token_acc": 0.30112775034650036 }, { "epoch": 6.751099384344767, "grad_norm": 0.18581876382224982, "learning_rate": 0.0003952396227222735, "loss": 2.969979763031006, "step": 11517, "token_acc": 0.3012135738067609 }, { "epoch": 6.751685722661976, "grad_norm": 0.19218717805482774, "learning_rate": 0.00039521990026468104, "loss": 2.924926280975342, "step": 11518, "token_acc": 0.30899103816848655 }, { "epoch": 6.752272060979185, "grad_norm": 0.21361703522535758, "learning_rate": 0.00039520017644294333, "loss": 2.9774489402770996, "step": 11519, "token_acc": 0.3032800386963513 }, { "epoch": 6.752858399296394, "grad_norm": 0.16581166326799177, "learning_rate": 0.00039518045125724557, "loss": 2.996628522872925, "step": 11520, "token_acc": 0.2984814936128398 }, { "epoch": 6.753444737613603, "grad_norm": 0.17277259045512303, "learning_rate": 0.0003951607247077731, "loss": 3.0042214393615723, "step": 11521, "token_acc": 0.2994068323062769 }, { "epoch": 6.754031075930812, "grad_norm": 0.1700588335449712, "learning_rate": 0.00039514099679471127, "loss": 2.945157527923584, "step": 11522, "token_acc": 0.30562858896134026 }, { "epoch": 6.754617414248021, "grad_norm": 0.16114751493813664, "learning_rate": 0.0003951212675182453, "loss": 2.983963966369629, "step": 11523, "token_acc": 0.29970211490459986 }, { "epoch": 6.75520375256523, "grad_norm": 0.16061151200255297, "learning_rate": 0.0003951015368785607, "loss": 2.9542856216430664, "step": 11524, "token_acc": 0.3044669873842855 }, { "epoch": 6.7557900908824395, "grad_norm": 0.16263681074630146, "learning_rate": 0.0003950818048758425, "loss": 2.9794771671295166, "step": 11525, "token_acc": 0.3008689469116309 }, { "epoch": 6.756376429199648, "grad_norm": 0.15136003354234842, "learning_rate": 0.0003950620715102764, "loss": 2.953697681427002, "step": 11526, "token_acc": 0.3025988219504534 }, { "epoch": 6.756962767516857, "grad_norm": 0.15278856869202703, "learning_rate": 0.00039504233678204747, "loss": 2.9756898880004883, "step": 11527, "token_acc": 0.30330868162238106 }, { "epoch": 6.757549105834066, "grad_norm": 0.15290740658849838, "learning_rate": 0.0003950226006913413, "loss": 2.949049711227417, "step": 11528, "token_acc": 0.30562880311317336 }, { "epoch": 6.758135444151275, "grad_norm": 0.14819716119841855, "learning_rate": 0.0003950028632383432, "loss": 2.9888970851898193, "step": 11529, "token_acc": 0.30025993899287573 }, { "epoch": 6.758721782468484, "grad_norm": 0.17918413291684476, "learning_rate": 0.0003949831244232387, "loss": 2.99914813041687, "step": 11530, "token_acc": 0.2990253164222696 }, { "epoch": 6.759308120785693, "grad_norm": 0.20739440949144017, "learning_rate": 0.00039496338424621305, "loss": 2.957144021987915, "step": 11531, "token_acc": 0.30406981117881593 }, { "epoch": 6.759894459102902, "grad_norm": 0.20594981256943412, "learning_rate": 0.0003949436427074517, "loss": 2.9398584365844727, "step": 11532, "token_acc": 0.30769919627465464 }, { "epoch": 6.7604807974201115, "grad_norm": 0.17405599879909062, "learning_rate": 0.0003949238998071402, "loss": 2.9859185218811035, "step": 11533, "token_acc": 0.3007505533366961 }, { "epoch": 6.761067135737321, "grad_norm": 0.14852573463486676, "learning_rate": 0.0003949041555454639, "loss": 3.0099306106567383, "step": 11534, "token_acc": 0.2969432544930015 }, { "epoch": 6.76165347405453, "grad_norm": 0.184693955571649, "learning_rate": 0.0003948844099226083, "loss": 2.971446990966797, "step": 11535, "token_acc": 0.30301257149927435 }, { "epoch": 6.762239812371739, "grad_norm": 0.2049823120662438, "learning_rate": 0.000394864662938759, "loss": 2.9630303382873535, "step": 11536, "token_acc": 0.30290180972797665 }, { "epoch": 6.762826150688948, "grad_norm": 0.19537275938541532, "learning_rate": 0.00039484491459410134, "loss": 2.9980673789978027, "step": 11537, "token_acc": 0.2970245327668203 }, { "epoch": 6.763412489006156, "grad_norm": 0.15916917749864046, "learning_rate": 0.0003948251648888208, "loss": 2.944810628890991, "step": 11538, "token_acc": 0.30536347438411177 }, { "epoch": 6.763998827323365, "grad_norm": 0.24929323312007814, "learning_rate": 0.0003948054138231031, "loss": 2.981698513031006, "step": 11539, "token_acc": 0.3004834753940174 }, { "epoch": 6.764585165640574, "grad_norm": 0.3133734272944794, "learning_rate": 0.00039478566139713366, "loss": 2.981476306915283, "step": 11540, "token_acc": 0.3009886555434209 }, { "epoch": 6.7651715039577835, "grad_norm": 0.17613963765919682, "learning_rate": 0.00039476590761109803, "loss": 2.9939332008361816, "step": 11541, "token_acc": 0.2970473123728509 }, { "epoch": 6.765757842274993, "grad_norm": 0.23323969381611873, "learning_rate": 0.0003947461524651818, "loss": 2.9460811614990234, "step": 11542, "token_acc": 0.3057751831887476 }, { "epoch": 6.766344180592202, "grad_norm": 0.23846156732643098, "learning_rate": 0.00039472639595957044, "loss": 3.023005962371826, "step": 11543, "token_acc": 0.2926903748397474 }, { "epoch": 6.766930518909411, "grad_norm": 0.1715534130041322, "learning_rate": 0.0003947066380944496, "loss": 3.0024967193603516, "step": 11544, "token_acc": 0.2988230434572696 }, { "epoch": 6.76751685722662, "grad_norm": 0.22083473630112432, "learning_rate": 0.000394686878870005, "loss": 2.989177942276001, "step": 11545, "token_acc": 0.2996123033364435 }, { "epoch": 6.768103195543829, "grad_norm": 0.16734941858278113, "learning_rate": 0.0003946671182864221, "loss": 2.9711222648620605, "step": 11546, "token_acc": 0.3028971384418488 }, { "epoch": 6.768689533861037, "grad_norm": 0.2169538768258249, "learning_rate": 0.0003946473563438865, "loss": 3.0118746757507324, "step": 11547, "token_acc": 0.2985385244943295 }, { "epoch": 6.7692758721782464, "grad_norm": 0.1511884384981121, "learning_rate": 0.00039462759304258394, "loss": 3.0192019939422607, "step": 11548, "token_acc": 0.2948051570555422 }, { "epoch": 6.769862210495456, "grad_norm": 0.2008901533765202, "learning_rate": 0.00039460782838270005, "loss": 2.9511473178863525, "step": 11549, "token_acc": 0.305786471911625 }, { "epoch": 6.770448548812665, "grad_norm": 0.1688867903467657, "learning_rate": 0.00039458806236442046, "loss": 2.9981157779693604, "step": 11550, "token_acc": 0.2984890303657787 }, { "epoch": 6.771034887129874, "grad_norm": 0.1668637948419691, "learning_rate": 0.00039456829498793087, "loss": 2.9873576164245605, "step": 11551, "token_acc": 0.3003670837416553 }, { "epoch": 6.771621225447083, "grad_norm": 0.16332442491547605, "learning_rate": 0.00039454852625341687, "loss": 2.972841739654541, "step": 11552, "token_acc": 0.3015342483749296 }, { "epoch": 6.772207563764292, "grad_norm": 0.18464896950364837, "learning_rate": 0.0003945287561610644, "loss": 2.9945921897888184, "step": 11553, "token_acc": 0.29862314616928953 }, { "epoch": 6.772793902081501, "grad_norm": 0.18614791348959603, "learning_rate": 0.0003945089847110589, "loss": 2.9459640979766846, "step": 11554, "token_acc": 0.30536367918446183 }, { "epoch": 6.77338024039871, "grad_norm": 0.21189460505970756, "learning_rate": 0.0003944892119035863, "loss": 3.0124449729919434, "step": 11555, "token_acc": 0.2962415038122924 }, { "epoch": 6.773966578715919, "grad_norm": 0.1598058020935131, "learning_rate": 0.0003944694377388322, "loss": 2.9895286560058594, "step": 11556, "token_acc": 0.29911994993038793 }, { "epoch": 6.7745529170331285, "grad_norm": 0.1549407673556061, "learning_rate": 0.0003944496622169824, "loss": 2.9838523864746094, "step": 11557, "token_acc": 0.2993662864385298 }, { "epoch": 6.775139255350338, "grad_norm": 0.1721596812951165, "learning_rate": 0.00039442988533822267, "loss": 2.998366355895996, "step": 11558, "token_acc": 0.2973919302800879 }, { "epoch": 6.775725593667546, "grad_norm": 0.1928050292269371, "learning_rate": 0.00039441010710273883, "loss": 2.977863311767578, "step": 11559, "token_acc": 0.2993067308602004 }, { "epoch": 6.776311931984755, "grad_norm": 0.1489748180115067, "learning_rate": 0.0003943903275107166, "loss": 2.998818874359131, "step": 11560, "token_acc": 0.2981613847743713 }, { "epoch": 6.776898270301964, "grad_norm": 0.2372633185013389, "learning_rate": 0.00039437054656234183, "loss": 2.9870150089263916, "step": 11561, "token_acc": 0.2994970640796528 }, { "epoch": 6.777484608619173, "grad_norm": 0.15708906238072473, "learning_rate": 0.0003943507642578003, "loss": 2.973066568374634, "step": 11562, "token_acc": 0.30277421195264587 }, { "epoch": 6.778070946936382, "grad_norm": 0.18366629821923414, "learning_rate": 0.0003943309805972779, "loss": 2.95536470413208, "step": 11563, "token_acc": 0.3041023791394488 }, { "epoch": 6.778657285253591, "grad_norm": 0.15467451082334238, "learning_rate": 0.0003943111955809604, "loss": 3.0104329586029053, "step": 11564, "token_acc": 0.29741946024163873 }, { "epoch": 6.7792436235708005, "grad_norm": 0.178612960506566, "learning_rate": 0.0003942914092090337, "loss": 2.9964513778686523, "step": 11565, "token_acc": 0.2990949934781558 }, { "epoch": 6.77982996188801, "grad_norm": 0.15897730936721075, "learning_rate": 0.00039427162148168363, "loss": 2.970902919769287, "step": 11566, "token_acc": 0.3020746623122073 }, { "epoch": 6.780416300205219, "grad_norm": 0.15509038060151067, "learning_rate": 0.000394251832399096, "loss": 2.949418544769287, "step": 11567, "token_acc": 0.3065515133384493 }, { "epoch": 6.781002638522428, "grad_norm": 0.1672390698424791, "learning_rate": 0.0003942320419614569, "loss": 2.953291416168213, "step": 11568, "token_acc": 0.3030630649497517 }, { "epoch": 6.781588976839636, "grad_norm": 0.1735530227168004, "learning_rate": 0.00039421225016895203, "loss": 2.9651939868927, "step": 11569, "token_acc": 0.30290431679306157 }, { "epoch": 6.782175315156845, "grad_norm": 0.16867436415106482, "learning_rate": 0.0003941924570217674, "loss": 2.996748447418213, "step": 11570, "token_acc": 0.29924092866612106 }, { "epoch": 6.782761653474054, "grad_norm": 0.163582482296536, "learning_rate": 0.0003941726625200891, "loss": 2.962548017501831, "step": 11571, "token_acc": 0.30176613237749816 }, { "epoch": 6.783347991791263, "grad_norm": 0.2063742112221261, "learning_rate": 0.0003941528666641027, "loss": 2.9715094566345215, "step": 11572, "token_acc": 0.3028142320568519 }, { "epoch": 6.7839343301084725, "grad_norm": 0.15689777973354346, "learning_rate": 0.00039413306945399455, "loss": 2.9831771850585938, "step": 11573, "token_acc": 0.30011492846235294 }, { "epoch": 6.784520668425682, "grad_norm": 0.2305764150714964, "learning_rate": 0.0003941132708899503, "loss": 2.9912753105163574, "step": 11574, "token_acc": 0.2997486553713519 }, { "epoch": 6.785107006742891, "grad_norm": 0.20693199595276215, "learning_rate": 0.0003940934709721561, "loss": 2.9474563598632812, "step": 11575, "token_acc": 0.3058722328715479 }, { "epoch": 6.7856933450601, "grad_norm": 0.1570801021600605, "learning_rate": 0.00039407366970079796, "loss": 2.9580841064453125, "step": 11576, "token_acc": 0.304716959543128 }, { "epoch": 6.786279683377309, "grad_norm": 0.22222845867201804, "learning_rate": 0.0003940538670760617, "loss": 2.9611153602600098, "step": 11577, "token_acc": 0.3021958109181384 }, { "epoch": 6.786866021694518, "grad_norm": 0.18275101611013528, "learning_rate": 0.0003940340630981336, "loss": 2.9507999420166016, "step": 11578, "token_acc": 0.3050683556841433 }, { "epoch": 6.787452360011727, "grad_norm": 0.17426836863417955, "learning_rate": 0.00039401425776719955, "loss": 2.979895830154419, "step": 11579, "token_acc": 0.3004485879337678 }, { "epoch": 6.788038698328936, "grad_norm": 0.17255843395367024, "learning_rate": 0.0003939944510834456, "loss": 2.972538471221924, "step": 11580, "token_acc": 0.3022905218110198 }, { "epoch": 6.7886250366461445, "grad_norm": 0.15373701630187564, "learning_rate": 0.0003939746430470579, "loss": 2.957298755645752, "step": 11581, "token_acc": 0.3041560981651447 }, { "epoch": 6.789211374963354, "grad_norm": 0.18548568269878304, "learning_rate": 0.0003939548336582223, "loss": 2.9947452545166016, "step": 11582, "token_acc": 0.29820577452457186 }, { "epoch": 6.789797713280563, "grad_norm": 0.1630054545418031, "learning_rate": 0.000393935022917125, "loss": 2.9864251613616943, "step": 11583, "token_acc": 0.2995224129828053 }, { "epoch": 6.790384051597772, "grad_norm": 0.15718918838460197, "learning_rate": 0.0003939152108239522, "loss": 2.9676175117492676, "step": 11584, "token_acc": 0.3029986798920955 }, { "epoch": 6.790970389914981, "grad_norm": 0.15545140765453122, "learning_rate": 0.0003938953973788899, "loss": 2.987039566040039, "step": 11585, "token_acc": 0.30003087144549606 }, { "epoch": 6.79155672823219, "grad_norm": 0.15652426434457511, "learning_rate": 0.0003938755825821243, "loss": 2.987412452697754, "step": 11586, "token_acc": 0.2999010832329342 }, { "epoch": 6.792143066549399, "grad_norm": 0.1827417417541671, "learning_rate": 0.00039385576643384134, "loss": 2.9207797050476074, "step": 11587, "token_acc": 0.31141747614246773 }, { "epoch": 6.792729404866608, "grad_norm": 0.1576406312738965, "learning_rate": 0.0003938359489342274, "loss": 3.006629705429077, "step": 11588, "token_acc": 0.29616861547160034 }, { "epoch": 6.793315743183817, "grad_norm": 0.20273197780092267, "learning_rate": 0.0003938161300834686, "loss": 2.9420042037963867, "step": 11589, "token_acc": 0.30544274445500175 }, { "epoch": 6.7939020815010265, "grad_norm": 0.26601222085945747, "learning_rate": 0.000393796309881751, "loss": 3.0006086826324463, "step": 11590, "token_acc": 0.2963497114242747 }, { "epoch": 6.794488419818235, "grad_norm": 0.15903031760504424, "learning_rate": 0.00039377648832926074, "loss": 2.978531837463379, "step": 11591, "token_acc": 0.30120723740328553 }, { "epoch": 6.795074758135444, "grad_norm": 0.26405906080537384, "learning_rate": 0.0003937566654261842, "loss": 2.9976654052734375, "step": 11592, "token_acc": 0.2997663941264809 }, { "epoch": 6.795661096452653, "grad_norm": 0.20355855519441834, "learning_rate": 0.0003937368411727075, "loss": 2.943694591522217, "step": 11593, "token_acc": 0.3068827620865344 }, { "epoch": 6.796247434769862, "grad_norm": 0.213111676156233, "learning_rate": 0.00039371701556901686, "loss": 2.972809076309204, "step": 11594, "token_acc": 0.3019167726418599 }, { "epoch": 6.796833773087071, "grad_norm": 0.2102821517409978, "learning_rate": 0.0003936971886152985, "loss": 2.9829165935516357, "step": 11595, "token_acc": 0.3000268995302545 }, { "epoch": 6.79742011140428, "grad_norm": 0.17707894001326263, "learning_rate": 0.00039367736031173866, "loss": 3.01369047164917, "step": 11596, "token_acc": 0.29639866170694706 }, { "epoch": 6.798006449721489, "grad_norm": 0.20333774919045303, "learning_rate": 0.00039365753065852374, "loss": 3.006033420562744, "step": 11597, "token_acc": 0.2964986910181219 }, { "epoch": 6.7985927880386985, "grad_norm": 0.19431307756761462, "learning_rate": 0.00039363769965583984, "loss": 2.9322497844696045, "step": 11598, "token_acc": 0.3084968047268326 }, { "epoch": 6.799179126355908, "grad_norm": 0.21546096785925278, "learning_rate": 0.0003936178673038733, "loss": 2.9905667304992676, "step": 11599, "token_acc": 0.29876764104898734 }, { "epoch": 6.799765464673117, "grad_norm": 0.18047640108193683, "learning_rate": 0.0003935980336028104, "loss": 2.9647064208984375, "step": 11600, "token_acc": 0.30452075181647714 }, { "epoch": 6.800351802990326, "grad_norm": 0.1829915390708398, "learning_rate": 0.0003935781985528375, "loss": 2.9450063705444336, "step": 11601, "token_acc": 0.305358867489916 }, { "epoch": 6.800938141307535, "grad_norm": 0.1701278952655037, "learning_rate": 0.0003935583621541409, "loss": 2.9871788024902344, "step": 11602, "token_acc": 0.3011451456130194 }, { "epoch": 6.801524479624743, "grad_norm": 0.18104097654567916, "learning_rate": 0.0003935385244069069, "loss": 2.987971782684326, "step": 11603, "token_acc": 0.3008784509655764 }, { "epoch": 6.802110817941952, "grad_norm": 0.15007697645227325, "learning_rate": 0.00039351868531132194, "loss": 2.936387538909912, "step": 11604, "token_acc": 0.3080193527759501 }, { "epoch": 6.802697156259161, "grad_norm": 0.1746934503512794, "learning_rate": 0.0003934988448675723, "loss": 2.9657599925994873, "step": 11605, "token_acc": 0.30339610451507676 }, { "epoch": 6.8032834945763705, "grad_norm": 0.15267090806808237, "learning_rate": 0.0003934790030758444, "loss": 2.9769229888916016, "step": 11606, "token_acc": 0.30137786989197435 }, { "epoch": 6.80386983289358, "grad_norm": 0.17854870618415986, "learning_rate": 0.0003934591599363245, "loss": 2.994415760040283, "step": 11607, "token_acc": 0.29903814866395073 }, { "epoch": 6.804456171210789, "grad_norm": 0.15644539588439593, "learning_rate": 0.00039343931544919924, "loss": 2.9980106353759766, "step": 11608, "token_acc": 0.29752241329506574 }, { "epoch": 6.805042509527998, "grad_norm": 0.16487058386167547, "learning_rate": 0.00039341946961465483, "loss": 2.978438377380371, "step": 11609, "token_acc": 0.30263254844583853 }, { "epoch": 6.805628847845207, "grad_norm": 0.1691050588007732, "learning_rate": 0.00039339962243287787, "loss": 2.995551347732544, "step": 11610, "token_acc": 0.2995381743646083 }, { "epoch": 6.806215186162416, "grad_norm": 0.164714585870821, "learning_rate": 0.00039337977390405457, "loss": 2.9820971488952637, "step": 11611, "token_acc": 0.30025294797932056 }, { "epoch": 6.806801524479624, "grad_norm": 0.16491873167965881, "learning_rate": 0.00039335992402837153, "loss": 2.9999372959136963, "step": 11612, "token_acc": 0.2979064020267645 }, { "epoch": 6.807387862796833, "grad_norm": 0.1451506557215027, "learning_rate": 0.0003933400728060151, "loss": 3.0204498767852783, "step": 11613, "token_acc": 0.2948417704090865 }, { "epoch": 6.8079742011140425, "grad_norm": 0.16908115927182538, "learning_rate": 0.000393320220237172, "loss": 2.965651035308838, "step": 11614, "token_acc": 0.30411801056090554 }, { "epoch": 6.808560539431252, "grad_norm": 0.1463599063841434, "learning_rate": 0.0003933003663220285, "loss": 2.9881362915039062, "step": 11615, "token_acc": 0.29876793552195896 }, { "epoch": 6.809146877748461, "grad_norm": 0.1787531783987317, "learning_rate": 0.0003932805110607711, "loss": 3.0090205669403076, "step": 11616, "token_acc": 0.2994712960821684 }, { "epoch": 6.80973321606567, "grad_norm": 0.22418310616684273, "learning_rate": 0.0003932606544535864, "loss": 2.993567705154419, "step": 11617, "token_acc": 0.29839883551673946 }, { "epoch": 6.810319554382879, "grad_norm": 0.16362262409831566, "learning_rate": 0.0003932407965006609, "loss": 2.953751802444458, "step": 11618, "token_acc": 0.30359957299936163 }, { "epoch": 6.810905892700088, "grad_norm": 0.16952164689901786, "learning_rate": 0.0003932209372021812, "loss": 2.981529951095581, "step": 11619, "token_acc": 0.2996828066678088 }, { "epoch": 6.811492231017297, "grad_norm": 0.15680471113975752, "learning_rate": 0.00039320107655833376, "loss": 2.973137855529785, "step": 11620, "token_acc": 0.3029456269013186 }, { "epoch": 6.812078569334506, "grad_norm": 0.15571765119605566, "learning_rate": 0.00039318121456930513, "loss": 2.977614402770996, "step": 11621, "token_acc": 0.30084120997656405 }, { "epoch": 6.812664907651715, "grad_norm": 0.1694194613573455, "learning_rate": 0.00039316135123528196, "loss": 3.0071518421173096, "step": 11622, "token_acc": 0.2964382146849824 }, { "epoch": 6.8132512459689245, "grad_norm": 0.15332634871058373, "learning_rate": 0.0003931414865564508, "loss": 2.9971706867218018, "step": 11623, "token_acc": 0.2989519673367488 }, { "epoch": 6.813837584286133, "grad_norm": 0.16815999304841078, "learning_rate": 0.0003931216205329982, "loss": 2.980884313583374, "step": 11624, "token_acc": 0.30035730797837057 }, { "epoch": 6.814423922603342, "grad_norm": 0.1590140537525872, "learning_rate": 0.00039310175316511093, "loss": 2.968369483947754, "step": 11625, "token_acc": 0.30294270907596704 }, { "epoch": 6.815010260920551, "grad_norm": 0.14861616002803776, "learning_rate": 0.0003930818844529754, "loss": 2.9992194175720215, "step": 11626, "token_acc": 0.2971770227505987 }, { "epoch": 6.81559659923776, "grad_norm": 0.1414313774014646, "learning_rate": 0.00039306201439677854, "loss": 2.9639291763305664, "step": 11627, "token_acc": 0.3040394751255251 }, { "epoch": 6.816182937554969, "grad_norm": 0.14794481386540587, "learning_rate": 0.00039304214299670673, "loss": 2.9717040061950684, "step": 11628, "token_acc": 0.30168710478482375 }, { "epoch": 6.816769275872178, "grad_norm": 0.17694370353133437, "learning_rate": 0.00039302227025294674, "loss": 3.0060343742370605, "step": 11629, "token_acc": 0.29703140775293296 }, { "epoch": 6.817355614189387, "grad_norm": 0.16708814615084477, "learning_rate": 0.00039300239616568526, "loss": 3.002920627593994, "step": 11630, "token_acc": 0.29581983447226556 }, { "epoch": 6.8179419525065965, "grad_norm": 0.14645786586725693, "learning_rate": 0.00039298252073510895, "loss": 2.940195322036743, "step": 11631, "token_acc": 0.3084593331605539 }, { "epoch": 6.818528290823806, "grad_norm": 0.22639182838683544, "learning_rate": 0.00039296264396140456, "loss": 3.0081708431243896, "step": 11632, "token_acc": 0.2965828876322743 }, { "epoch": 6.819114629141015, "grad_norm": 0.24251021036787143, "learning_rate": 0.0003929427658447587, "loss": 2.983792304992676, "step": 11633, "token_acc": 0.3000128832775058 }, { "epoch": 6.819700967458223, "grad_norm": 0.152684834729, "learning_rate": 0.0003929228863853582, "loss": 2.983001708984375, "step": 11634, "token_acc": 0.3005014466306374 }, { "epoch": 6.820287305775432, "grad_norm": 0.250334910049205, "learning_rate": 0.00039290300558338987, "loss": 3.0014262199401855, "step": 11635, "token_acc": 0.2994212987341239 }, { "epoch": 6.820873644092641, "grad_norm": 0.24514144788877448, "learning_rate": 0.00039288312343904025, "loss": 2.9712653160095215, "step": 11636, "token_acc": 0.3035899638498157 }, { "epoch": 6.82145998240985, "grad_norm": 0.1553312306408857, "learning_rate": 0.00039286323995249626, "loss": 3.006707191467285, "step": 11637, "token_acc": 0.29694669891794007 }, { "epoch": 6.822046320727059, "grad_norm": 0.22987611284167314, "learning_rate": 0.0003928433551239447, "loss": 2.9817044734954834, "step": 11638, "token_acc": 0.30079572676902433 }, { "epoch": 6.8226326590442685, "grad_norm": 0.16399639045180195, "learning_rate": 0.00039282346895357225, "loss": 2.9868717193603516, "step": 11639, "token_acc": 0.2988611359624731 }, { "epoch": 6.823218997361478, "grad_norm": 0.2151690480190841, "learning_rate": 0.00039280358144156574, "loss": 2.9520158767700195, "step": 11640, "token_acc": 0.305098896307109 }, { "epoch": 6.823805335678687, "grad_norm": 0.1633442410442377, "learning_rate": 0.0003927836925881121, "loss": 2.93984317779541, "step": 11641, "token_acc": 0.3083804896900102 }, { "epoch": 6.824391673995896, "grad_norm": 0.1850081073851398, "learning_rate": 0.00039276380239339806, "loss": 2.9744629859924316, "step": 11642, "token_acc": 0.300420784812604 }, { "epoch": 6.824978012313105, "grad_norm": 0.16988939794489624, "learning_rate": 0.0003927439108576104, "loss": 2.9958372116088867, "step": 11643, "token_acc": 0.29902630026868215 }, { "epoch": 6.825564350630314, "grad_norm": 0.17497167374147898, "learning_rate": 0.000392724017980936, "loss": 3.006831645965576, "step": 11644, "token_acc": 0.2974191605557229 }, { "epoch": 6.826150688947523, "grad_norm": 0.16516715101063453, "learning_rate": 0.0003927041237635619, "loss": 2.9722230434417725, "step": 11645, "token_acc": 0.30148307895619963 }, { "epoch": 6.826737027264731, "grad_norm": 0.1450250620258408, "learning_rate": 0.0003926842282056748, "loss": 2.99434757232666, "step": 11646, "token_acc": 0.29992667531254324 }, { "epoch": 6.8273233655819405, "grad_norm": 0.16686499526491372, "learning_rate": 0.00039266433130746164, "loss": 3.0002710819244385, "step": 11647, "token_acc": 0.3002069166329536 }, { "epoch": 6.82790970389915, "grad_norm": 0.13723372289626537, "learning_rate": 0.00039264443306910937, "loss": 2.98030424118042, "step": 11648, "token_acc": 0.30049203036500327 }, { "epoch": 6.828496042216359, "grad_norm": 0.14456840221184772, "learning_rate": 0.00039262453349080486, "loss": 2.974066734313965, "step": 11649, "token_acc": 0.2996495549119326 }, { "epoch": 6.829082380533568, "grad_norm": 0.14794416966908008, "learning_rate": 0.0003926046325727349, "loss": 3.024742603302002, "step": 11650, "token_acc": 0.2949483393514712 }, { "epoch": 6.829668718850777, "grad_norm": 0.15061430041844995, "learning_rate": 0.0003925847303150868, "loss": 2.97066593170166, "step": 11651, "token_acc": 0.30160277319416195 }, { "epoch": 6.830255057167986, "grad_norm": 0.1487413476506025, "learning_rate": 0.0003925648267180472, "loss": 2.99392032623291, "step": 11652, "token_acc": 0.29900313152400837 }, { "epoch": 6.830841395485195, "grad_norm": 0.15882127138231913, "learning_rate": 0.0003925449217818031, "loss": 3.002487897872925, "step": 11653, "token_acc": 0.29622637025457677 }, { "epoch": 6.831427733802404, "grad_norm": 0.1381305591582993, "learning_rate": 0.00039252501550654165, "loss": 2.9671549797058105, "step": 11654, "token_acc": 0.30347537656435125 }, { "epoch": 6.8320140721196125, "grad_norm": 0.15426221949025817, "learning_rate": 0.00039250510789244966, "loss": 3.001903533935547, "step": 11655, "token_acc": 0.29808751822314816 }, { "epoch": 6.832600410436822, "grad_norm": 0.13949762108855956, "learning_rate": 0.00039248519893971424, "loss": 3.030393600463867, "step": 11656, "token_acc": 0.29422590952758865 }, { "epoch": 6.833186748754031, "grad_norm": 0.1599644371140646, "learning_rate": 0.0003924652886485224, "loss": 2.9425408840179443, "step": 11657, "token_acc": 0.306530968839569 }, { "epoch": 6.83377308707124, "grad_norm": 0.17499084019343752, "learning_rate": 0.0003924453770190611, "loss": 2.990678310394287, "step": 11658, "token_acc": 0.2988651102464332 }, { "epoch": 6.834359425388449, "grad_norm": 0.21834952292230386, "learning_rate": 0.0003924254640515175, "loss": 2.982257843017578, "step": 11659, "token_acc": 0.30182547219584255 }, { "epoch": 6.834945763705658, "grad_norm": 0.16897419958111382, "learning_rate": 0.00039240554974607843, "loss": 2.935482978820801, "step": 11660, "token_acc": 0.3087362083667089 }, { "epoch": 6.835532102022867, "grad_norm": 0.15146092993815297, "learning_rate": 0.0003923856341029313, "loss": 2.973356246948242, "step": 11661, "token_acc": 0.3046509035645056 }, { "epoch": 6.836118440340076, "grad_norm": 0.17124725104781033, "learning_rate": 0.0003923657171222629, "loss": 2.9884839057922363, "step": 11662, "token_acc": 0.3007037332411994 }, { "epoch": 6.836704778657285, "grad_norm": 0.17892336475888285, "learning_rate": 0.0003923457988042604, "loss": 2.996511936187744, "step": 11663, "token_acc": 0.2994192392168488 }, { "epoch": 6.8372911169744945, "grad_norm": 0.1712077393409036, "learning_rate": 0.000392325879149111, "loss": 3.0041728019714355, "step": 11664, "token_acc": 0.29817567725802546 }, { "epoch": 6.837877455291704, "grad_norm": 0.14618176368120178, "learning_rate": 0.00039230595815700173, "loss": 2.9733285903930664, "step": 11665, "token_acc": 0.3004451000447156 }, { "epoch": 6.838463793608913, "grad_norm": 0.15788124891897257, "learning_rate": 0.0003922860358281197, "loss": 2.9759531021118164, "step": 11666, "token_acc": 0.3019369441863121 }, { "epoch": 6.839050131926121, "grad_norm": 0.17261117122412808, "learning_rate": 0.00039226611216265216, "loss": 2.9577317237854004, "step": 11667, "token_acc": 0.3033464229949215 }, { "epoch": 6.83963647024333, "grad_norm": 0.14839595457497345, "learning_rate": 0.0003922461871607862, "loss": 2.968186855316162, "step": 11668, "token_acc": 0.30248703660606185 }, { "epoch": 6.840222808560539, "grad_norm": 0.15007170248950966, "learning_rate": 0.00039222626082270905, "loss": 2.9764349460601807, "step": 11669, "token_acc": 0.3015973536188233 }, { "epoch": 6.840809146877748, "grad_norm": 0.2069049867203262, "learning_rate": 0.00039220633314860764, "loss": 2.960733413696289, "step": 11670, "token_acc": 0.3045761361730672 }, { "epoch": 6.841395485194957, "grad_norm": 0.2838425705322793, "learning_rate": 0.00039218640413866957, "loss": 3.0168042182922363, "step": 11671, "token_acc": 0.2967158545854848 }, { "epoch": 6.8419818235121665, "grad_norm": 0.26457691160377805, "learning_rate": 0.0003921664737930817, "loss": 2.957150459289551, "step": 11672, "token_acc": 0.3048661573332089 }, { "epoch": 6.842568161829376, "grad_norm": 0.17786987374541322, "learning_rate": 0.00039214654211203144, "loss": 2.965214729309082, "step": 11673, "token_acc": 0.3026891539556384 }, { "epoch": 6.843154500146585, "grad_norm": 0.17696503641613867, "learning_rate": 0.00039212660909570597, "loss": 2.989487648010254, "step": 11674, "token_acc": 0.3021244275786088 }, { "epoch": 6.843740838463794, "grad_norm": 0.1757289689977439, "learning_rate": 0.0003921066747442924, "loss": 2.9921646118164062, "step": 11675, "token_acc": 0.2973377695053598 }, { "epoch": 6.844327176781003, "grad_norm": 0.20370940470296375, "learning_rate": 0.00039208673905797827, "loss": 2.964958429336548, "step": 11676, "token_acc": 0.30315098501831494 }, { "epoch": 6.844913515098211, "grad_norm": 0.15624900788059104, "learning_rate": 0.00039206680203695063, "loss": 2.973414421081543, "step": 11677, "token_acc": 0.3020743485714894 }, { "epoch": 6.84549985341542, "grad_norm": 0.16537313144614482, "learning_rate": 0.0003920468636813968, "loss": 2.9612526893615723, "step": 11678, "token_acc": 0.3041126514460447 }, { "epoch": 6.8460861917326294, "grad_norm": 0.1733044801398502, "learning_rate": 0.0003920269239915041, "loss": 2.96962571144104, "step": 11679, "token_acc": 0.3017782537696435 }, { "epoch": 6.846672530049839, "grad_norm": 0.15696062819547466, "learning_rate": 0.00039200698296745984, "loss": 2.944326639175415, "step": 11680, "token_acc": 0.30493475719400887 }, { "epoch": 6.847258868367048, "grad_norm": 0.15421707654434716, "learning_rate": 0.00039198704060945135, "loss": 2.975033760070801, "step": 11681, "token_acc": 0.3012039746482457 }, { "epoch": 6.847845206684257, "grad_norm": 0.17021314161472845, "learning_rate": 0.00039196709691766596, "loss": 2.9589273929595947, "step": 11682, "token_acc": 0.30565837756240916 }, { "epoch": 6.848431545001466, "grad_norm": 0.1585261835976282, "learning_rate": 0.000391947151892291, "loss": 2.952051877975464, "step": 11683, "token_acc": 0.3061527423220701 }, { "epoch": 6.849017883318675, "grad_norm": 0.17643368243495614, "learning_rate": 0.00039192720553351374, "loss": 3.0112178325653076, "step": 11684, "token_acc": 0.29810780189870945 }, { "epoch": 6.849604221635884, "grad_norm": 0.15575972972113078, "learning_rate": 0.0003919072578415217, "loss": 2.9291515350341797, "step": 11685, "token_acc": 0.3082979751866373 }, { "epoch": 6.850190559953093, "grad_norm": 0.16026352827047924, "learning_rate": 0.0003918873088165022, "loss": 2.9707016944885254, "step": 11686, "token_acc": 0.3014722548342981 }, { "epoch": 6.850776898270302, "grad_norm": 0.1539503210581957, "learning_rate": 0.0003918673584586426, "loss": 2.970628261566162, "step": 11687, "token_acc": 0.3030747518915821 }, { "epoch": 6.8513632365875115, "grad_norm": 0.14926249781289877, "learning_rate": 0.00039184740676813037, "loss": 2.937852621078491, "step": 11688, "token_acc": 0.3066189800421633 }, { "epoch": 6.85194957490472, "grad_norm": 0.15692241646490154, "learning_rate": 0.0003918274537451528, "loss": 2.964616060256958, "step": 11689, "token_acc": 0.30331537471638964 }, { "epoch": 6.852535913221929, "grad_norm": 0.1841719246833981, "learning_rate": 0.0003918074993898976, "loss": 2.94278621673584, "step": 11690, "token_acc": 0.3058318514153068 }, { "epoch": 6.853122251539138, "grad_norm": 0.26153122748683105, "learning_rate": 0.0003917875437025519, "loss": 2.9410276412963867, "step": 11691, "token_acc": 0.30496920377170583 }, { "epoch": 6.853708589856347, "grad_norm": 0.32784484200899217, "learning_rate": 0.00039176758668330323, "loss": 2.9647693634033203, "step": 11692, "token_acc": 0.30202165432442324 }, { "epoch": 6.854294928173556, "grad_norm": 0.2370146148456396, "learning_rate": 0.00039174762833233924, "loss": 2.9802162647247314, "step": 11693, "token_acc": 0.29994179440972163 }, { "epoch": 6.854881266490765, "grad_norm": 0.1600174224162427, "learning_rate": 0.0003917276686498472, "loss": 3.005119800567627, "step": 11694, "token_acc": 0.2977689218187392 }, { "epoch": 6.855467604807974, "grad_norm": 0.1579581882845999, "learning_rate": 0.00039170770763601476, "loss": 2.9815316200256348, "step": 11695, "token_acc": 0.29908986725731934 }, { "epoch": 6.8560539431251835, "grad_norm": 0.1461303305820812, "learning_rate": 0.00039168774529102935, "loss": 2.9841880798339844, "step": 11696, "token_acc": 0.30087171207816577 }, { "epoch": 6.856640281442393, "grad_norm": 0.14577401238915538, "learning_rate": 0.00039166778161507854, "loss": 3.0143861770629883, "step": 11697, "token_acc": 0.29623175833625437 }, { "epoch": 6.857226619759601, "grad_norm": 0.14808830314611282, "learning_rate": 0.00039164781660834975, "loss": 2.980742931365967, "step": 11698, "token_acc": 0.30044139809211984 }, { "epoch": 6.85781295807681, "grad_norm": 0.15336615967868994, "learning_rate": 0.0003916278502710306, "loss": 2.9505152702331543, "step": 11699, "token_acc": 0.30482012934519 }, { "epoch": 6.858399296394019, "grad_norm": 0.1545638792458985, "learning_rate": 0.00039160788260330867, "loss": 3.0311241149902344, "step": 11700, "token_acc": 0.2933646376241518 }, { "epoch": 6.858985634711228, "grad_norm": 0.1713852240745086, "learning_rate": 0.00039158791360537147, "loss": 2.9566478729248047, "step": 11701, "token_acc": 0.3045838752472822 }, { "epoch": 6.859571973028437, "grad_norm": 0.1490428308836565, "learning_rate": 0.00039156794327740665, "loss": 2.9988174438476562, "step": 11702, "token_acc": 0.2989718750412438 }, { "epoch": 6.860158311345646, "grad_norm": 0.16564574294842133, "learning_rate": 0.0003915479716196018, "loss": 3.0127007961273193, "step": 11703, "token_acc": 0.29778584260846225 }, { "epoch": 6.8607446496628555, "grad_norm": 0.152678601227875, "learning_rate": 0.0003915279986321444, "loss": 2.9870998859405518, "step": 11704, "token_acc": 0.2992416872264639 }, { "epoch": 6.861330987980065, "grad_norm": 0.14624487176497464, "learning_rate": 0.00039150802431522225, "loss": 2.994795799255371, "step": 11705, "token_acc": 0.2987401496862747 }, { "epoch": 6.861917326297274, "grad_norm": 0.16399337462002495, "learning_rate": 0.00039148804866902286, "loss": 2.948556423187256, "step": 11706, "token_acc": 0.3070787953893695 }, { "epoch": 6.862503664614483, "grad_norm": 0.1654014639942079, "learning_rate": 0.00039146807169373386, "loss": 2.9480366706848145, "step": 11707, "token_acc": 0.3071945084196669 }, { "epoch": 6.863090002931692, "grad_norm": 0.15436898928029913, "learning_rate": 0.00039144809338954303, "loss": 2.992832660675049, "step": 11708, "token_acc": 0.2993295505100567 }, { "epoch": 6.863676341248901, "grad_norm": 0.18050228149850386, "learning_rate": 0.0003914281137566379, "loss": 2.9588282108306885, "step": 11709, "token_acc": 0.303640057029663 }, { "epoch": 6.86426267956611, "grad_norm": 0.20579533794262228, "learning_rate": 0.0003914081327952063, "loss": 2.9685373306274414, "step": 11710, "token_acc": 0.3024497725789396 }, { "epoch": 6.864849017883318, "grad_norm": 0.16300578609003785, "learning_rate": 0.0003913881505054358, "loss": 3.00211763381958, "step": 11711, "token_acc": 0.2973758566239915 }, { "epoch": 6.8654353562005275, "grad_norm": 0.1808041439587513, "learning_rate": 0.0003913681668875142, "loss": 2.9760894775390625, "step": 11712, "token_acc": 0.3020449979377054 }, { "epoch": 6.866021694517737, "grad_norm": 0.25385692960627604, "learning_rate": 0.0003913481819416291, "loss": 2.9609689712524414, "step": 11713, "token_acc": 0.30338395692021475 }, { "epoch": 6.866608032834946, "grad_norm": 0.21470357794171666, "learning_rate": 0.0003913281956679683, "loss": 2.9859812259674072, "step": 11714, "token_acc": 0.30034866429510987 }, { "epoch": 6.867194371152155, "grad_norm": 0.17908612457263334, "learning_rate": 0.00039130820806671955, "loss": 2.979170799255371, "step": 11715, "token_acc": 0.29979498259019866 }, { "epoch": 6.867780709469364, "grad_norm": 0.25915820696029424, "learning_rate": 0.00039128821913807064, "loss": 2.9745588302612305, "step": 11716, "token_acc": 0.30318829531154545 }, { "epoch": 6.868367047786573, "grad_norm": 0.16407772962468534, "learning_rate": 0.00039126822888220926, "loss": 2.9711427688598633, "step": 11717, "token_acc": 0.30314034634272424 }, { "epoch": 6.868953386103782, "grad_norm": 0.18613934189704082, "learning_rate": 0.0003912482372993232, "loss": 2.966268539428711, "step": 11718, "token_acc": 0.30313986776808716 }, { "epoch": 6.869539724420991, "grad_norm": 0.18661103907496468, "learning_rate": 0.0003912282443896004, "loss": 3.0209670066833496, "step": 11719, "token_acc": 0.29706640754489716 }, { "epoch": 6.8701260627381995, "grad_norm": 0.15228308761010492, "learning_rate": 0.00039120825015322847, "loss": 2.9917116165161133, "step": 11720, "token_acc": 0.29894704024780255 }, { "epoch": 6.870712401055409, "grad_norm": 0.17673202343163127, "learning_rate": 0.0003911882545903953, "loss": 3.027149200439453, "step": 11721, "token_acc": 0.2935131112596647 }, { "epoch": 6.871298739372618, "grad_norm": 0.15882406000929414, "learning_rate": 0.0003911682577012887, "loss": 2.962437868118286, "step": 11722, "token_acc": 0.30275033540675694 }, { "epoch": 6.871885077689827, "grad_norm": 0.1506358594734898, "learning_rate": 0.0003911482594860967, "loss": 2.9518747329711914, "step": 11723, "token_acc": 0.3045365013720768 }, { "epoch": 6.872471416007036, "grad_norm": 0.14470991729019358, "learning_rate": 0.00039112825994500684, "loss": 2.9915945529937744, "step": 11724, "token_acc": 0.2985555351647796 }, { "epoch": 6.873057754324245, "grad_norm": 0.1589336414582065, "learning_rate": 0.0003911082590782072, "loss": 2.9508283138275146, "step": 11725, "token_acc": 0.30538875131538307 }, { "epoch": 6.873644092641454, "grad_norm": 0.1595454779902532, "learning_rate": 0.00039108825688588566, "loss": 2.9584591388702393, "step": 11726, "token_acc": 0.30420618315827486 }, { "epoch": 6.874230430958663, "grad_norm": 0.1570664415365281, "learning_rate": 0.00039106825336823005, "loss": 2.955723762512207, "step": 11727, "token_acc": 0.30404311964919606 }, { "epoch": 6.874816769275872, "grad_norm": 0.1577236032236176, "learning_rate": 0.00039104824852542817, "loss": 2.9925882816314697, "step": 11728, "token_acc": 0.3006066179436297 }, { "epoch": 6.8754031075930815, "grad_norm": 0.16216711564026365, "learning_rate": 0.0003910282423576682, "loss": 2.961024761199951, "step": 11729, "token_acc": 0.3038065750417002 }, { "epoch": 6.875989445910291, "grad_norm": 0.1883112035361387, "learning_rate": 0.0003910082348651379, "loss": 2.9814677238464355, "step": 11730, "token_acc": 0.29976942164726783 }, { "epoch": 6.8765757842275, "grad_norm": 0.1784186145661516, "learning_rate": 0.00039098822604802523, "loss": 2.948641777038574, "step": 11731, "token_acc": 0.30531565015107354 }, { "epoch": 6.877162122544708, "grad_norm": 0.15330187208941493, "learning_rate": 0.0003909682159065182, "loss": 2.9843952655792236, "step": 11732, "token_acc": 0.300951756652842 }, { "epoch": 6.877748460861917, "grad_norm": 0.23109139219187153, "learning_rate": 0.0003909482044408047, "loss": 2.993312358856201, "step": 11733, "token_acc": 0.2993722005925004 }, { "epoch": 6.878334799179126, "grad_norm": 0.16389196650480709, "learning_rate": 0.00039092819165107275, "loss": 2.974762439727783, "step": 11734, "token_acc": 0.3006497184907808 }, { "epoch": 6.878921137496335, "grad_norm": 0.19532395711457398, "learning_rate": 0.0003909081775375103, "loss": 2.982950210571289, "step": 11735, "token_acc": 0.29940507085160156 }, { "epoch": 6.879507475813544, "grad_norm": 0.24599810812646922, "learning_rate": 0.0003908881621003055, "loss": 2.9341750144958496, "step": 11736, "token_acc": 0.3088608332318316 }, { "epoch": 6.8800938141307535, "grad_norm": 0.15580670393558282, "learning_rate": 0.0003908681453396462, "loss": 2.971388339996338, "step": 11737, "token_acc": 0.30290704598081375 }, { "epoch": 6.880680152447963, "grad_norm": 0.2707148270950727, "learning_rate": 0.0003908481272557205, "loss": 3.0030364990234375, "step": 11738, "token_acc": 0.2972993471983931 }, { "epoch": 6.881266490765172, "grad_norm": 0.23571741169451438, "learning_rate": 0.0003908281078487164, "loss": 2.9933886528015137, "step": 11739, "token_acc": 0.29898876462112006 }, { "epoch": 6.881852829082381, "grad_norm": 0.1659700802409054, "learning_rate": 0.0003908080871188221, "loss": 2.9729669094085693, "step": 11740, "token_acc": 0.3010195657557996 }, { "epoch": 6.88243916739959, "grad_norm": 0.20912230547905833, "learning_rate": 0.00039078806506622545, "loss": 2.9782423973083496, "step": 11741, "token_acc": 0.29809322812488515 }, { "epoch": 6.883025505716798, "grad_norm": 0.1705220638385243, "learning_rate": 0.00039076804169111475, "loss": 2.9534168243408203, "step": 11742, "token_acc": 0.30499774749408715 }, { "epoch": 6.883611844034007, "grad_norm": 0.2057129839450563, "learning_rate": 0.0003907480169936779, "loss": 3.02030086517334, "step": 11743, "token_acc": 0.29664586351948236 }, { "epoch": 6.884198182351216, "grad_norm": 0.15143426095090767, "learning_rate": 0.0003907279909741031, "loss": 3.0042381286621094, "step": 11744, "token_acc": 0.29747828406173027 }, { "epoch": 6.8847845206684255, "grad_norm": 0.1898938179486049, "learning_rate": 0.00039070796363257853, "loss": 2.948796272277832, "step": 11745, "token_acc": 0.30429735844802097 }, { "epoch": 6.885370858985635, "grad_norm": 0.14881218518566464, "learning_rate": 0.00039068793496929217, "loss": 2.9900426864624023, "step": 11746, "token_acc": 0.29959716947273124 }, { "epoch": 6.885957197302844, "grad_norm": 0.17715126974194192, "learning_rate": 0.00039066790498443226, "loss": 2.9903740882873535, "step": 11747, "token_acc": 0.30070205781003173 }, { "epoch": 6.886543535620053, "grad_norm": 0.16798929924065284, "learning_rate": 0.0003906478736781869, "loss": 3.016535520553589, "step": 11748, "token_acc": 0.2945578007054092 }, { "epoch": 6.887129873937262, "grad_norm": 0.181732147387609, "learning_rate": 0.0003906278410507444, "loss": 2.9581291675567627, "step": 11749, "token_acc": 0.30472753655360607 }, { "epoch": 6.887716212254471, "grad_norm": 0.17044056474085387, "learning_rate": 0.0003906078071022928, "loss": 2.9962620735168457, "step": 11750, "token_acc": 0.29831138846233435 }, { "epoch": 6.88830255057168, "grad_norm": 0.1769806650356267, "learning_rate": 0.0003905877718330203, "loss": 2.995988130569458, "step": 11751, "token_acc": 0.29943910152132325 }, { "epoch": 6.888888888888889, "grad_norm": 0.14368481623889762, "learning_rate": 0.00039056773524311506, "loss": 2.954618215560913, "step": 11752, "token_acc": 0.30515657383252737 }, { "epoch": 6.889475227206098, "grad_norm": 0.15627866286193465, "learning_rate": 0.0003905476973327654, "loss": 2.939596176147461, "step": 11753, "token_acc": 0.3071210287808582 }, { "epoch": 6.890061565523307, "grad_norm": 0.13886588960171475, "learning_rate": 0.00039052765810215957, "loss": 2.9758479595184326, "step": 11754, "token_acc": 0.30085066914005565 }, { "epoch": 6.890647903840516, "grad_norm": 0.20349497196066435, "learning_rate": 0.00039050761755148576, "loss": 3.018279552459717, "step": 11755, "token_acc": 0.2938936271498363 }, { "epoch": 6.891234242157725, "grad_norm": 0.15249341274041836, "learning_rate": 0.0003904875756809322, "loss": 2.9879746437072754, "step": 11756, "token_acc": 0.3018520099456829 }, { "epoch": 6.891820580474934, "grad_norm": 0.1701403492778671, "learning_rate": 0.0003904675324906871, "loss": 3.0039114952087402, "step": 11757, "token_acc": 0.29902844208546564 }, { "epoch": 6.892406918792143, "grad_norm": 0.1806604825690394, "learning_rate": 0.0003904474879809389, "loss": 3.0207743644714355, "step": 11758, "token_acc": 0.2943410864822365 }, { "epoch": 6.892993257109352, "grad_norm": 0.1562616265794891, "learning_rate": 0.00039042744215187576, "loss": 2.995577335357666, "step": 11759, "token_acc": 0.299441886239791 }, { "epoch": 6.893579595426561, "grad_norm": 0.17478319840533038, "learning_rate": 0.00039040739500368607, "loss": 2.981396436691284, "step": 11760, "token_acc": 0.2984123638737602 }, { "epoch": 6.89416593374377, "grad_norm": 0.154868206747603, "learning_rate": 0.00039038734653655804, "loss": 2.9527242183685303, "step": 11761, "token_acc": 0.3042757573408121 }, { "epoch": 6.8947522720609795, "grad_norm": 0.21350116358602947, "learning_rate": 0.00039036729675068015, "loss": 2.959106922149658, "step": 11762, "token_acc": 0.30456083760774155 }, { "epoch": 6.895338610378188, "grad_norm": 0.24649431183095927, "learning_rate": 0.0003903472456462406, "loss": 2.998145341873169, "step": 11763, "token_acc": 0.29782207197476995 }, { "epoch": 6.895924948695397, "grad_norm": 0.16037500933215432, "learning_rate": 0.0003903271932234278, "loss": 2.943204879760742, "step": 11764, "token_acc": 0.307348533360598 }, { "epoch": 6.896511287012606, "grad_norm": 0.20840701415940696, "learning_rate": 0.0003903071394824301, "loss": 3.0207266807556152, "step": 11765, "token_acc": 0.29615161998800105 }, { "epoch": 6.897097625329815, "grad_norm": 0.18977743695736873, "learning_rate": 0.000390287084423436, "loss": 2.9632294178009033, "step": 11766, "token_acc": 0.30349585062240664 }, { "epoch": 6.897683963647024, "grad_norm": 0.18312595222550798, "learning_rate": 0.0003902670280466336, "loss": 2.9983348846435547, "step": 11767, "token_acc": 0.29928454625170176 }, { "epoch": 6.898270301964233, "grad_norm": 0.22138317524657736, "learning_rate": 0.0003902469703522116, "loss": 2.9409523010253906, "step": 11768, "token_acc": 0.30584507629450514 }, { "epoch": 6.898856640281442, "grad_norm": 0.1432590545847616, "learning_rate": 0.0003902269113403583, "loss": 2.936412811279297, "step": 11769, "token_acc": 0.3075153213830032 }, { "epoch": 6.8994429785986515, "grad_norm": 0.1936241012505152, "learning_rate": 0.0003902068510112621, "loss": 2.9736828804016113, "step": 11770, "token_acc": 0.30176787207768885 }, { "epoch": 6.900029316915861, "grad_norm": 0.14958651155860428, "learning_rate": 0.00039018678936511146, "loss": 2.9696903228759766, "step": 11771, "token_acc": 0.30097810074593845 }, { "epoch": 6.90061565523307, "grad_norm": 0.1902903252190865, "learning_rate": 0.00039016672640209484, "loss": 2.9661245346069336, "step": 11772, "token_acc": 0.3043516857658335 }, { "epoch": 6.901201993550279, "grad_norm": 0.14254590129059455, "learning_rate": 0.0003901466621224007, "loss": 2.979823112487793, "step": 11773, "token_acc": 0.3006787711556487 }, { "epoch": 6.901788331867488, "grad_norm": 0.1818002735556006, "learning_rate": 0.00039012659652621755, "loss": 2.939370632171631, "step": 11774, "token_acc": 0.307278811249287 }, { "epoch": 6.902374670184696, "grad_norm": 0.1522364342946901, "learning_rate": 0.0003901065296137338, "loss": 2.963469982147217, "step": 11775, "token_acc": 0.30309135456412134 }, { "epoch": 6.902961008501905, "grad_norm": 0.1816070720398656, "learning_rate": 0.00039008646138513804, "loss": 2.9807066917419434, "step": 11776, "token_acc": 0.3013243040273391 }, { "epoch": 6.903547346819114, "grad_norm": 0.2151382651975947, "learning_rate": 0.00039006639184061876, "loss": 2.9377634525299072, "step": 11777, "token_acc": 0.306210438010016 }, { "epoch": 6.9041336851363235, "grad_norm": 0.14332249560420796, "learning_rate": 0.0003900463209803644, "loss": 2.9598278999328613, "step": 11778, "token_acc": 0.30331980000950326 }, { "epoch": 6.904720023453533, "grad_norm": 0.21283316950963813, "learning_rate": 0.00039002624880456367, "loss": 2.972876787185669, "step": 11779, "token_acc": 0.3020908771131205 }, { "epoch": 6.905306361770742, "grad_norm": 0.19044640736516466, "learning_rate": 0.000390006175313405, "loss": 3.000403881072998, "step": 11780, "token_acc": 0.29721695629278566 }, { "epoch": 6.905892700087951, "grad_norm": 0.1547311689420805, "learning_rate": 0.0003899861005070769, "loss": 2.9923577308654785, "step": 11781, "token_acc": 0.3005274009883071 }, { "epoch": 6.90647903840516, "grad_norm": 0.20741202967063185, "learning_rate": 0.000389966024385768, "loss": 2.973814010620117, "step": 11782, "token_acc": 0.30181658151440816 }, { "epoch": 6.907065376722369, "grad_norm": 0.1643510913003733, "learning_rate": 0.000389945946949667, "loss": 2.9644293785095215, "step": 11783, "token_acc": 0.3035869576887152 }, { "epoch": 6.907651715039578, "grad_norm": 0.1657535197609546, "learning_rate": 0.0003899258681989624, "loss": 3.026782274246216, "step": 11784, "token_acc": 0.2943737218745928 }, { "epoch": 6.908238053356786, "grad_norm": 0.17360099787285244, "learning_rate": 0.0003899057881338428, "loss": 2.961697578430176, "step": 11785, "token_acc": 0.3034607208703509 }, { "epoch": 6.9088243916739955, "grad_norm": 0.16874729074076256, "learning_rate": 0.0003898857067544968, "loss": 2.9831490516662598, "step": 11786, "token_acc": 0.29998105681901893 }, { "epoch": 6.909410729991205, "grad_norm": 0.15540030912941716, "learning_rate": 0.00038986562406111315, "loss": 2.978808879852295, "step": 11787, "token_acc": 0.30035871607596704 }, { "epoch": 6.909997068308414, "grad_norm": 0.1415674456189763, "learning_rate": 0.00038984554005388035, "loss": 2.9639575481414795, "step": 11788, "token_acc": 0.3019653784382687 }, { "epoch": 6.910583406625623, "grad_norm": 0.16501963139354275, "learning_rate": 0.00038982545473298727, "loss": 3.049868583679199, "step": 11789, "token_acc": 0.29249697948157877 }, { "epoch": 6.911169744942832, "grad_norm": 0.13963501611247967, "learning_rate": 0.00038980536809862234, "loss": 2.9788479804992676, "step": 11790, "token_acc": 0.30249433454571495 }, { "epoch": 6.911756083260041, "grad_norm": 0.1668836305008115, "learning_rate": 0.00038978528015097444, "loss": 2.970059394836426, "step": 11791, "token_acc": 0.303032473924545 }, { "epoch": 6.91234242157725, "grad_norm": 0.18172092299289974, "learning_rate": 0.0003897651908902321, "loss": 3.0090959072113037, "step": 11792, "token_acc": 0.2979577800116396 }, { "epoch": 6.912928759894459, "grad_norm": 0.15297418467609925, "learning_rate": 0.00038974510031658424, "loss": 3.0174202919006348, "step": 11793, "token_acc": 0.29566676203604964 }, { "epoch": 6.913515098211668, "grad_norm": 0.17239415431112426, "learning_rate": 0.0003897250084302194, "loss": 2.974062919616699, "step": 11794, "token_acc": 0.30140016057356467 }, { "epoch": 6.9141014365288775, "grad_norm": 0.1621281319705989, "learning_rate": 0.00038970491523132643, "loss": 2.971731185913086, "step": 11795, "token_acc": 0.3022499623287943 }, { "epoch": 6.914687774846087, "grad_norm": 0.1584799185553344, "learning_rate": 0.00038968482072009405, "loss": 2.9675002098083496, "step": 11796, "token_acc": 0.301887034363565 }, { "epoch": 6.915274113163295, "grad_norm": 0.19386930251655318, "learning_rate": 0.000389664724896711, "loss": 2.9651296138763428, "step": 11797, "token_acc": 0.30283500027848725 }, { "epoch": 6.915860451480504, "grad_norm": 0.15417297267448893, "learning_rate": 0.000389644627761366, "loss": 2.9632906913757324, "step": 11798, "token_acc": 0.30375573083023705 }, { "epoch": 6.916446789797713, "grad_norm": 0.18563914007626597, "learning_rate": 0.00038962452931424796, "loss": 2.9614453315734863, "step": 11799, "token_acc": 0.3035214877415894 }, { "epoch": 6.917033128114922, "grad_norm": 0.18203730709462101, "learning_rate": 0.0003896044295555456, "loss": 2.955831527709961, "step": 11800, "token_acc": 0.30259708111187117 }, { "epoch": 6.917619466432131, "grad_norm": 0.16560121129589625, "learning_rate": 0.0003895843284854477, "loss": 2.9896678924560547, "step": 11801, "token_acc": 0.29955052572437596 }, { "epoch": 6.91820580474934, "grad_norm": 0.17349842530727605, "learning_rate": 0.0003895642261041432, "loss": 2.972221851348877, "step": 11802, "token_acc": 0.3017772215269086 }, { "epoch": 6.9187921430665495, "grad_norm": 0.21009843498659433, "learning_rate": 0.0003895441224118208, "loss": 2.9719138145446777, "step": 11803, "token_acc": 0.30187244631501337 }, { "epoch": 6.919378481383759, "grad_norm": 0.16073780333660168, "learning_rate": 0.00038952401740866943, "loss": 2.9791951179504395, "step": 11804, "token_acc": 0.30076098781263305 }, { "epoch": 6.919964819700968, "grad_norm": 0.1637529366041675, "learning_rate": 0.0003895039110948779, "loss": 2.976726531982422, "step": 11805, "token_acc": 0.30365360758733845 }, { "epoch": 6.920551158018176, "grad_norm": 0.17385442172446414, "learning_rate": 0.00038948380347063517, "loss": 2.9564895629882812, "step": 11806, "token_acc": 0.30524144417605437 }, { "epoch": 6.921137496335385, "grad_norm": 0.16634107477862997, "learning_rate": 0.00038946369453613, "loss": 2.969212055206299, "step": 11807, "token_acc": 0.30362493681374514 }, { "epoch": 6.921723834652594, "grad_norm": 0.18734790859647837, "learning_rate": 0.00038944358429155134, "loss": 2.984787702560425, "step": 11808, "token_acc": 0.29869633099141296 }, { "epoch": 6.922310172969803, "grad_norm": 0.22720318634854345, "learning_rate": 0.0003894234727370882, "loss": 3.011652708053589, "step": 11809, "token_acc": 0.2965390516472808 }, { "epoch": 6.9228965112870124, "grad_norm": 0.32767597266372034, "learning_rate": 0.0003894033598729294, "loss": 2.993985652923584, "step": 11810, "token_acc": 0.2996117297928881 }, { "epoch": 6.923482849604222, "grad_norm": 0.3061113766748395, "learning_rate": 0.00038938324569926376, "loss": 2.9869818687438965, "step": 11811, "token_acc": 0.29926455408753094 }, { "epoch": 6.924069187921431, "grad_norm": 0.19548215468170163, "learning_rate": 0.00038936313021628046, "loss": 2.971559524536133, "step": 11812, "token_acc": 0.302699767382967 }, { "epoch": 6.92465552623864, "grad_norm": 0.18066848859739787, "learning_rate": 0.00038934301342416835, "loss": 2.9372119903564453, "step": 11813, "token_acc": 0.30729884274329894 }, { "epoch": 6.925241864555849, "grad_norm": 0.21374463500694874, "learning_rate": 0.00038932289532311637, "loss": 2.9711790084838867, "step": 11814, "token_acc": 0.30122626230266786 }, { "epoch": 6.925828202873058, "grad_norm": 0.15431696311838963, "learning_rate": 0.0003893027759133135, "loss": 3.0014514923095703, "step": 11815, "token_acc": 0.298686161757332 }, { "epoch": 6.926414541190267, "grad_norm": 0.18420493112845854, "learning_rate": 0.00038928265519494877, "loss": 3.0034584999084473, "step": 11816, "token_acc": 0.2974038116567552 }, { "epoch": 6.927000879507476, "grad_norm": 0.1498923812111919, "learning_rate": 0.00038926253316821116, "loss": 2.9295225143432617, "step": 11817, "token_acc": 0.3073597601366663 }, { "epoch": 6.927587217824685, "grad_norm": 0.16146598003577137, "learning_rate": 0.00038924240983328974, "loss": 2.9788880348205566, "step": 11818, "token_acc": 0.30195872473359386 }, { "epoch": 6.928173556141894, "grad_norm": 0.1505482905149616, "learning_rate": 0.0003892222851903735, "loss": 2.976224422454834, "step": 11819, "token_acc": 0.30206864811822054 }, { "epoch": 6.928759894459103, "grad_norm": 0.17457911643333568, "learning_rate": 0.0003892021592396515, "loss": 2.926795482635498, "step": 11820, "token_acc": 0.3086684863459374 }, { "epoch": 6.929346232776312, "grad_norm": 0.16876744131809343, "learning_rate": 0.00038918203198131285, "loss": 2.980958938598633, "step": 11821, "token_acc": 0.3005658529214147 }, { "epoch": 6.929932571093521, "grad_norm": 0.15581391023882385, "learning_rate": 0.0003891619034155465, "loss": 2.998814105987549, "step": 11822, "token_acc": 0.29680609942824426 }, { "epoch": 6.93051890941073, "grad_norm": 0.14882227932705785, "learning_rate": 0.00038914177354254156, "loss": 2.9813952445983887, "step": 11823, "token_acc": 0.30254263713381213 }, { "epoch": 6.931105247727939, "grad_norm": 0.16102643415224085, "learning_rate": 0.00038912164236248723, "loss": 2.925581932067871, "step": 11824, "token_acc": 0.309238916148252 }, { "epoch": 6.931691586045148, "grad_norm": 0.15815514611320536, "learning_rate": 0.00038910150987557247, "loss": 3.010441303253174, "step": 11825, "token_acc": 0.2966284230032959 }, { "epoch": 6.932277924362357, "grad_norm": 0.1804558746195359, "learning_rate": 0.00038908137608198646, "loss": 2.988403797149658, "step": 11826, "token_acc": 0.30030603423128127 }, { "epoch": 6.9328642626795665, "grad_norm": 0.19672337922227914, "learning_rate": 0.0003890612409819184, "loss": 2.942270278930664, "step": 11827, "token_acc": 0.3059263636651436 }, { "epoch": 6.933450600996775, "grad_norm": 0.18218655435466163, "learning_rate": 0.0003890411045755574, "loss": 2.98616623878479, "step": 11828, "token_acc": 0.2995707269476347 }, { "epoch": 6.934036939313984, "grad_norm": 0.19786330011083625, "learning_rate": 0.00038902096686309253, "loss": 3.0078067779541016, "step": 11829, "token_acc": 0.29622496652595776 }, { "epoch": 6.934623277631193, "grad_norm": 0.2320478307161566, "learning_rate": 0.00038900082784471294, "loss": 2.9770588874816895, "step": 11830, "token_acc": 0.30183826640375194 }, { "epoch": 6.935209615948402, "grad_norm": 0.19534389549011252, "learning_rate": 0.00038898068752060797, "loss": 2.9735546112060547, "step": 11831, "token_acc": 0.3006743616428564 }, { "epoch": 6.935795954265611, "grad_norm": 0.1631540080647636, "learning_rate": 0.00038896054589096664, "loss": 3.0009546279907227, "step": 11832, "token_acc": 0.29754109857013145 }, { "epoch": 6.93638229258282, "grad_norm": 0.1750290044103385, "learning_rate": 0.0003889404029559783, "loss": 2.9819021224975586, "step": 11833, "token_acc": 0.30050715566969066 }, { "epoch": 6.936968630900029, "grad_norm": 0.1814293168820925, "learning_rate": 0.00038892025871583213, "loss": 2.943075180053711, "step": 11834, "token_acc": 0.30664539857890505 }, { "epoch": 6.9375549692172385, "grad_norm": 0.1667460063296604, "learning_rate": 0.0003889001131707173, "loss": 2.970065116882324, "step": 11835, "token_acc": 0.30195312985640577 }, { "epoch": 6.938141307534448, "grad_norm": 0.16504499316882293, "learning_rate": 0.000388879966320823, "loss": 2.948068141937256, "step": 11836, "token_acc": 0.3052755540853457 }, { "epoch": 6.938727645851657, "grad_norm": 0.1933100473902244, "learning_rate": 0.00038885981816633863, "loss": 2.986741542816162, "step": 11837, "token_acc": 0.29929654262657374 }, { "epoch": 6.939313984168866, "grad_norm": 0.15341977427726425, "learning_rate": 0.0003888396687074534, "loss": 2.990516185760498, "step": 11838, "token_acc": 0.29859228271509436 }, { "epoch": 6.939900322486075, "grad_norm": 0.17790928095873598, "learning_rate": 0.0003888195179443565, "loss": 2.980855941772461, "step": 11839, "token_acc": 0.3007755823462685 }, { "epoch": 6.940486660803283, "grad_norm": 0.17430760140775906, "learning_rate": 0.0003887993658772373, "loss": 2.9866514205932617, "step": 11840, "token_acc": 0.3010280879269602 }, { "epoch": 6.941072999120492, "grad_norm": 0.1664801969658795, "learning_rate": 0.0003887792125062851, "loss": 2.9780468940734863, "step": 11841, "token_acc": 0.300259372540319 }, { "epoch": 6.941659337437701, "grad_norm": 0.15428460033087163, "learning_rate": 0.0003887590578316893, "loss": 2.9663310050964355, "step": 11842, "token_acc": 0.30254358176327834 }, { "epoch": 6.9422456757549105, "grad_norm": 0.18367440256970818, "learning_rate": 0.00038873890185363904, "loss": 2.9993748664855957, "step": 11843, "token_acc": 0.2966317540683435 }, { "epoch": 6.94283201407212, "grad_norm": 0.2014406090181704, "learning_rate": 0.0003887187445723238, "loss": 2.961099147796631, "step": 11844, "token_acc": 0.3062983384327724 }, { "epoch": 6.943418352389329, "grad_norm": 0.14806715951974583, "learning_rate": 0.00038869858598793286, "loss": 2.9646453857421875, "step": 11845, "token_acc": 0.3021858519924503 }, { "epoch": 6.944004690706538, "grad_norm": 0.2308601416081772, "learning_rate": 0.0003886784261006555, "loss": 2.9629299640655518, "step": 11846, "token_acc": 0.30336975367118535 }, { "epoch": 6.944591029023747, "grad_norm": 0.3275526592804235, "learning_rate": 0.00038865826491068134, "loss": 2.963777780532837, "step": 11847, "token_acc": 0.30300747670210765 }, { "epoch": 6.945177367340956, "grad_norm": 0.21542420383451788, "learning_rate": 0.00038863810241819964, "loss": 3.040379524230957, "step": 11848, "token_acc": 0.29388647347552677 }, { "epoch": 6.945763705658165, "grad_norm": 0.2597587869362111, "learning_rate": 0.00038861793862339966, "loss": 2.9740726947784424, "step": 11849, "token_acc": 0.30098373205741624 }, { "epoch": 6.946350043975373, "grad_norm": 0.26398500369768646, "learning_rate": 0.00038859777352647103, "loss": 2.9820775985717773, "step": 11850, "token_acc": 0.3007370968673383 }, { "epoch": 6.9469363822925825, "grad_norm": 0.18177602710244337, "learning_rate": 0.00038857760712760305, "loss": 2.952928304672241, "step": 11851, "token_acc": 0.306773169300415 }, { "epoch": 6.947522720609792, "grad_norm": 0.21464013595721174, "learning_rate": 0.0003885574394269852, "loss": 2.9824817180633545, "step": 11852, "token_acc": 0.3016224868302931 }, { "epoch": 6.948109058927001, "grad_norm": 0.1890069637281629, "learning_rate": 0.0003885372704248069, "loss": 3.0263400077819824, "step": 11853, "token_acc": 0.29405302880625916 }, { "epoch": 6.94869539724421, "grad_norm": 0.19724463683013002, "learning_rate": 0.00038851710012125765, "loss": 2.954951286315918, "step": 11854, "token_acc": 0.30400521497367644 }, { "epoch": 6.949281735561419, "grad_norm": 0.1773223362984139, "learning_rate": 0.00038849692851652685, "loss": 2.9575095176696777, "step": 11855, "token_acc": 0.3042853420094588 }, { "epoch": 6.949868073878628, "grad_norm": 0.20850960361905455, "learning_rate": 0.00038847675561080403, "loss": 2.981243371963501, "step": 11856, "token_acc": 0.3015122070501023 }, { "epoch": 6.950454412195837, "grad_norm": 0.1769840979521871, "learning_rate": 0.00038845658140427875, "loss": 2.9723339080810547, "step": 11857, "token_acc": 0.3019284971418788 }, { "epoch": 6.951040750513046, "grad_norm": 0.21776011631044515, "learning_rate": 0.0003884364058971404, "loss": 2.9789717197418213, "step": 11858, "token_acc": 0.30142594872008843 }, { "epoch": 6.951627088830255, "grad_norm": 0.13654777909421822, "learning_rate": 0.0003884162290895786, "loss": 3.0028533935546875, "step": 11859, "token_acc": 0.29892559415937453 }, { "epoch": 6.9522134271474645, "grad_norm": 0.16933441713032454, "learning_rate": 0.0003883960509817828, "loss": 2.9555234909057617, "step": 11860, "token_acc": 0.30419066614083434 }, { "epoch": 6.952799765464674, "grad_norm": 0.17535406179964494, "learning_rate": 0.0003883758715739427, "loss": 2.9621896743774414, "step": 11861, "token_acc": 0.30250836867944475 }, { "epoch": 6.953386103781882, "grad_norm": 0.1515013427590912, "learning_rate": 0.0003883556908662477, "loss": 2.9844155311584473, "step": 11862, "token_acc": 0.3001840699202364 }, { "epoch": 6.953972442099091, "grad_norm": 0.16336225612330396, "learning_rate": 0.00038833550885888733, "loss": 2.9939842224121094, "step": 11863, "token_acc": 0.29771214451768624 }, { "epoch": 6.9545587804163, "grad_norm": 0.14918923848348314, "learning_rate": 0.0003883153255520514, "loss": 2.9977543354034424, "step": 11864, "token_acc": 0.2988264598633866 }, { "epoch": 6.955145118733509, "grad_norm": 0.20029159849200973, "learning_rate": 0.0003882951409459293, "loss": 2.9561972618103027, "step": 11865, "token_acc": 0.3027169300408196 }, { "epoch": 6.955731457050718, "grad_norm": 0.17376184548657353, "learning_rate": 0.00038827495504071066, "loss": 2.9627346992492676, "step": 11866, "token_acc": 0.302417413585817 }, { "epoch": 6.956317795367927, "grad_norm": 0.19640677986219787, "learning_rate": 0.0003882547678365852, "loss": 2.952059268951416, "step": 11867, "token_acc": 0.30445750566239016 }, { "epoch": 6.9569041336851365, "grad_norm": 0.17755505443936187, "learning_rate": 0.0003882345793337425, "loss": 2.954103946685791, "step": 11868, "token_acc": 0.3041139427663366 }, { "epoch": 6.957490472002346, "grad_norm": 0.18999952978638338, "learning_rate": 0.0003882143895323722, "loss": 3.0186946392059326, "step": 11869, "token_acc": 0.29581427886670725 }, { "epoch": 6.958076810319555, "grad_norm": 0.22788369107590167, "learning_rate": 0.0003881941984326639, "loss": 3.0099925994873047, "step": 11870, "token_acc": 0.29535835560547957 }, { "epoch": 6.958663148636763, "grad_norm": 0.16476671037416862, "learning_rate": 0.0003881740060348074, "loss": 2.960766077041626, "step": 11871, "token_acc": 0.30270131446849247 }, { "epoch": 6.959249486953972, "grad_norm": 0.29480484249150984, "learning_rate": 0.00038815381233899227, "loss": 2.9811296463012695, "step": 11872, "token_acc": 0.29947509217671986 }, { "epoch": 6.959835825271181, "grad_norm": 0.17722320324264834, "learning_rate": 0.00038813361734540833, "loss": 2.967946767807007, "step": 11873, "token_acc": 0.30201780683329926 }, { "epoch": 6.96042216358839, "grad_norm": 0.19070509247068793, "learning_rate": 0.00038811342105424506, "loss": 2.984808921813965, "step": 11874, "token_acc": 0.3005461592670895 }, { "epoch": 6.961008501905599, "grad_norm": 0.17290972718052475, "learning_rate": 0.0003880932234656923, "loss": 2.9692840576171875, "step": 11875, "token_acc": 0.3023128014647659 }, { "epoch": 6.9615948402228085, "grad_norm": 0.2509431827073499, "learning_rate": 0.00038807302457993987, "loss": 3.038240909576416, "step": 11876, "token_acc": 0.2924673117870425 }, { "epoch": 6.962181178540018, "grad_norm": 0.17789443476834227, "learning_rate": 0.0003880528243971774, "loss": 2.9883527755737305, "step": 11877, "token_acc": 0.30113352969280766 }, { "epoch": 6.962767516857227, "grad_norm": 0.20661350170485288, "learning_rate": 0.0003880326229175948, "loss": 3.0049376487731934, "step": 11878, "token_acc": 0.29725913283757976 }, { "epoch": 6.963353855174436, "grad_norm": 0.17826356169374144, "learning_rate": 0.0003880124201413815, "loss": 2.965385675430298, "step": 11879, "token_acc": 0.30393816200669266 }, { "epoch": 6.963940193491645, "grad_norm": 0.18557449076924937, "learning_rate": 0.0003879922160687276, "loss": 2.952457904815674, "step": 11880, "token_acc": 0.3029825900714634 }, { "epoch": 6.964526531808854, "grad_norm": 0.1542285801594966, "learning_rate": 0.00038797201069982275, "loss": 3.0019419193267822, "step": 11881, "token_acc": 0.29606803204082777 }, { "epoch": 6.965112870126063, "grad_norm": 0.19478810826296444, "learning_rate": 0.00038795180403485675, "loss": 2.9999148845672607, "step": 11882, "token_acc": 0.29880894109969 }, { "epoch": 6.965699208443271, "grad_norm": 0.15454891632472795, "learning_rate": 0.0003879315960740195, "loss": 2.9689674377441406, "step": 11883, "token_acc": 0.30258649067233645 }, { "epoch": 6.9662855467604805, "grad_norm": 0.17877819661328376, "learning_rate": 0.0003879113868175008, "loss": 2.981320381164551, "step": 11884, "token_acc": 0.30073210095454495 }, { "epoch": 6.96687188507769, "grad_norm": 0.1634616702727411, "learning_rate": 0.00038789117626549034, "loss": 2.960458278656006, "step": 11885, "token_acc": 0.30306764517352797 }, { "epoch": 6.967458223394899, "grad_norm": 0.15943051240147912, "learning_rate": 0.00038787096441817814, "loss": 2.943133592605591, "step": 11886, "token_acc": 0.3070735967080749 }, { "epoch": 6.968044561712108, "grad_norm": 0.16665852136587966, "learning_rate": 0.0003878507512757541, "loss": 3.0206692218780518, "step": 11887, "token_acc": 0.2949171347340684 }, { "epoch": 6.968630900029317, "grad_norm": 0.18896653961704424, "learning_rate": 0.0003878305368384079, "loss": 2.995310068130493, "step": 11888, "token_acc": 0.2982020584357589 }, { "epoch": 6.969217238346526, "grad_norm": 0.18449659799469423, "learning_rate": 0.00038781032110632956, "loss": 2.951279401779175, "step": 11889, "token_acc": 0.3048406164072964 }, { "epoch": 6.969803576663735, "grad_norm": 0.1684822466390478, "learning_rate": 0.00038779010407970893, "loss": 2.993988513946533, "step": 11890, "token_acc": 0.29925059508512897 }, { "epoch": 6.970389914980944, "grad_norm": 0.21365373519726502, "learning_rate": 0.000387769885758736, "loss": 2.98005747795105, "step": 11891, "token_acc": 0.30032944228274966 }, { "epoch": 6.970976253298153, "grad_norm": 0.15706201433295597, "learning_rate": 0.0003877496661436006, "loss": 3.001300096511841, "step": 11892, "token_acc": 0.2984726165203491 }, { "epoch": 6.971562591615362, "grad_norm": 0.18745137940522882, "learning_rate": 0.00038772944523449284, "loss": 2.9794063568115234, "step": 11893, "token_acc": 0.30212146477680285 }, { "epoch": 6.972148929932571, "grad_norm": 0.17064544968015227, "learning_rate": 0.0003877092230316024, "loss": 2.981074810028076, "step": 11894, "token_acc": 0.3007713307507566 }, { "epoch": 6.97273526824978, "grad_norm": 0.1793925047344118, "learning_rate": 0.0003876889995351194, "loss": 3.0228986740112305, "step": 11895, "token_acc": 0.29352387025114673 }, { "epoch": 6.973321606566989, "grad_norm": 0.20281970034469948, "learning_rate": 0.00038766877474523376, "loss": 3.0035018920898438, "step": 11896, "token_acc": 0.29833465995430913 }, { "epoch": 6.973907944884198, "grad_norm": 0.1522896412984321, "learning_rate": 0.0003876485486621355, "loss": 2.9274332523345947, "step": 11897, "token_acc": 0.30709156552670497 }, { "epoch": 6.974494283201407, "grad_norm": 0.18194504783118648, "learning_rate": 0.00038762832128601466, "loss": 2.989865303039551, "step": 11898, "token_acc": 0.3004782098239969 }, { "epoch": 6.975080621518616, "grad_norm": 0.16864427036411161, "learning_rate": 0.00038760809261706116, "loss": 2.9992339611053467, "step": 11899, "token_acc": 0.2993785222640501 }, { "epoch": 6.975666959835825, "grad_norm": 0.15783380818870318, "learning_rate": 0.000387587862655465, "loss": 2.9351344108581543, "step": 11900, "token_acc": 0.3083638603675362 }, { "epoch": 6.9762532981530345, "grad_norm": 0.1965535902373825, "learning_rate": 0.00038756763140141635, "loss": 2.972707986831665, "step": 11901, "token_acc": 0.30240944588075375 }, { "epoch": 6.976839636470244, "grad_norm": 0.1816684257278128, "learning_rate": 0.0003875473988551052, "loss": 3.009921073913574, "step": 11902, "token_acc": 0.2965945574681738 }, { "epoch": 6.977425974787453, "grad_norm": 0.16362313052834068, "learning_rate": 0.00038752716501672156, "loss": 2.986109733581543, "step": 11903, "token_acc": 0.3000424877221039 }, { "epoch": 6.978012313104662, "grad_norm": 0.17285328332209363, "learning_rate": 0.0003875069298864555, "loss": 2.9464831352233887, "step": 11904, "token_acc": 0.3053131051463893 }, { "epoch": 6.97859865142187, "grad_norm": 0.20766953746005248, "learning_rate": 0.0003874866934644972, "loss": 3.0101985931396484, "step": 11905, "token_acc": 0.29603603129033107 }, { "epoch": 6.979184989739079, "grad_norm": 0.18758376937072027, "learning_rate": 0.00038746645575103657, "loss": 2.9682278633117676, "step": 11906, "token_acc": 0.30467352774237766 }, { "epoch": 6.979771328056288, "grad_norm": 0.1686147319226834, "learning_rate": 0.0003874462167462639, "loss": 3.0142905712127686, "step": 11907, "token_acc": 0.2958079886455799 }, { "epoch": 6.980357666373497, "grad_norm": 0.20476151240962562, "learning_rate": 0.0003874259764503692, "loss": 2.950381278991699, "step": 11908, "token_acc": 0.3054970148427951 }, { "epoch": 6.9809440046907065, "grad_norm": 0.16670379881738911, "learning_rate": 0.00038740573486354264, "loss": 2.9987053871154785, "step": 11909, "token_acc": 0.2965705992979261 }, { "epoch": 6.981530343007916, "grad_norm": 0.18445184539402162, "learning_rate": 0.0003873854919859744, "loss": 3.0164108276367188, "step": 11910, "token_acc": 0.29492443434961124 }, { "epoch": 6.982116681325125, "grad_norm": 0.2223049892643541, "learning_rate": 0.00038736524781785454, "loss": 2.9460744857788086, "step": 11911, "token_acc": 0.3061510955374204 }, { "epoch": 6.982703019642334, "grad_norm": 0.15233253597623456, "learning_rate": 0.0003873450023593733, "loss": 2.989528179168701, "step": 11912, "token_acc": 0.2992863288960875 }, { "epoch": 6.983289357959543, "grad_norm": 0.20323017103114738, "learning_rate": 0.00038732475561072084, "loss": 2.9950332641601562, "step": 11913, "token_acc": 0.29852691545661414 }, { "epoch": 6.983875696276751, "grad_norm": 0.17552907008160606, "learning_rate": 0.0003873045075720873, "loss": 2.9589099884033203, "step": 11914, "token_acc": 0.30289009001581846 }, { "epoch": 6.98446203459396, "grad_norm": 0.19893439210842653, "learning_rate": 0.000387284258243663, "loss": 3.0079078674316406, "step": 11915, "token_acc": 0.29869623787723165 }, { "epoch": 6.985048372911169, "grad_norm": 0.1828970980134003, "learning_rate": 0.0003872640076256381, "loss": 2.97236967086792, "step": 11916, "token_acc": 0.3018000518000518 }, { "epoch": 6.9856347112283785, "grad_norm": 0.16028155127030871, "learning_rate": 0.00038724375571820276, "loss": 2.968897819519043, "step": 11917, "token_acc": 0.30180905632655997 }, { "epoch": 6.986221049545588, "grad_norm": 0.15967885963589842, "learning_rate": 0.00038722350252154723, "loss": 2.955930709838867, "step": 11918, "token_acc": 0.30374130276323635 }, { "epoch": 6.986807387862797, "grad_norm": 0.14767918044053518, "learning_rate": 0.00038720324803586195, "loss": 2.9799039363861084, "step": 11919, "token_acc": 0.30011162411802766 }, { "epoch": 6.987393726180006, "grad_norm": 0.15775098525841819, "learning_rate": 0.0003871829922613369, "loss": 2.999072551727295, "step": 11920, "token_acc": 0.2972999097002302 }, { "epoch": 6.987980064497215, "grad_norm": 0.15716450229005435, "learning_rate": 0.0003871627351981625, "loss": 2.962615966796875, "step": 11921, "token_acc": 0.3036182916892644 }, { "epoch": 6.988566402814424, "grad_norm": 0.15921029241824672, "learning_rate": 0.00038714247684652916, "loss": 2.9847588539123535, "step": 11922, "token_acc": 0.29856864871516803 }, { "epoch": 6.989152741131633, "grad_norm": 0.1533614395787824, "learning_rate": 0.000387122217206627, "loss": 3.0492870807647705, "step": 11923, "token_acc": 0.29162859663869933 }, { "epoch": 6.989739079448842, "grad_norm": 0.17076873290623834, "learning_rate": 0.0003871019562786463, "loss": 3.0162100791931152, "step": 11924, "token_acc": 0.29394284104132207 }, { "epoch": 6.990325417766051, "grad_norm": 0.15662061832022398, "learning_rate": 0.00038708169406277747, "loss": 3.0127248764038086, "step": 11925, "token_acc": 0.2971713852698073 }, { "epoch": 6.99091175608326, "grad_norm": 0.15796574300856658, "learning_rate": 0.00038706143055921096, "loss": 2.9685285091400146, "step": 11926, "token_acc": 0.3020518715235113 }, { "epoch": 6.991498094400469, "grad_norm": 0.18022603623534778, "learning_rate": 0.000387041165768137, "loss": 2.9645304679870605, "step": 11927, "token_acc": 0.30317055132762366 }, { "epoch": 6.992084432717678, "grad_norm": 0.15038844098152293, "learning_rate": 0.00038702089968974584, "loss": 2.9493703842163086, "step": 11928, "token_acc": 0.30412106386245824 }, { "epoch": 6.992670771034887, "grad_norm": 0.16580604505540705, "learning_rate": 0.000387000632324228, "loss": 3.0014567375183105, "step": 11929, "token_acc": 0.2982506467360826 }, { "epoch": 6.993257109352096, "grad_norm": 0.15455303867112485, "learning_rate": 0.00038698036367177383, "loss": 2.9551515579223633, "step": 11930, "token_acc": 0.30312639816639064 }, { "epoch": 6.993843447669305, "grad_norm": 0.15476251903111912, "learning_rate": 0.0003869600937325737, "loss": 2.9638864994049072, "step": 11931, "token_acc": 0.3029672111977141 }, { "epoch": 6.994429785986514, "grad_norm": 0.16329109158929905, "learning_rate": 0.0003869398225068181, "loss": 2.987675428390503, "step": 11932, "token_acc": 0.2980661975777248 }, { "epoch": 6.995016124303723, "grad_norm": 0.2111594294779838, "learning_rate": 0.00038691954999469746, "loss": 2.9428184032440186, "step": 11933, "token_acc": 0.30638792639981066 }, { "epoch": 6.9956024626209325, "grad_norm": 0.2688818403120885, "learning_rate": 0.000386899276196402, "loss": 2.9906678199768066, "step": 11934, "token_acc": 0.2982298359770297 }, { "epoch": 6.996188800938142, "grad_norm": 0.23164907186883807, "learning_rate": 0.00038687900111212235, "loss": 2.998396873474121, "step": 11935, "token_acc": 0.29772230793123544 }, { "epoch": 6.99677513925535, "grad_norm": 0.1452251209236702, "learning_rate": 0.00038685872474204905, "loss": 2.9725193977355957, "step": 11936, "token_acc": 0.30138719523837404 }, { "epoch": 6.997361477572559, "grad_norm": 0.19152440710098131, "learning_rate": 0.0003868384470863724, "loss": 2.9761931896209717, "step": 11937, "token_acc": 0.3008268406801465 }, { "epoch": 6.997947815889768, "grad_norm": 0.17738056247886136, "learning_rate": 0.0003868181681452829, "loss": 2.9518702030181885, "step": 11938, "token_acc": 0.3045169561020169 }, { "epoch": 6.998534154206977, "grad_norm": 0.14419577525768673, "learning_rate": 0.000386797887918971, "loss": 2.980222225189209, "step": 11939, "token_acc": 0.3002604066448592 }, { "epoch": 6.999120492524186, "grad_norm": 0.21397547315177062, "learning_rate": 0.0003867776064076274, "loss": 2.9301810264587402, "step": 11940, "token_acc": 0.3088196568013629 }, { "epoch": 6.999706830841395, "grad_norm": 0.17274637573927223, "learning_rate": 0.00038675732361144244, "loss": 2.983229160308838, "step": 11941, "token_acc": 0.29939774837093763 }, { "epoch": 7.0, "grad_norm": 0.19584859849622846, "learning_rate": 0.00038673703953060677, "loss": 3.0100951194763184, "step": 11942, "token_acc": 0.2968003427027383 }, { "epoch": 7.0, "eval_loss": 3.0641727447509766, "eval_runtime": 6.4411, "eval_samples_per_second": 39.744, "eval_steps_per_second": 4.968, "eval_token_acc": 0.29074987146860415, "step": 11942 } ], "logging_steps": 1, "max_steps": 34120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": -34120, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2857968356818944.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }